diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 25fabf7..1f079fd 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -11,8 +11,14 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8, 3.9]
+        include:
+          - python-version: "3.9"
+          - python-version: "3.10"
+          - python-version: "3.11"
+          - python-version: "3.12"
+          - python-version: "3.13"
 
     steps:
     - uses: actions/checkout@v2
 
@@ -21,7 +27,7 @@ jobs:
         sudo apt-get install libdb-dev
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Cache pip
 
@@ -38,4 +44,4 @@
         pip install -r tests/requirements-test.txt
     - name: Test with pytest
       run: |
-        pytest
\ No newline at end of file
+        pytest
diff --git a/scrapy_deltafetch/middleware.py b/scrapy_deltafetch/middleware.py
index 9f6f1b4..cca97e9 100644
--- a/scrapy_deltafetch/middleware.py
+++ b/scrapy_deltafetch/middleware.py
@@ -5,7 +5,6 @@
 from scrapy.http import Request
 from scrapy.item import Item
-from scrapy.utils.request import request_fingerprint
 from scrapy.utils.project import data_path
 from scrapy.utils.python import to_bytes
 from scrapy.exceptions import NotConfigured
 
@@ -41,6 +41,14 @@ def from_crawler(cls, crawler):
         o = cls(dir, reset, crawler.stats)
         crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
         crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+
+        try:
+            o.fingerprint = crawler.request_fingerprinter.fingerprint
+        except AttributeError:
+            from scrapy.utils.request import request_fingerprint
+
+            o.fingerprint = request_fingerprint
+
         return o
 
     def spider_opened(self, spider):
@@ -79,7 +87,7 @@ def process_spider_output(self, response, result, spider):
             yield r
 
     def _get_key(self, request):
-        key = request.meta.get('deltafetch_key') or request_fingerprint(request)
+        key = request.meta.get('deltafetch_key') or self.fingerprint(request)
         return to_bytes(key)
 
     def _is_enabled_for_request(self, request):
diff --git a/setup.py b/setup.py
index ad86588..6bcd948 100644
--- a/setup.py
+++ b/setup.py
@@ -17,11 +17,12 @@
         'Operating System :: OS Independent',
         'Programming Language :: Python',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
+        'Programming Language :: Python :: 3.13',
     ],
-    install_requires=['Scrapy>=1.1.0']
+    install_requires=['Scrapy>=1.1.0'],
+    python_requires='>=3.9',
 )
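The `from_crawler` change above resolves the fingerprint function once, at middleware construction: it prefers the fingerprinter that Scrapy 2.7+ exposes as `crawler.request_fingerprinter` and falls back to the deprecated `request_fingerprint` helper on older versions. A minimal standalone sketch of that resolution logic, assuming a recent Scrapy whose `get_crawler` test helper returns a fully initialised crawler (the `resolve_fingerprint` name is illustrative, not part of this patch):

```python
from scrapy.http import Request
from scrapy.utils.python import to_bytes
from scrapy.utils.test import get_crawler


def resolve_fingerprint(crawler):
    try:
        # Scrapy >= 2.7 exposes the configured REQUEST_FINGERPRINTER_CLASS
        # instance on the crawler object.
        return crawler.request_fingerprinter.fingerprint
    except AttributeError:
        # Older Scrapy: fall back to the (now deprecated) module-level helper.
        from scrapy.utils.request import request_fingerprint
        return request_fingerprint


crawler = get_crawler()
fingerprint = resolve_fingerprint(crawler)
# to_bytes() smooths over the return-type difference: the old helper
# returns a hex str, the new fingerprinter raw bytes.
key = to_bytes(fingerprint(Request("http://example.com")))
```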
diff --git a/tests/test_deltafetch.py b/tests/test_deltafetch.py
index 362bf1e..0a99579 100644
--- a/tests/test_deltafetch.py
+++ b/tests/test_deltafetch.py
@@ -9,11 +9,17 @@
 from scrapy.spiders import Spider
 from scrapy.settings import Settings
 from scrapy.exceptions import NotConfigured
-from scrapy.utils.request import request_fingerprint
 from scrapy.utils.python import to_bytes
 from scrapy.statscollectors import StatsCollector
 from scrapy.utils.test import get_crawler
 
+try:
+    from scrapy.utils.request import RequestFingerprinter
+    _legacy_fingerprint = False
+except ImportError:
+    from scrapy.utils.request import request_fingerprint
+    _legacy_fingerprint = True
+
 from scrapy_deltafetch.middleware import DeltaFetch
 
 
@@ -124,7 +130,7 @@ def test_spider_opened_reset_non_existing_db(self):
         self.spider.deltafetch_reset = True
         mw.spider_opened(self.spider)
         assert mw.db.get(b'random') is None
-        
+
     def test_spider_opened_recreate(self):
         self._create_test_db()
         mw = self.mwcls(self.temp_dir, reset=True, stats=self.stats)
@@ -185,7 +191,12 @@ def test_process_spider_output(self):
 
     def test_process_spider_output_with_ignored_request(self):
         self._create_test_db()
-        mw = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
+        settings = {
+            "DELTAFETCH_DIR": self.temp_dir,
+            "DELTAFETCH_ENABLED": True,
+        }
+        crawler = get_crawler(Spider, settings_dict=settings)
+        mw = self.mwcls.from_crawler(crawler)
         mw.spider_opened(self.spider)
         response = mock.Mock()
         response.request = Request('http://url')
@@ -316,10 +327,20 @@ def __init__(self, dir, reset=False, *args, **kwargs):
         self.assertEqual(self.stats.get_value('deltafetch/stored'), None)
 
     def test_get_key(self):
-        mw = self.mwcls(self.temp_dir, reset=True)
+        settings = {
+            "DELTAFETCH_DIR": self.temp_dir,
+            "DELTAFETCH_ENABLED": True,
+            "DELTAFETCH_RESET": True,
+        }
+        crawler = get_crawler(Spider, settings_dict=settings)
+        mw = self.mwcls.from_crawler(crawler)
         test_req1 = Request('http://url1')
+        if _legacy_fingerprint:
+            fingerprint = request_fingerprint
+        else:
+            fingerprint = RequestFingerprinter.from_crawler(crawler).fingerprint
         self.assertEqual(mw._get_key(test_req1),
-                         to_bytes(request_fingerprint(test_req1)))
+                         to_bytes(fingerprint(test_req1)))
         test_req2 = Request('http://url2', meta={'deltafetch_key': b'dfkey1'})
         self.assertEqual(mw._get_key(test_req2),
                          b'dfkey1')
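For reference, the settings the updated tests pass to `get_crawler` are the same ones used to enable the middleware in a real project. A minimal `settings.py` sketch, assuming the middleware priority of 100 that the project README suggests:

```python
# settings.py -- minimal sketch of enabling scrapy-deltafetch in a project.
SPIDER_MIDDLEWARES = {
    "scrapy_deltafetch.DeltaFetch": 100,  # priority value per the README
}

DELTAFETCH_ENABLED = True       # master switch; from_crawler raises NotConfigured otherwise
DELTAFETCH_DIR = ".deltafetch"  # directory for the Berkeley DB state files
DELTAFETCH_RESET = False        # True discards stored state and re-crawls everything

# Individual requests can override the stored key:
#   Request(url, meta={"deltafetch_key": b"item-123"})
```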