Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Allow user to ignore some requests
  • Loading branch information
complikator committed Jun 22, 2021
commit e37f89767bda5052618733eb5546243d64770e82
5 changes: 4 additions & 1 deletion scrapy_deltafetch/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def process_spider_output(self, response, result, spider):
for r in result:
if isinstance(r, Request):
key = self._get_key(r)
if key in self.db:
if key in self.db and not self._is_ignored(r):
logger.info("Ignoring already visited: %s" % r)
if self.stats:
self.stats.inc_value('deltafetch/skipped', spider=spider)
Expand All @@ -92,3 +92,6 @@ def _get_key(self, request):
key = request.meta.get('deltafetch_key') or request_fingerprint(request)
# request_fingerprint() returns `hashlib.sha1().hexdigest()`, is a string
return to_bytes(key)

def _is_ignored(self, request):
    """Return True when *request* is flagged with ``deltafetch_ignore``.

    The flag is read from ``request.meta`` and evaluated by truthiness, so a
    missing key, ``None`` and an explicit ``False`` all mean "not flagged".
    Checking only for ``is not None`` would treat ``deltafetch_ignore=False``
    as if the flag were switched on.
    """
    return bool(request.meta.get('deltafetch_ignore'))