in atr/tasks/bulk.py [0:0]
def url_excluded(seen: set[str], url: str, args: Args) -> bool:
    """Report whether ``url`` should be skipped by the crawl.

    A URL is excluded when any of the following holds, checked in this order:

    1. it does not start with ``args.base_url``;
    2. it is already a member of ``seen``;
    3. it contains one of the ``?C=<column>;O=`` sorting query markers,
       which would only lead to redundant crawling.

    The reason for each exclusion is logged at debug level. ``seen`` is only
    read here; it is never modified.

    Args:
        seen: URLs that have already been encountered.
        url: The candidate URL to test.
        args: Task arguments; only ``base_url`` is read.

    Returns:
        True if the URL must be skipped, False if it may be processed.
    """
    # Query-string fragments that mark sorting URLs.
    # NOTE(review): these look like directory-index column sort links
    # (name / modified / size / description) -- confirm against the server.
    sort_markers = ("?C=N;O=", "?C=M;O=", "?C=S;O=", "?C=D;O=")

    if not url.startswith(args.base_url):
        skip_message = f"Skipping URL outside base URL scope: {url}"
    elif url in seen:
        skip_message = f"Skipping already seen URL: {url}"
    elif any(marker in url for marker in sort_markers):
        skip_message = f"Skipping sorting URL: {url}"
    else:
        # None of the exclusion rules matched.
        return False

    _LOGGER.debug(skip_message)
    return True