def url_excluded()

in atr/tasks/bulk.py [0:0]


def url_excluded(seen: set[str], url: str, args: Args) -> bool:
    """Report whether ``url`` should be skipped instead of crawled.

    The checks run in this order and the first one that matches wins:

    1. ``url`` does not start with ``args.base_url``.
    2. ``url`` is already a member of ``seen``.
    3. ``url`` contains one of the ``?C=<letter>;O=`` sorting fragments
       (presumably the column-sort links of a directory listing, which
       would only lead to the same content in a different order).

    Args:
        seen: URLs that have already been encountered. Only read here,
            never modified.
        url: The candidate URL.
        args: Object providing the ``base_url`` prefix that bounds the crawl.

    Returns:
        True when the URL is excluded (the reason is logged at debug level),
        False when none of the exclusion rules apply.
    """
    # Query fragments that identify a sorting URL.
    sort_markers = ("?C=N;O=", "?C=M;O=", "?C=S;O=", "?C=D;O=")

    # Select the message for the first rule that matches; fall through to
    # "not excluded" when none does.
    if not url.startswith(args.base_url):
        skip_message = f"Skipping URL outside base URL scope: {url}"
    elif url in seen:
        skip_message = f"Skipping already seen URL: {url}"
    elif any(marker in url for marker in sort_markers):
        skip_message = f"Skipping sorting URL: {url}"
    else:
        return False

    _LOGGER.debug(skip_message)
    return True