def check_absolute_url()

in tools/url-checker/url_checker.py [0:0]


def check_absolute_url(url, md_file=None, retries=3):
    """
    Check if an absolute URL (http/https) is reachable.

    The URL is fetched with a GET request that follows redirects. Each
    attempt ends in one of three ways:

    * status code < 400: reported as OK immediately.
    * status code in TEMPORARY_ERROR_CODES, or a requests exception: the
      request is retried until ``retries`` attempts have been used. Once
      attempts are exhausted, a domain listed in KNOWN_VALID_DOMAINS is
      reported as OK for temporary status codes and for
      timeout/connection/redirect errors; anything else is reported as
      broken.
    * any other status code >= 400: reported as broken immediately, even
      for trusted domains.

    An SSL error for a URL matching TRUSTED_DOMAINS_WITH_CERT_ISSUES is
    reported as OK without further retries.

    Args:
        url: The URL to check
        md_file: Source markdown file containing this URL; when given it is
            appended to the log entry of non-trivial results
        retries: Number of attempts before giving up; values below 1 are
            treated as 1 so that a log entry is always returned

    Returns:
        Log entry string with result ("[OK ABSOLUTE] ..." or
        "[BROKEN ABSOLUTE] ..."), wrapped in the matching Colors codes
    """
    # Extract domain from URL for domain-based verification
    domain = urlparse(url).netloc
    is_trusted_domain = domain in KNOWN_VALID_DOMAINS
    file_info = f" (in file: {md_file})" if md_file else ""

    print(f"Checking absolute URL: {url}")
    print(f"Domain: {domain}, Trusted: {is_trusted_domain}")

    # Always make at least one attempt; otherwise the loop below would be
    # skipped and the function would fall through and return None.
    retries = max(1, retries)

    attempt = 0
    while attempt < retries:
        try:
            # stream=True avoids downloading the body; the context manager
            # closes the response so its connection is released right away
            # instead of lingering until garbage collection.
            with requests.get(
                url,
                headers=HEADERS,
                allow_redirects=True,
                timeout=TIMEOUT,
                stream=True,
            ) as response:
                status_code = response.status_code
        except requests.RequestException as e:
            # For connection errors on trusted domains, consider the URL as
            # temporarily unavailable once the last attempt has failed
            is_connection_issue = isinstance(e, (
                requests.Timeout,
                requests.ConnectionError,
                requests.TooManyRedirects,
            ))
            if is_trusted_domain and is_connection_issue and attempt >= retries - 1:
                log_entry = f"{Colors.OKGREEN}[OK ABSOLUTE] {url} (trusted domain, connection issue: {type(e).__name__}){file_info}{Colors.ENDC}"
                print(log_entry)
                return log_entry

            # Special handling for certificate errors on trusted domains
            if isinstance(e, requests.exceptions.SSLError) and any(
                trusted_domain in domain or trusted_domain in url
                for trusted_domain in TRUSTED_DOMAINS_WITH_CERT_ISSUES
            ):
                log_entry = f"{Colors.OKGREEN}[OK ABSOLUTE] {url} (trusted domain with certificate issue){file_info}{Colors.ENDC}"
                print(log_entry)
                return log_entry

            # The failure is printed on every attempt; it is only returned
            # as the final result once no attempts remain.
            log_entry = f"{Colors.FAIL}[BROKEN ABSOLUTE] {url} - Error: {e}{file_info}{Colors.ENDC}"
            print(log_entry)
            attempt += 1
            if attempt >= retries:
                return log_entry
            print(f"Retrying... ({attempt}/{retries})")
            continue

        if status_code < 400:
            log_entry = f"{Colors.OKGREEN}[OK ABSOLUTE] {url}{Colors.ENDC}"
            print(log_entry)
            return log_entry

        if status_code not in TEMPORARY_ERROR_CODES:
            # For non-temporary errors, mark as broken even for trusted domains
            log_entry = f"{Colors.FAIL}[BROKEN ABSOLUTE] {url} - Status Code: {status_code}{file_info}{Colors.ENDC}"
            print(log_entry)
            return log_entry

        # Temporary error: retry, and decide by trusted status once exhausted
        print(f"Status Code {status_code} for {url}. Retrying... ({attempt + 1}/{retries})")
        attempt += 1
        if attempt < retries:
            continue

        if is_trusted_domain:
            # For trusted domains, mark as OK even with temporary errors
            log_entry = f"{Colors.OKGREEN}[OK ABSOLUTE] {url} (trusted domain with temporary status code: {status_code}){file_info}{Colors.ENDC}"
        else:
            # For non-trusted domains, still mark as broken but note it might be temporary
            log_entry = f"{Colors.FAIL}[BROKEN ABSOLUTE] {url} - Temporary error: {status_code}{file_info}{Colors.ENDC}"
        print(log_entry)
        return log_entry