# check_relative_url()
#
# Defined in tools/url-checker/url_checker.py [0:0]


def check_relative_url(url, md_file):
    """
    Check if a relative file path exists in the filesystem.

    Three kinds of URL are handled:

    * URLs containing ``#`` found in a ``.md`` source file. A same-page
      anchor (``#section``) is validated against the headers returned by
      ``extract_headers``. For ``file.md#section`` only the existence of the
      target file (or of an index file inside a target directory) is checked;
      the anchor itself is not validated.
    * URLs containing ``#`` found in any other source file. Only the
      existence of the target file is checked.
    * URLs without an anchor. Resolved against ``REPO_PATH`` when they start
      with ``/`` and against the directory of ``md_file`` otherwise.
      Directory URLs (ending in ``/``) additionally get a segment-by-segment
      case-insensitive lookup.

    Every outcome is printed and also returned as a colorized log entry.

    Args:
        url: Relative path to check
        md_file: Source markdown file containing this path

    Returns:
        Tuple containing: (log_entry, is_image, is_svg, is_root_relative, has_anchor).
        For URLs with an anchor, is_image, is_svg and is_root_relative are
        always False.
    """
    # Flag to track if URL has an anchor
    has_anchor = '#' in url

    # Handle header links (e.g., #section-name or file.md#section-name)
    if has_anchor and md_file.lower().endswith('.md'):
        base_url, anchor = url.split('#', 1)

        # If it's a same-page link (just #header)
        if not base_url:
            headers = extract_headers(md_file)
            if anchor in headers:
                log_entry = f"{Colors.OKGREEN}[OK HEADER] #{anchor} (header in {md_file}){Colors.ENDC}"
                print(log_entry)
                return log_entry, False, False, False, has_anchor

            log_entry = f"{Colors.FAIL}[BROKEN HEADER] #{anchor} (header not found in {md_file}){Colors.ENDC}"
            print(f"Available headers in {md_file}: {', '.join(headers)}")
            print(log_entry)
            return log_entry, False, False, False, has_anchor

        # Construct the target path based on the base_url.
        # NOTE(review): when base_url starts with '/', os.path.join discards
        # the directory of md_file, so root-relative links with an anchor are
        # not resolved against REPO_PATH here (unlike the anchor-less path
        # below). Confirm whether that is intended.
        source_dir = os.path.dirname(md_file)
        target_file = os.path.join(source_dir, base_url)

        # Handle the case where the base_url points to a directory: accept it
        # as soon as one of the common index files is present.
        if os.path.isdir(target_file):
            print(f"Base URL {base_url} points to a directory: {target_file}")
            for index_name in ["_index.md", "index.md", "README.md"]:
                index_file = os.path.join(target_file, index_name)
                if os.path.exists(index_file):
                    log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {index_file}#{anchor} (directory with {index_name}, anchor not validated){Colors.ENDC}"
                    print(log_entry)
                    return log_entry, False, False, False, has_anchor

        # Check if file exists without case sensitivity
        case_insensitive_path = find_path_case_insensitive(source_dir, base_url)
        if case_insensitive_path and os.path.exists(case_insensitive_path):
            if os.path.isdir(case_insensitive_path):
                # It's a directory, check for index files. If none is found
                # we fall through to the plain existence check below.
                for index_name in ["_index.md", "index.md", "README.md"]:
                    index_file = os.path.join(case_insensitive_path, index_name)
                    if os.path.exists(index_file):
                        log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {index_file}#{anchor} (directory with {index_name}, case-insensitive match, anchor not validated){Colors.ENDC}"
                        print(log_entry)
                        return log_entry, False, False, False, has_anchor
            else:
                # It's a file
                log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {case_insensitive_path}#{anchor} (file exists, case-insensitive match, anchor not validated){Colors.ENDC}"
                print(log_entry)
                return log_entry, False, False, False, has_anchor

        # Original check if file exists (case sensitive)
        if os.path.exists(target_file):
            log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {target_file}#{anchor} (file exists, anchor not validated){Colors.ENDC}"
        else:
            log_entry = f"{Colors.FAIL}[BROKEN RELATIVE WITH ANCHOR] {target_file}#{anchor} (file not found){Colors.ENDC}"
        print(log_entry)
        return log_entry, False, False, False, has_anchor

    # Handle hash in URL for non-markdown source files
    elif has_anchor:
        base_url, anchor = url.split('#', 1)

        if not base_url:
            # Same-file anchor in non-markdown file, we can't validate this
            log_entry = f"{Colors.OKGREEN}[OK HEADER] #{anchor} (in non-markdown file {md_file}){Colors.ENDC}"
            print(log_entry)
            return log_entry, False, False, False, has_anchor

        # For non-markdown file links with anchors, we just check if the file exists
        target_file = os.path.join(os.path.dirname(md_file), base_url)
        if os.path.exists(target_file):
            log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {target_file}#{anchor} (file exists, anchor not validated){Colors.ENDC}"
        else:
            log_entry = f"{Colors.FAIL}[BROKEN RELATIVE WITH ANCHOR] {target_file}#{anchor} (file not found){Colors.ENDC}"
        print(log_entry)
        return log_entry, False, False, False, has_anchor

    # From here on the URL has no anchor: both anchor branches above return
    # on every path.

    # Check if it's an SVG file
    is_svg = any(url.lower().endswith(ext) for ext in SVG_EXTENSIONS)
    # Check if it's an image file
    is_image = not is_svg and any(url.lower().endswith(ext) for ext in IMAGE_EXTENSIONS)

    # Handle root-relative URLs (starting with /)
    is_root_relative = url.startswith('/')
    if is_root_relative:
        # URLs starting with / are relative to repo root, not the current file
        file_path = os.path.join(REPO_PATH, url[1:])  # Remove leading / and join with repo root
        print(f"Root-relative path detected. Checking against repo root: {file_path}")
    else:
        # Regular document-relative URL
        file_path = os.path.join(os.path.dirname(md_file), url)

    file_type = "SVG" if is_svg else "image" if is_image else "root-relative" if is_root_relative else "relative"
    print(f"Checking {file_type} URL: {file_path}")

    # Check if path exists directly
    path_exists = os.path.exists(file_path)

    # If path doesn't exist, try case-insensitive matching
    if not path_exists:
        print(f"Path not found: {file_path}")
        print("Trying case-insensitive path resolution...")

        # For directory URLs (ending with /)
        if url.endswith('/'):
            # Start from an existing directory and walk the URL one segment
            # at a time, matching each segment case-insensitively if needed.
            current = os.path.dirname(md_file) if not is_root_relative else REPO_PATH

            rel_segments = url.rstrip('/').split('/')
            print(f"Processing relative segments: {rel_segments}")

            # The walk only counts as a success if *every* segment was
            # matched. Without this flag a failed walk would leave `current`
            # pointing at the last directory that did match, which exists,
            # and the broken link would be reported as OK.
            all_segments_matched = True
            for segment in rel_segments:
                if segment == '..':
                    # Go up one directory
                    current = os.path.dirname(current)
                    print(f"Going up to parent: {current}")
                elif segment == '.':
                    # Stay in current directory
                    continue
                elif os.path.exists(os.path.join(current, segment)):
                    # Exact case match
                    current = os.path.join(current, segment)
                    print(f"Exact match found: {segment}")
                else:
                    # Try to find a case-insensitive match for this segment
                    matched_item = None
                    try:
                        for item in os.listdir(current):
                            if item.lower() == segment.lower():
                                matched_item = item
                                break
                    except (PermissionError, FileNotFoundError, NotADirectoryError) as e:
                        print(f"Error accessing {current}: {str(e)}")

                    if matched_item is None:
                        print(f"No match found for segment: {segment} in {current}")
                        all_segments_matched = False
                        break

                    current = os.path.join(current, matched_item)
                    print(f"Case-insensitive match found: {segment} -> {matched_item}")

            if all_segments_matched and os.path.exists(current):
                file_path = current
                path_exists = True
                print(f"Successfully resolved case-insensitive path: {current}")

                # Check for default files in the directory; the first one
                # found is what gets reported in the log entry.
                if os.path.isdir(current):
                    for default_file in ['_index.md', 'index.md', 'README.md']:
                        default_path = os.path.join(current, default_file)
                        if os.path.exists(default_path):
                            file_path = default_path
                            print(f"Found default file: {default_path}")
                            break

    # If path still doesn't exist and it's a directory URL, try to check for markdown files
    if not path_exists and url.endswith('/') and os.path.isdir(os.path.dirname(file_path)):
        try:
            md_files = [f for f in os.listdir(file_path) if f.endswith('.md')]
            if md_files:
                path_exists = True
                file_path = os.path.join(file_path, md_files[0])  # Use the first markdown file found
                print(f"Directory contains markdown files: {', '.join(md_files)}")
            else:
                print("Directory exists but contains no markdown files")
        except PermissionError:
            print(f"Permission error accessing directory: {file_path}")
        except FileNotFoundError:
            print(f"Directory doesn't exist: {file_path}")

    if path_exists:
        if is_svg:
            log_entry = f"{Colors.OKGREEN}[OK SVG] {file_path}{Colors.ENDC}"
        elif is_image:
            log_entry = f"{Colors.OKGREEN}[OK IMAGE] {file_path}{Colors.ENDC}"
        elif is_root_relative:
            log_entry = f"{Colors.OKGREEN}[OK ROOT-RELATIVE] {file_path} (root-relative path: {url}){Colors.ENDC}"
        else:
            log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {file_path}{Colors.ENDC}"
        print(log_entry)
        return log_entry, is_image, is_svg, is_root_relative, has_anchor

    if is_svg:
        log_entry = f"{Colors.FAIL}[BROKEN SVG] {file_path} (SVG in {md_file}){Colors.ENDC}"
    elif is_image:
        log_entry = f"{Colors.FAIL}[BROKEN IMAGE] {file_path} (image in {md_file}){Colors.ENDC}"
    elif is_root_relative:
        log_entry = f"{Colors.FAIL}[BROKEN ROOT-RELATIVE] {file_path} (root-relative path: {url} in {md_file}){Colors.ENDC}"
    elif has_anchor:
        # Kept for log-format parity; anchored URLs return earlier.
        log_entry = f"{Colors.FAIL}[BROKEN RELATIVE WITH ANCHOR] {url} (relative path in {md_file}){Colors.ENDC}"
    else:
        log_entry = f"{Colors.FAIL}[BROKEN RELATIVE WITHOUT ANCHOR] {url} (relative path in {md_file}){Colors.ENDC}"
    print(log_entry)
    return log_entry, is_image, is_svg, is_root_relative, has_anchor