in tools/url-checker/url_checker.py [0:0]
def _split_link_base(link_path, md_file):
    """Return ``(base_dir, relative_part)`` used to resolve ``link_path`` on disk.

    Links starting with ``/`` are resolved against ``REPO_PATH``; all other
    links are resolved against the directory containing ``md_file``.
    """
    if link_path.startswith('/'):
        # Strip every leading slash: os.path.join() discards the base
        # directory when a later component is absolute, which would make the
        # check run against the filesystem root instead of the repo root.
        return REPO_PATH, link_path.lstrip('/')
    return os.path.dirname(md_file), link_path


def _first_index_file(directory):
    """Return ``(path, name)`` of the first default page found in ``directory``.

    Candidates are probed in the order ``_index.md``, ``index.md``,
    ``README.md``. Returns ``(None, None)`` when none of them exists.
    """
    for index_name in ("_index.md", "index.md", "README.md"):
        candidate = os.path.join(directory, index_name)
        if os.path.exists(candidate):
            return candidate, index_name
    return None, None


def _resolve_segments_case_insensitive(start_dir, segments):
    """Walk ``segments`` from ``start_dir``, matching names case-insensitively.

    Returns the resolved path, or ``None`` as soon as one segment cannot be
    matched, so a partially matched prefix is never reported as a success.
    """
    current = start_dir
    for segment in segments:
        if segment == '..':
            # Go up one directory
            current = os.path.dirname(current)
            print(f"Going up to parent: {current}")
            continue
        if segment in ('', '.'):
            # Empty segments (doubled slashes) and '.' stay in place
            continue

        exact = os.path.join(current, segment)
        if os.path.exists(exact):
            current = exact
            print(f"Exact match found: {segment}")
            continue

        # No exact match: scan the directory for a name differing only in case
        match = None
        try:
            for item in os.listdir(current):
                if item.lower() == segment.lower():
                    match = item
                    break
        except (PermissionError, FileNotFoundError, NotADirectoryError) as e:
            print(f"Error accessing {current}: {str(e)}")

        if match is None:
            print(f"No match found for segment: {segment} in {current}")
            return None

        current = os.path.join(current, match)
        print(f"Case-insensitive match found: {segment} -> {match}")
    return current


def _check_anchored_url(url, md_file):
    """Check a link that contains ``#`` and return the 5-tuple result.

    Same-page anchors are validated against ``extract_headers(md_file)`` when
    the source is a ``.md`` file and accepted unvalidated otherwise. For
    ``file#anchor`` links only the existence of the target is checked; the
    anchor itself is not validated.
    """
    base_url, anchor = url.split('#', 1)
    source_is_markdown = md_file.lower().endswith('.md')

    def report(log_entry):
        # Anchored links are never classified as image/SVG/root-relative;
        # these flags match what callers have always received for them.
        print(log_entry)
        return log_entry, False, False, False, True

    if not base_url:
        # Same-page link (just #header)
        if not source_is_markdown:
            # Same-file anchor in a non-markdown file cannot be validated
            return report(f"{Colors.OKGREEN}[OK HEADER] #{anchor} (in non-markdown file {md_file}){Colors.ENDC}")
        headers = extract_headers(md_file)
        if anchor in headers:
            return report(f"{Colors.OKGREEN}[OK HEADER] #{anchor} (header in {md_file}){Colors.ENDC}")
        print(f"Available headers in {md_file}: {', '.join(headers)}")
        return report(f"{Colors.FAIL}[BROKEN HEADER] #{anchor} (header not found in {md_file}){Colors.ENDC}")

    base_dir, rel_path = _split_link_base(base_url, md_file)
    target_file = os.path.join(base_dir, rel_path)

    if source_is_markdown:
        # A directory target is valid when it contains a default page
        if os.path.isdir(target_file):
            print(f"Base URL {base_url} points to a directory: {target_file}")
            index_file, index_name = _first_index_file(target_file)
            if index_file:
                return report(f"{Colors.OKGREEN}[OK RELATIVE] {index_file}#{anchor} (directory with {index_name}, anchor not validated){Colors.ENDC}")

        # Check if the target exists without case sensitivity.
        # NOTE(review): find_path_case_insensitive is defined elsewhere; it is
        # assumed to take (base_dir, relative_path) as in the original call.
        case_insensitive_path = find_path_case_insensitive(base_dir, rel_path)
        if case_insensitive_path and os.path.exists(case_insensitive_path):
            if os.path.isdir(case_insensitive_path):
                index_file, index_name = _first_index_file(case_insensitive_path)
                if index_file:
                    return report(f"{Colors.OKGREEN}[OK RELATIVE] {index_file}#{anchor} (directory with {index_name}, case-insensitive match, anchor not validated){Colors.ENDC}")
            else:
                return report(f"{Colors.OKGREEN}[OK RELATIVE] {case_insensitive_path}#{anchor} (file exists, case-insensitive match, anchor not validated){Colors.ENDC}")

    # Final exact (case-sensitive) existence check
    if os.path.exists(target_file):
        return report(f"{Colors.OKGREEN}[OK RELATIVE] {target_file}#{anchor} (file exists, anchor not validated){Colors.ENDC}")
    return report(f"{Colors.FAIL}[BROKEN RELATIVE WITH ANCHOR] {target_file}#{anchor} (file not found){Colors.ENDC}")


def check_relative_url(url, md_file):
    """
    Check if a relative file path exists in the filesystem.

    Links containing ``#`` are handled separately: a same-page anchor in a
    ``.md`` source is looked up in the headers of that file, and for
    ``file#anchor`` links only the target's existence is checked. Links
    starting with ``/`` are resolved against ``REPO_PATH``, all others against
    the directory of ``md_file``. When a link ending in ``/`` is not found
    as written, each path segment is retried case-insensitively and the link
    is accepted only if every segment resolves.

    Every result line is printed as well as returned.

    Args:
        url: Relative path to check
        md_file: Source markdown file containing this path
    Returns:
        Tuple containing: (log_entry, is_image, is_svg, is_root_relative, has_anchor).
        For links with an anchor the three middle flags are always False.
    """
    has_anchor = '#' in url
    if has_anchor:
        return _check_anchored_url(url, md_file)

    lowered = url.lower()
    # SVG takes precedence over the generic image classification
    is_svg = any(lowered.endswith(ext) for ext in SVG_EXTENSIONS)
    is_image = not is_svg and any(lowered.endswith(ext) for ext in IMAGE_EXTENSIONS)
    is_root_relative = url.startswith('/')

    base_dir, rel_path = _split_link_base(url, md_file)
    file_path = os.path.join(base_dir, rel_path)
    if is_root_relative:
        print(f"Root-relative path detected. Checking against repo root: {file_path}")

    file_type = "SVG" if is_svg else "image" if is_image else "root-relative" if is_root_relative else "relative"
    print(f"Checking {file_type} URL: {file_path}")

    path_exists = os.path.exists(file_path)

    if not path_exists:
        print(f"Path not found: {file_path}")
        print("Trying case-insensitive path resolution...")

        # Only directory URLs (ending with /) get the segment-wise retry
        if url.endswith('/'):
            rel_segments = rel_path.rstrip('/').split('/')
            print(f"Processing relative segments: {rel_segments}")

            resolved = _resolve_segments_case_insensitive(base_dir, rel_segments)
            if resolved is not None and os.path.exists(resolved):
                file_path = resolved
                path_exists = True
                print(f"Successfully resolved case-insensitive path: {resolved}")

                # Prefer reporting the directory's default page when present
                if os.path.isdir(resolved):
                    default_path, _ = _first_index_file(resolved)
                    if default_path:
                        file_path = default_path
                        print(f"Found default file: {default_path}")

    if path_exists:
        if is_svg:
            log_entry = f"{Colors.OKGREEN}[OK SVG] {file_path}{Colors.ENDC}"
        elif is_image:
            log_entry = f"{Colors.OKGREEN}[OK IMAGE] {file_path}{Colors.ENDC}"
        elif is_root_relative:
            log_entry = f"{Colors.OKGREEN}[OK ROOT-RELATIVE] {file_path} (root-relative path: {url}){Colors.ENDC}"
        else:
            log_entry = f"{Colors.OKGREEN}[OK RELATIVE] {file_path}{Colors.ENDC}"
    else:
        if is_svg:
            log_entry = f"{Colors.FAIL}[BROKEN SVG] {file_path} (SVG in {md_file}){Colors.ENDC}"
        elif is_image:
            log_entry = f"{Colors.FAIL}[BROKEN IMAGE] {file_path} (image in {md_file}){Colors.ENDC}"
        elif is_root_relative:
            log_entry = f"{Colors.FAIL}[BROKEN ROOT-RELATIVE] {file_path} (root-relative path: {url} in {md_file}){Colors.ENDC}"
        else:
            # Anchored links returned earlier, so this is always the
            # "without anchor" case.
            log_entry = f"{Colors.FAIL}[BROKEN RELATIVE WITHOUT ANCHOR] {url} (relative path in {md_file}){Colors.ENDC}"

    print(log_entry)
    return log_entry, is_image, is_svg, is_root_relative, has_anchor