def extract_headers()

in scripts/python/url-checker/url-checker.py [0:0]


def _fence_marker(stripped_line):
    """Return the code-fence marker (e.g. '```' or '~~~~') that starts the line, or None."""
    match = re.match(r'(`{3,}|~{3,})(.*)$', stripped_line)
    if match is None:
        return None
    marker, rest = match.groups()
    # A backtick run followed by more backticks on the same line is inline
    # code (e.g. ```code```), not a fence.
    if marker[0] == '`' and '`' in rest:
        return None
    return marker


def _slugify_header(header_text):
    """Convert header text into the anchor slug used for link validation."""
    # Lowercase, then drop markdown formatting markers (bold, italic, code).
    cleaned = re.sub(r'[*_`]', '', header_text.lower())
    # Keep only word characters, hyphens and spaces...
    slug = re.sub(r'[^\w\- ]', '', cleaned)
    # ...then turn every run of whitespace into a single hyphen.
    return re.sub(r'\s+', '-', slug)


def extract_headers(md_file):
    """Extract all headers from a markdown file as slugs for link validation.

    Lines whose first non-blank character is '#' are treated as headers,
    except when they sit inside a fenced code block (``` or ~~~), where a
    leading '#' is usually a comment rather than a heading.

    Args:
        md_file: Path to the file. Only paths ending in '.md'
            (case-insensitive) are read; anything else prints a warning.

    Returns:
        A list of header slugs in document order. The list is empty for
        non-markdown paths, and holds whatever was collected so far if the
        file cannot be read (a warning is printed instead of raising).
    """
    headers = []
    # Only attempt to extract headers from markdown files
    if not md_file.lower().endswith('.md'):
        print(f"Warning: Attempted to extract headers from non-markdown file: {md_file}")
        return headers

    open_fence = None  # marker of the fenced code block we are inside, if any
    try:
        with open(md_file, 'r', encoding='utf-8') as f:
            for line in f:
                stripped = line.strip()
                marker = _fence_marker(stripped)

                if open_fence is not None:
                    # Inside a code block: only a bare closing fence of the
                    # same character and at least the same length ends it.
                    if (
                        marker is not None
                        and stripped == marker
                        and marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                    ):
                        open_fence = None
                    continue

                if marker is not None:
                    open_fence = marker
                    continue

                if not stripped.startswith('#'):
                    continue

                # Strip the leading '#' markers from the *stripped* line so
                # indented headers do not keep their hashes (which used to
                # end up as a leading '-' in the slug).
                header_text = stripped.lstrip('#').strip()
                # Drop an optional closing sequence such as "## Title ##".
                header_text = re.sub(r'\s+#+$', '', header_text)

                header_slug = _slugify_header(header_text)

                # Add to the list of headers
                headers.append(header_slug)
                print(f"Found header: '{header_text}' -> slug: '{header_slug}'")
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid path or
        # undecodable bytes (UnicodeDecodeError). Stay best-effort for these.
        print(f"Warning: Could not extract headers from {md_file}: {e}")
    return headers