def find_files_to_check()

in tools/url-checker/url_checker.py [0:0]


def find_files_to_check(exclude_folders=None):
    """
    Collect every supported file under REPO_PATH.

    The walk skips directories named 'archive' (case-insensitive), hidden
    directories (names starting with '.'), and any user-specified excluded
    folders together with everything beneath them.

    Args:
        exclude_folders: List of folder paths to exclude. Relative paths are
            resolved against REPO_PATH; absolute paths are used as given.
            None (the default) means no extra exclusions.

    Returns:
        List of file paths (each is the walked directory joined with the
        file name) whose lower-cased extension is in SUPPORTED_FILE_TYPES.
    """
    if exclude_folders is None:
        exclude_folders = []

    # Build one comparison prefix per excluded folder.
    # - abspath() normalises the path AND anchors it, so the prefix is
    #   comparable with the absolute form of `root` even when REPO_PATH
    #   itself is a relative path.
    # - normcase() is a no-op on POSIX and makes the comparison
    #   case/separator-insensitive on Windows.
    # - The trailing separator forces matches to end on a path-component
    #   boundary, so excluding "docs" does not also exclude "docs-old".
    exclude_prefixes = []
    for folder in exclude_folders:
        if not os.path.isabs(folder):
            folder = os.path.join(REPO_PATH, folder)
        normalized = os.path.normcase(os.path.abspath(folder))
        exclude_prefixes.append(normalized.rstrip(os.sep) + os.sep)

    if exclude_folders:
        print(f"Excluding folders: {', '.join(exclude_folders)}")

    files_to_check = []
    for root, dirs, files in os.walk(REPO_PATH):
        # Prune in place so os.walk never descends into 'archive' or hidden
        # directories.
        dirs[:] = [d for d in dirs if d.lower() != 'archive' and not d.startswith('.')]

        # Appending the separator lets one startswith() cover both "root is
        # the excluded folder" and "root lies somewhere below it".
        root_key = os.path.normcase(os.path.abspath(root)).rstrip(os.sep) + os.sep
        if any(root_key.startswith(prefix) for prefix in exclude_prefixes):
            print(f"Skipping excluded directory: {root}")
            dirs[:] = []  # Skip all subdirectories
            continue

        for file in files:
            file_ext = os.path.splitext(file)[1].lower()
            # Check if this is a supported file type
            if file_ext in SUPPORTED_FILE_TYPES:
                files_to_check.append(os.path.join(root, file))

    return files_to_check