def find_files_in_directory()

in tools/url-checker/url_checker.py [0:0]


def find_files_in_directory(directory, exclude_folders=None):
    """
    Find all supported files in the given directory, excluding specified folders.

    A file is "supported" when its lower-cased extension is a member of the
    module-level SUPPORTED_FILE_TYPES collection.

    Args:
        directory: Directory to search in
        exclude_folders: List of folder paths to exclude. Relative entries are
            resolved against ``directory``; absolute entries are used as given.
            An excluded folder is skipped together with everything below it.

    Returns:
        List of file paths to check, in os.walk() traversal order
    """
    if exclude_folders is None:
        exclude_folders = []

    # Resolve every exclusion to an absolute, normalized path. os.path.abspath
    # (rather than normpath alone) matters when `directory` is itself relative:
    # the walk roots below are compared in absolute form, so the exclusions
    # must be absolute too or they would never match.
    abs_exclude_folders = []
    for folder in exclude_folders:
        if os.path.isabs(folder):
            abs_exclude_folders.append(os.path.abspath(folder))
        else:
            abs_exclude_folders.append(os.path.abspath(os.path.join(directory, folder)))

    # Pair each excluded folder with a prefix that ends in a path separator, so
    # that excluding "docs" matches "docs" and "docs/sub" but not "docs-old".
    # os.path.join(path, "") appends the separator only when one is missing,
    # which keeps a filesystem-root exclusion working as well.
    excluded_with_prefix = [
        (excluded, os.path.join(excluded, "")) for excluded in abs_exclude_folders
    ]

    files_to_check = []
    for root, dirs, files in os.walk(directory):
        abs_root = os.path.abspath(root)

        # Check if the current directory is an excluded folder or lies inside one
        if any(
            abs_root == excluded or abs_root.startswith(prefix)
            for excluded, prefix in excluded_with_prefix
        ):
            print(f"Skipping excluded directory: {root}")
            dirs[:] = []  # In-place clear stops os.walk from descending further
            continue

        for file in files:
            file_ext = os.path.splitext(file)[1].lower()
            # Check if this is a supported file type
            if file_ext in SUPPORTED_FILE_TYPES:
                files_to_check.append(os.path.join(root, file))
    return files_to_check