in tools/url-checker/url_checker.py [0:0]
def find_files_to_check(exclude_folders=None):
    """
    Find all supported files in the repository, skipping 'archive' folders,
    hidden directories, and any user-specified excluded folders.

    The tree rooted at REPO_PATH is walked with os.walk. A file is collected
    when its lower-cased extension is in SUPPORTED_FILE_TYPES.

    Args:
        exclude_folders: List of folder paths to exclude. Relative paths are
            interpreted relative to REPO_PATH; absolute paths are used as
            given. An excluded folder hides itself and everything below it.
            Defaults to no exclusions.

    Returns:
        List of file paths to check (each is os.path.join(root, file) as
        produced by the walk).
    """
    if exclude_folders is None:
        exclude_folders = []

    # Resolve every exclusion to an absolute, normalized path so it can be
    # compared against os.path.abspath(root) below. abspath (rather than
    # normpath alone) keeps the comparison valid when REPO_PATH is relative.
    abs_exclude_folders = [
        os.path.abspath(
            folder if os.path.isabs(folder) else os.path.join(REPO_PATH, folder)
        )
        for folder in exclude_folders
    ]
    # Pair each excluded path with its "directory prefix" form. Matching on
    # the prefix *with* a trailing separator ensures that excluding "docs"
    # does not also exclude siblings such as "docs-old".
    exclusion_rules = [
        (excluded, excluded.rstrip(os.sep) + os.sep)
        for excluded in abs_exclude_folders
    ]

    if exclude_folders:
        print(f"Excluding folders: {', '.join(exclude_folders)}")

    files_to_check = []
    for root, dirs, files in os.walk(REPO_PATH):
        abs_root = os.path.abspath(root)

        # Check if the current directory is an excluded folder or lies
        # inside one.
        if any(
            abs_root == excluded or abs_root.startswith(prefix)
            for excluded, prefix in exclusion_rules
        ):
            print(f"Skipping excluded directory: {root}")
            dirs[:] = []  # In-place clear so os.walk does not descend further
            continue

        # Prune 'archive' folders (any letter case) and hidden directories.
        # The slice assignment mutates the list os.walk iterates over.
        dirs[:] = [
            d for d in dirs
            if d.lower() != 'archive' and not d.startswith('.')
        ]

        # Keep only files whose extension is a supported file type.
        files_to_check.extend(
            os.path.join(root, file)
            for file in files
            if os.path.splitext(file)[1].lower() in SUPPORTED_FILE_TYPES
        )

    return files_to_check