in tools/url-checker/create_test_files.py [0:0]
def collect_test_environment_stats(test_root=None):
    """Collect statistics about the test environment created.

    Walks the directory tree and tallies directories, files, file
    extensions, nesting depth and directories with "special" names.

    Args:
        test_root: Directory to inspect. Defaults to the module-level
            ``TEST_ROOT`` when omitted, which is the original behavior.

    Returns:
        A dict with these keys:

        * ``directory_count`` -- number of directories below the root.
        * ``file_count_by_type`` -- mapping of lower-cased extension
          (without the dot) to file count; files with no extension are
          not listed here.
        * ``total_files`` -- number of files, with or without extension.
        * ``max_depth`` -- deepest directory level below the root; forced
          to 1 when the tree has files but no subdirectories.
        * ``special_dirs`` -- root-relative paths of directories whose
          name contains a space, ``&``, ``!``, ``.`` or ``-``.
        * ``directory_sizes`` -- file count of each non-root directory.
        * ``url_stats`` -- estimated counts per relative-URL category.
          These are fixed ratios of ``total_files`` (40/20/20/10/10 %,
          truncated to int), not measured values.
    """
    base = TEST_ROOT if test_root is None else test_root
    special_chars = (' ', '&', '!', '.', '-')

    stats = {
        "directory_count": 0,
        "file_count_by_type": {},
        "total_files": 0,
        "max_depth": 0,
        "special_dirs": [],
        "directory_sizes": [],
    }

    for current, dirs, files in os.walk(base):
        stats["directory_count"] += len(dirs)

        # Record special directories relative to the root. relpath strips
        # only the leading root, unlike a substring replace which would
        # also eat matching text deeper in the path (and would miss the
        # prefix entirely if the root carried a trailing separator).
        for name in dirs:
            if any(ch in name for ch in special_chars):
                stats["special_dirs"].append(
                    os.path.relpath(os.path.join(current, name), base)
                )

        # Depth is the number of path components below the root.
        rel_path = os.path.relpath(current, base)
        if rel_path != '.':
            depth = len(rel_path.split(os.sep))
            stats["max_depth"] = max(stats["max_depth"], depth)
            # NOTE(review): nesting was reconstructed from a flattened
            # listing -- confirm the root's own file count is meant to be
            # excluded from directory_sizes.
            stats["directory_sizes"].append(len(files))

        # Count files by extension; every file counts toward the total.
        by_type = stats["file_count_by_type"]
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if ext:
                ext = ext[1:]  # Remove the leading dot
                by_type[ext] = by_type.get(ext, 0) + 1
            stats["total_files"] += 1

    # A tree with files but no subdirectories still counts as one level.
    if not stats["max_depth"] and stats["total_files"] > 0:
        stats["max_depth"] = 1

    # URL categories cannot be recovered after generation, so these are
    # placeholder estimates; real numbers would have to be tracked while
    # the URLs are being generated.
    total = stats["total_files"]
    stats["url_stats"] = {
        "direct_paths": int(total * 0.4),
        "dot_prefixed": int(total * 0.2),
        "parent_traversal": int(total * 0.2),
        "directory_paths": int(total * 0.1),
        "invalid_paths": int(total * 0.1),
    }

    return stats