dags/map_reproducibility/utils/file_comparison.py (88 lines of code) (raw):
#!/usr/bin/env python3
import os
import filecmp
import difflib
from pathlib import Path
def compare_folders(folder1, folder2):
"""
Compare two folders and report differences in files.
Args:
folder1 (str): Path to the first folder
folder2 (str): Path to the second folder
"""
folder1_path = Path(folder1)
folder2_path = Path(folder2)
# Check if folders exist
if not folder1_path.exists() or not folder1_path.is_dir():
print(f"Error: {folder1} is not a valid directory")
return
if not folder2_path.exists() or not folder2_path.is_dir():
print(f"Error: {folder2} is not a valid directory")
return
# Get lists of files in both folders
folder1_files = {f.name: f for f in folder1_path.glob("**/*") if f.is_file()}
folder2_files = {f.name: f for f in folder2_path.glob("**/*") if f.is_file()}
# Files only in folder1
only_in_folder1 = set(folder1_files.keys()) - set(folder2_files.keys())
if only_in_folder1:
print(f"\nFiles only in {folder1}:")
for filename in sorted(only_in_folder1):
print(f" - {filename}")
# Files only in folder2
only_in_folder2 = set(folder2_files.keys()) - set(folder1_files.keys())
if only_in_folder2:
print(f"\nFiles only in {folder2}:")
for filename in sorted(only_in_folder2):
print(f" - {filename}")
# Common files - check for differences
common_files = set(folder1_files.keys()) & set(folder2_files.keys())
different_files = []
print("\nComparing common files...")
for filename in sorted(common_files):
file1_path = folder1_files[filename]
file2_path = folder2_files[filename]
# First quick check with filecmp
if not filecmp.cmp(file1_path, file2_path, shallow=False):
different_files.append(filename)
# Print summary
if not different_files:
print("All common files are identical")
else:
print(f"\nFiles with differences ({len(different_files)}):")
for filename in different_files:
print(f" - {filename}")
show_diff(folder1_files[filename], folder2_files[filename])
def show_diff(file1_path, file2_path):
"""
Show line differences between two files.
Args:
file1_path (Path): Path to the first file
file2_path (Path): Path to the second file
"""
try:
with open(file1_path, "r") as file1, open(file2_path, "r") as file2:
file1_lines = file1.readlines()
file2_lines = file2.readlines()
diff = list(
difflib.unified_diff(
file1_lines,
file2_lines,
fromfile=str(file1_path),
tofile=str(file2_path),
lineterm="",
)
)
if diff:
print("\n Differences:")
# Only show first 10 diff lines to avoid overwhelming output
for line in diff[:10]:
print(f" {line}")
if len(diff) > 10:
print(f" ... and {len(diff)-10} more lines")
print()
except UnicodeDecodeError:
print(" Cannot display diff (binary file or encoding issue)")
except Exception as e:
print(f" Error displaying diff: {e}")
if __name__ == "__main__":
# Pre-defined paths for comparison
print(
f"**********************Comparing folders recipes**********************"
)
folder1 = "../internal-gpu-recipes/recipes"
folder2 = "./dags/map_reproducibility/recipes"
print(f"Folder 1: {folder1}")
print(f"Folder 2: {folder2}")
compare_folders(folder1, folder2)
print(f"**********************Comparing folders values**********************")
folder1 = "../internal-gpu-recipes/values"
folder2 = "./dags/map_reproducibility/values"
print(f"Folder 1: {folder1}")
print(f"Folder 2: {folder2}")
compare_folders(folder1, folder2)
print(
f"**********************Comparing folders helm charts**********************"
)
folder1 = "../internal-gpu-recipes/src/helm-charts"
folder2 = "./dags/map_reproducibility/helm-charts"
print(f"Folder 1: {folder1}")
print(f"Folder 2: {folder2}")
compare_folders(folder1, folder2)