def compare_folders()

in dags/map_reproducibility/utils/file_comparison.py [0:0]


def compare_folders(folder1, folder2) -> None:
  """
  Compare two folders recursively and print a report of the differences.

  Files are matched by their path relative to each folder root, so files
  that share a base name but live in different subdirectories are treated
  as distinct entries. The report lists files present in only one of the
  folders, then compares the content of files present in both and calls
  ``show_diff`` for each pair whose content differs.

  Args:
      folder1 (str): Path to the first folder
      folder2 (str): Path to the second folder

  Returns:
      None. All results are printed. If either argument is not an existing
      directory, an error message is printed and the function returns early.
  """
  folder1_path = Path(folder1)
  folder2_path = Path(folder2)

  # is_dir() is already False for paths that do not exist, so a single check
  # covers both the "missing" and the "not a directory" cases.
  if not folder1_path.is_dir():
    print(f"Error: {folder1} is not a valid directory")
    return

  if not folder2_path.is_dir():
    print(f"Error: {folder2} is not a valid directory")
    return

  # Index every regular file by its POSIX-style path relative to the folder
  # root. Keying by bare file name would let same-named files in different
  # subdirectories overwrite each other and be compared against the wrong
  # counterpart.
  folder1_files = {
      f.relative_to(folder1_path).as_posix(): f
      for f in folder1_path.glob("**/*")
      if f.is_file()
  }
  folder2_files = {
      f.relative_to(folder2_path).as_posix(): f
      for f in folder2_path.glob("**/*")
      if f.is_file()
  }

  # Files only in folder1 (dict key views support set operations directly)
  only_in_folder1 = folder1_files.keys() - folder2_files.keys()
  if only_in_folder1:
    print(f"\nFiles only in {folder1}:")
    for filename in sorted(only_in_folder1):
      print(f"  - {filename}")

  # Files only in folder2
  only_in_folder2 = folder2_files.keys() - folder1_files.keys()
  if only_in_folder2:
    print(f"\nFiles only in {folder2}:")
    for filename in sorted(only_in_folder2):
      print(f"  - {filename}")

  # Common files - check for differences
  common_files = folder1_files.keys() & folder2_files.keys()

  print("\nComparing common files...")
  # shallow=False makes filecmp compare file contents rather than trusting
  # matching os.stat() signatures (type, size, mtime).
  different_files = [
      filename
      for filename in sorted(common_files)
      if not filecmp.cmp(
          folder1_files[filename], folder2_files[filename], shallow=False
      )
  ]

  # Print summary
  if not different_files:
    print("All common files are identical")
  else:
    print(f"\nFiles with differences ({len(different_files)}):")
    for filename in different_files:
      print(f"  - {filename}")
      show_diff(folder1_files[filename], folder2_files[filename])