def print_file_differences()

in data_extraction_transformation/scripts/one_time_use_scripts/compare_differing_files.py [0:0]


def print_file_differences(file1, file2):
    """Print a column-by-column report of where two CSV files differ.

    Both files are loaded with ``pd.read_csv``. Columns named in the
    module-level ``ignore_columns`` are dropped from each dataframe before
    comparing. For every remaining column present in both files, each row
    whose values differ is printed; columns present in only one file are
    reported by name.

    Cells that are missing (NaN) in both files at the same row are treated
    as equal, consistent with ``Series.equals``. If the files have different
    row counts, that is reported and only the overlapping leading rows are
    compared (rather than raising on the mismatched indexes).

    Args:
        file1: Path (or any input accepted by ``pd.read_csv``) of the first
            CSV file.
        file2: Path (or any input accepted by ``pd.read_csv``) of the second
            CSV file.

    Returns:
        None. The report is written to stdout.
    """
    # Load both files as dataframes
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    # Drop the columns to ignore from both dataframes
    df1 = df1.drop(columns=[col for col in ignore_columns if col in df1.columns], errors='ignore')
    df2 = df2.drop(columns=[col for col in ignore_columns if col in df2.columns], errors='ignore')

    # Element-wise comparison requires identically-labelled Series, so a
    # row-count mismatch is reported up front and the comparison is limited
    # to the rows both files have.
    common_rows = min(len(df1), len(df2))
    if len(df1) != len(df2):
        print(
            f"Row count differs: {file1} has {len(df1)} rows, "
            f"{file2} has {len(df2)} rows. Comparing the first {common_rows} rows only."
        )
        print()
    rows1 = df1.iloc[:common_rows]
    rows2 = df2.iloc[:common_rows]

    # Display differences for each column
    for col in df1.columns:
        if col not in df2.columns:
            print(f"Column '{col}' is not present in {file2}.")
            continue

        left = rows1[col]
        right = rows2[col]
        if left.equals(right):
            continue

        # NaN != NaN evaluates to True, so mask out rows where both sides
        # are missing; otherwise they would be reported as differences.
        both_missing = left.isna() & right.isna()
        differing_indices = left.index[(left != right) & ~both_missing].tolist()
        if not differing_indices:
            # equals() can be False purely because of dtype (e.g. 1 vs 1.0);
            # there are no differing values to list in that case.
            continue

        print(f"Differences in column '{col}':")
        for idx in differing_indices:
            print(f"Index {idx}: {file1} = {left.at[idx]}, {file2} = {right.at[idx]}")
        print()  # Print a new line for better readability

    for col in df2.columns:
        if col not in df1.columns:
            print(f"Column '{col}' is not present in {file1}.")