# in data_extraction_transformation/scripts/one_time_use_scripts/compare_differing_files.py [0:0]
def print_file_differences(file1, file2):
    """Print a column-by-column report of where two CSV files differ.

    Both inputs are loaded with ``pd.read_csv``. Columns listed in the
    module-level ``ignore_columns`` collection (defined outside this
    function) are dropped from both frames before comparing. For every
    remaining column present in both files, each row whose values differ is
    printed; columns present in only one file are reported by name.

    Cells that are missing (NaN) in both files at the same row are treated as
    equal. If the files have different numbers of rows, a notice is printed
    and only the leading rows that both files have are compared.

    Args:
        file1: Path (or any object ``pd.read_csv`` accepts) of the first CSV.
        file2: Path (or any object ``pd.read_csv`` accepts) of the second CSV.

    Returns:
        None. The report is written to standard output.
    """
    # Load both files as dataframes
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    # Drop the columns to ignore from both dataframes. Filtering to the
    # columns that actually exist keeps ``drop`` from raising on absent names.
    df1 = df1.drop(columns=[col for col in ignore_columns if col in df1.columns])
    df2 = df2.drop(columns=[col for col in ignore_columns if col in df2.columns])

    # Element-wise ``!=`` only works on identically-labelled Series; with
    # unequal row counts it raises ValueError. Report the mismatch once and
    # compare the rows both files share instead of aborting the report.
    if len(df1) != len(df2):
        shared_rows = min(len(df1), len(df2))
        print(
            f"Row counts differ: {file1} has {len(df1)} rows, "
            f"{file2} has {len(df2)} rows. "
            f"Comparing only the first {shared_rows} rows."
        )
        df1 = df1.iloc[:shared_rows]
        df2 = df2.iloc[:shared_rows]

    # Display differences for each column
    for col in df1.columns:
        if col not in df2.columns:
            print(f"Column '{col}' is not present in {file2}.")
            continue

        left = df1[col]
        right = df2[col]
        if left.equals(right):
            continue

        # NaN != NaN evaluates to True, so exclude rows where both sides are
        # missing; otherwise every shared blank cell is listed as a difference.
        both_missing = left.isna() & right.isna()
        differs = (left != right) & ~both_missing

        print(f"Differences in column '{col}':")
        for idx in differs.index[differs]:
            print(f"Index {idx}: {file1} = {left.at[idx]}, {file2} = {right.at[idx]}")
        print()  # Print a new line for better readability

    for col in df2.columns:
        if col not in df1.columns:
            print(f"Column '{col}' is not present in {file1}.")