def compare_table_groups()

in utility.py [0:0]


    def compare_table_groups(self) -> None:
        """Print how the sets of table groups evolve across ``self.df_list``.

        Each element of ``self.df_list`` is indexed with ``'group_key'`` and
        ``'cputime'``; the frames are referred to as df1, df2, ... in the
        output. For every frame after the first, the report prints:

        * the number of distinct group keys in that frame,
        * how many of them are absent from the immediately preceding frame,
        * how many keys are common to all frames seen so far,
        * how many keys appear in no earlier frame, and
        * the frame's total ``cputime`` together with the share of it that
          belongs to those never-seen-before keys.

        A final line prints the number of distinct keys over all frames.
        If ``self.df_list`` is empty, a short notice is printed instead.

        Returns:
            None. All results are written to standard output.
        """
        if not self.df_list:
            print("No data to compare")
            return

        frames = list(self.df_list)
        group_key_sets = [set(df['group_key'].unique()) for df in frames]

        first_set = group_key_sets[0]
        previous_set = first_set  # keys of the frame just before the current one
        total_set = first_set     # union of keys over all frames seen so far
        common_set = first_set    # intersection of keys over all frames seen so far
        print(f"# table groups in df1 {len(first_set)}")

        # ``number`` is the 1-based label of the current frame (df2, df3, ...).
        for number, (df, group_key_set) in enumerate(
                zip(frames[1:], group_key_sets[1:]), start=2):
            print(f"# table groups in df{number} {len(group_key_set)}")

            # Compare against the *preceding* frame, as the message states.
            new_set = group_key_set - previous_set
            print(f"# table groups in df{number} not in df{number - 1} {len(new_set)}")

            common_set = common_set & group_key_set
            print(f"# table groups in df1 to df{number} {len(common_set)} in common")

            never_seen_set = group_key_set - total_set
            print(f"# table groups never seen before {len(never_seen_set)}")

            # how these tables contribute to the total cputime
            cputime = df['cputime'].sum()
            cputime_new = df.loc[df['group_key'].isin(never_seen_set), 'cputime'].sum()
            # The share is undefined when the frame has no cputime at all;
            # report nan rather than dividing by zero.
            ratio = cputime_new / cputime * 100 if cputime else float("nan")
            print(f"Total cputime in df{number} {cputime}, cputime of new table groups {cputime_new}, "
                  f"ratio {ratio:.2f}%")

            total_set = total_set | group_key_set
            previous_set = group_key_set

        print(f"Total # table groups: {len(total_set)}")