in utility.py [0:0]
def print_table_info(self, df_slice, start_rank, end_rank):
    """Print size and workload statistics for a slice of tables, split by ``code``.

    Two lines are printed: one for the rows of ``df_slice`` whose ``code``
    equals 10 and one for all remaining rows. Each line reports the row
    count, the summed ``size_in_gbs``, the summed ``inputDataSize`` and
    ``cputime`` of the matching rows in ``self.workload_df``, and the
    count and summed size of "cold" tables (rows with no ``inputDataSize``
    after joining with the workload data).

    Args:
        df_slice: DataFrame with at least the columns ``db_table``
            (``"<db>.<table>"`` strings), ``code`` and ``size_in_gbs``.
        start_rank: First rank of the slice; only used in the printed label.
        end_rank: Last rank of the slice; only used in the printed label.

    Reads ``self.workload_df``, which must provide the columns ``db_name``,
    ``table_name``, ``inputDataSize`` and ``cputime``.
    """

    def get_workload_info_for_slice(slice_df):
        """Join ``slice_df`` with the workload data and aggregate it.

        Returns:
            Tuple ``(read_size, cold_table_count, cold_table_size,
            total_cputime)``.
        """
        # Work on a copy so the helper columns never leak into the caller's frame.
        tables = slice_df.copy()

        # Split on the first dot only and assign the parts one column at a
        # time. `expand=True` yields as many columns as the data happens to
        # contain (none for an empty slice, three or more when a name holds
        # extra dots), which does not fit a fixed two-column assignment.
        name_parts = tables['db_table'].str.split('.', n=1)
        tables['split_db_name'] = name_parts.str[0]
        tables['split_table_name'] = name_parts.str[1]

        merge_keys = {
            'left_on': ['split_db_name', 'split_table_name'],
            'right_on': ['db_name', 'table_name'],
        }

        # Left join keeps every table of the slice; rows without an
        # inputDataSize value afterwards are counted as cold.
        with_workload = tables.merge(self.workload_df, how='left', **merge_keys)
        cold_tables = with_workload[with_workload['inputDataSize'].isna()]
        cold_table_count = len(cold_tables)
        cold_table_size = cold_tables['size_in_gbs'].sum()

        # Right join keeps every workload row; dropping the rows without a
        # size_in_gbs value leaves only workload rows that matched a table
        # of this slice (and whose table has a known size).
        workload_hits = tables.merge(self.workload_df, how='right', **merge_keys)
        workload_hits = workload_hits[workload_hits['size_in_gbs'].notna()]
        read_size_ = workload_hits['inputDataSize'].sum()
        total_cputime = workload_hits['cputime'].sum()

        return read_size_, cold_table_count, cold_table_size, total_cputime

    for code_val, desc in [(10, 'code=10'), (None, 'code!=10')]:
        # `None` stands for "every code other than 10".
        if code_val is not None:
            curr_slice = df_slice[df_slice['code'] == code_val]
        else:
            curr_slice = df_slice[df_slice['code'] != 10]

        read_size, cold_count, cold_size, total_cputime = get_workload_info_for_slice(curr_slice)

        # Sizes are multiplied by 1024 ** 3 before formatting; presumably
        # human_readable_size expects a byte count — confirm against its definition.
        print(
            f"{start_rank}~{end_rank} tables with {desc}: Count: {len(curr_slice)} "
            f"| Size: {human_readable_size(curr_slice['size_in_gbs'].sum() * 1024 ** 3)} "
            f"| Read Size: {human_readable_size(read_size * 1024 ** 3)} "
            f"| Total CPU Time: {total_cputime} "
            f"| Cold Tables: {cold_count} | Cold Tables Size: {human_readable_size(cold_size * 1024 ** 3)}"
        )