# in placement.py [0:0]
def merge_similar_rows(df):
    """Fold rows that share a placement with their database's group row.

    Works on a copy of ``df``; the caller's frame is not modified. The frame
    must provide the columns ``table`` (strings of the form ``"<db>.<name>"``),
    ``z``, ``w`` and ``size``. Two helper columns, ``db_name`` and
    ``table_name``, are derived from ``table`` and are present in the
    returned frame.

    For every database that has exactly one ``"<db>.group"`` row whose
    ``z``/``w`` are not both 0, each other row of that database with the same
    ``z`` and ``w`` is removed and its ``size`` is added to the group row.
    Afterwards every row whose ``size`` is not strictly positive is removed.

    Args:
        df: Input DataFrame with the columns described above.

    Returns:
        A ``(merged, removed)`` tuple: ``merged`` holds the surviving rows
        and ``removed`` is the concatenation of every row that was dropped
        (merged into a group, or dropped for a non-positive size).

    Raises:
        AssertionError: If a database has more than one ``"<db>.group"`` row.
        IndexError: If a ``table`` value contains no ``'.'`` separator.
    """
    df = df.copy()
    initial_length = len(df)
    df['db_name'] = df['table'].apply(lambda x: x.split('.')[0])
    df['table_name'] = df['table'].apply(lambda x: x.split('.')[1])

    removed_rows = []  # kept for debugging; returned to the caller
    for db_name in df['db_name'].unique():
        # Boolean masks (rather than index labels) keep the update and the
        # drop correct even when the frame's index contains duplicates.
        is_group = df['table'] == f'{db_name}.group'
        group_count = int(is_group.sum())
        if group_count > 1:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("Multiple conflicting groups found")
        if group_count == 0:
            continue  # this database has no group row to merge into

        group_z = df.loc[is_group, 'z'].iloc[0]
        group_w = df.loc[is_group, 'w'].iloc[0]
        if group_z == 0 and group_w == 0:
            print(f"[Merge Rows] {db_name}.group is replicated (z=w=0), skipped")
            continue

        is_similar = (
            (df['db_name'] == db_name)
            & (df['z'] == group_z)
            & (df['w'] == group_w)
            & ~is_group
        )
        if not is_similar.any():
            continue

        # Record the rows before dropping them.
        removed_rows.append(df[is_similar].copy())
        # The group row absorbs the total size of the rows merged into it.
        merged_size = df.loc[is_similar, 'size'].sum()
        df.loc[is_group, 'size'] = (
            df.loc[is_group, 'size'].to_numpy() + merged_size
        )
        df = df[~is_similar].copy()

    print(f"Merging rows reduces rows from {initial_length} to {len(df)}.")

    second_length = len(df)
    # Everything that fails the ``size > 0`` filter is recorded, so negative
    # or NaN sizes are also accounted for in the removed rows.
    has_positive_size = df['size'] > 0
    removed_rows.append(df[~has_positive_size].copy())
    df = df[has_positive_size]
    print(f"Removing rows with size 0 reduces rows from {second_length} to {len(df)}.")
    return df, pd.concat(removed_rows)