def data_placement_by_compute_distribution()

in baselines.py [0:0]


    def data_placement_by_compute_distribution(self, df):
        """Place each table on the side that generates most of its traffic.

        The decision is made in two passes and recorded in ``self.placement``:

        1. Traffic-driven pass: for every table that appears in ``df``, the
           accessed bytes (input + output) are summed per ``Status``. A table
           goes on-prem when its on-prem traffic is strictly greater than its
           cloud traffic *and* it still fits into the remaining on-prem
           capacity; otherwise it goes to the cloud. Tables that already have
           a placement, or whose size in ``self.table_size_lookup`` is missing
           or zero, are skipped in this pass.
        2. Capacity-fill pass: every table in ``self.table_size_lookup`` that
           is still undecided is put on-prem while it fits, else in the cloud.

        Args:
            df: DataFrame-like object with the columns ``db_name``,
                ``table_name``, ``Status``, ``inputDataSize`` and
                ``outputDataSize``. A ``Status`` equal to ``0`` is counted as
                on-prem traffic; every other value is counted as cloud traffic.

        Side effects:
            * Adds (or overwrites) an ``access_size`` column on ``df``.
            * Updates ``self.placement`` and ``self.on_prem_data_size``.
            * Prints a size summary to stdout.
        """
        # NOTE: this writes into the caller's frame, not a copy.
        df['access_size'] = df['inputDataSize'] + df['outputDataSize']
        grouped = df.groupby(
            ['db_name', 'table_name', 'Status'], as_index=False
        )['access_size'].sum()

        # "<db>.<table>" -> accessed bytes per side.
        traffic_by_table = {}
        for _, row in grouped.iterrows():
            key = f"{row['db_name']}.{row['table_name']}"
            bucket = traffic_by_table.setdefault(
                key, {Status.ONPREM: 0, Status.CLOUD: 0}
            )
            side = Status.ONPREM if row['Status'] == 0 else Status.CLOUD
            # Accumulate rather than assign: several non-zero Status groups of
            # the same table all land in the CLOUD bucket and must not
            # overwrite one another.
            bucket[side] += row['access_size']

        # Pass 1: follow the traffic, subject to on-prem capacity.
        onprem_size = 0
        cloud_size = 0
        for table_key, traffic in traffic_by_table.items():
            table_size = self.table_size_lookup.get(table_key, 0)
            # Keep earlier decisions; tables without a usable size are not
            # placed here (pass 2 handles those listed in the size lookup).
            if table_key in self.placement or table_size == 0:
                continue

            prefers_onprem = traffic[Status.ONPREM] > traffic[Status.CLOUD]
            fits_onprem = (
                self.on_prem_data_size + onprem_size + table_size
                <= self.on_prem_capacity
            )
            if prefers_onprem and fits_onprem:
                self.placement[table_key] = Status.ONPREM
                onprem_size += table_size
            else:
                self.placement[table_key] = Status.CLOUD
                cloud_size += table_size

        print(f"on-prem new data size: {human_readable_size(onprem_size)}, "
              f"cloud new data size: {human_readable_size(cloud_size)}")
        self.on_prem_data_size += onprem_size
        print(f"on-prem data size: {human_readable_size(self.on_prem_data_size)}")

        # Pass 2: fill the remaining on-prem capacity with undecided tables,
        # in the iteration order of the size lookup.
        for table, table_size in self.table_size_lookup.items():
            if table in self.placement:
                continue
            if table_size + self.on_prem_data_size <= self.on_prem_capacity:
                self.placement[table] = Status.ONPREM
                self.on_prem_data_size += table_size
            else:
                self.placement[table] = Status.CLOUD
                cloud_size += table_size

        # cloud_size only covers tables placed during this call.
        print(f"on-prem data size: {human_readable_size(self.on_prem_data_size)}")
        print(f"cloud data size: {human_readable_size(cloud_size)}")
        print(f"total data size: {human_readable_size(self.on_prem_data_size + cloud_size)}")
        print("=====================")