def _aggregate_data()

in cost-optimization/hpa-config-recommender/src/hpaconfigrecommender/read_workload_timeseries.py [0:0]


def _aggregate_data(merged_df: pd.DataFrame)-> pd.DataFrame:
    '''
    Aggregate and process container resource data.
    '''
    # Set Resource Request sums
    merged_df['sum_containers_cpu_request'] = (
        merged_df['avg_container_cpu_request']
        * merged_df['num_replicas_at_usage_window']
    )
    merged_df['sum_containers_mem_request_mi'] = (
        merged_df['avg_container_mem_request_mi']
        * merged_df['num_replicas_at_usage_window']
    )

    # Set Resource Usage sums
    merged_df['sum_containers_cpu_usage'] = (
        merged_df['avg_container_cpu_usage']
        * merged_df['num_replicas_at_usage_window']
    )
    merged_df['sum_containers_mem_usage_mi'] = (
        merged_df['max_containers_mem_usage_mi']
        * merged_df['num_replicas_at_usage_window']
    )

    # Convert aggregated values from bytes to MiB
    merged_df[
        [
            'avg_container_mem_request_mi',
            'avg_container_mem_usage_mi',
            'max_containers_mem_usage_mi',
            'sum_containers_mem_request_mi',
            'sum_containers_mem_usage_mi',
        ]
    ] /= (
        1024**2
    )

    # Ensure 'window_begin' is a datetime type and resample data
    if not pd.api.types.is_datetime64_any_dtype(merged_df['window_begin']):
        try:
            merged_df['window_begin'] = pd.to_datetime(
                merged_df['window_begin']
            )
        except ValueError as ve:
            logging.error(
                'ValueError converting window_begin to datetime: %s', ve
            )
            return pd.DataFrame()
        except TypeError as te:
            logging.error(
                'TypeError converting window_begin to datetime: %s', te
            )
            return pd.DataFrame()
    necessary_columns = [
        'window_begin',
        'num_replicas_at_usage_window',
        'avg_container_cpu_usage',
        'avg_container_mem_usage_mi',
        'max_containers_mem_usage_mi',
        'stddev_containers_cpu_usage',
        'sum_containers_cpu_request',
        'sum_containers_cpu_usage',
        'sum_containers_mem_request_mi',
        'sum_containers_mem_usage_mi'
    ]
    merged_df = merged_df[necessary_columns]
    return merged_df