in cost-optimization/hpa-config-recommender/src/hpaconfigrecommender/read_workload_timeseries.py [0:0]
def _aggregate_data(merged_df: pd.DataFrame) -> pd.DataFrame:
    '''
    Aggregate and process container resource data.

    Derives workload-level totals by multiplying per-container values by
    the replica count of each usage window, rescales the memory columns
    by 1024**2 (bytes -> MiB), makes sure ``window_begin`` is a datetime
    column and returns only the columns needed downstream.

    The input frame is never modified: all work happens on a private
    copy, so calling this function repeatedly on the same frame yields
    the same result every time.

    Args:
        merged_df: Frame holding at least the columns
            ``window_begin``, ``num_replicas_at_usage_window``,
            ``avg_container_cpu_request``, ``avg_container_cpu_usage``,
            ``avg_container_mem_request_mi``,
            ``avg_container_mem_usage_mi``,
            ``max_containers_mem_usage_mi`` and
            ``stddev_containers_cpu_usage``. The three memory columns
            are divided by 1024**2 here, i.e. they are expected to
            arrive in bytes despite their ``_mi`` suffix.

    Returns:
        A new frame with the aggregated columns, or an empty DataFrame
        when ``window_begin`` cannot be converted to datetime (the
        failure is logged).

    Raises:
        KeyError: If a required column is missing from ``merged_df``.
    '''
    bytes_per_mib = 1024**2
    # Columns that hold byte values at this point and must end up in MiB.
    mem_columns = [
        'avg_container_mem_request_mi',
        'avg_container_mem_usage_mi',
        'max_containers_mem_usage_mi',
        'sum_containers_mem_request_mi',
        'sum_containers_mem_usage_mi',
    ]
    output_columns = [
        'window_begin',
        'num_replicas_at_usage_window',
        'avg_container_cpu_usage',
        'avg_container_mem_usage_mi',
        'max_containers_mem_usage_mi',
        'stddev_containers_cpu_usage',
        'sum_containers_cpu_request',
        'sum_containers_cpu_usage',
        'sum_containers_mem_request_mi',
        'sum_containers_mem_usage_mi',
    ]

    # Work on a copy so the caller's frame is left untouched; mutating it
    # in place would rescale the memory columns again on a second call.
    frame = merged_df.copy()
    replicas = frame['num_replicas_at_usage_window']

    # Resource request totals across all replicas of the window.
    frame['sum_containers_cpu_request'] = (
        frame['avg_container_cpu_request'] * replicas
    )
    frame['sum_containers_mem_request_mi'] = (
        frame['avg_container_mem_request_mi'] * replicas
    )

    # Resource usage totals. Note that the memory total is built from the
    # per-container maximum, whereas the CPU total uses the average.
    frame['sum_containers_cpu_usage'] = (
        frame['avg_container_cpu_usage'] * replicas
    )
    frame['sum_containers_mem_usage_mi'] = (
        frame['max_containers_mem_usage_mi'] * replicas
    )

    # Convert the memory values from bytes to MiB (after the totals have
    # been computed, so every memory column is scaled exactly once).
    frame[mem_columns] = frame[mem_columns] / bytes_per_mib

    # Ensure 'window_begin' is a datetime type.
    if not pd.api.types.is_datetime64_any_dtype(frame['window_begin']):
        try:
            frame['window_begin'] = pd.to_datetime(frame['window_begin'])
        except (ValueError, TypeError) as err:
            # Keep the log text keyed on the base exception family so the
            # emitted message stays the same for subclasses as well.
            err_kind = (
                'ValueError' if isinstance(err, ValueError) else 'TypeError'
            )
            logging.error(
                '%s converting window_begin to datetime: %s', err_kind, err
            )
            return pd.DataFrame()

    return frame[output_columns]