def _calculate_savings()

in cost-optimization/hpa-config-recommender/src/hpaconfigrecommender/run_workload_simulation.py [0:0]


def _calculate_savings(
        analysis_df: pd.DataFrame, config: Config) -> pd.DataFrame:
    '''
    Calculates the CPU and memory savings based on recommendations forecasts.

    The input DataFrame is left untouched: every derived column is added
    to a time-sorted copy, which is what gets returned.

    Args:
        analysis_df (pd.DataFrame): The DataFrame containing workload
        data to analyze. It must provide 'window_begin' (anything
        pd.to_datetime accepts), 'forecast_sum_cpu_up_and_running',
        'forecast_sum_mem_up_and_running', 'sum_containers_cpu_usage'
        and 'sum_containers_mem_usage_mi'. The request columns
        'sum_containers_cpu_request' and 'sum_containers_mem_request_mi'
        are optional; when one is missing, np.inf is used in its place,
        so the matching saving comes out as inf.
        config (Config): Run configurations. Only COST_OF_GB_IN_CPUS is
        read by this function.

    Returns:
        pd.DataFrame: A new DataFrame sorted by 'window_begin', with a
        fresh integer index and these additional columns:
        'forecast_cpu_saving', 'forecast_mem_saving_mi',
        'avg_saving_in_cpus', 'forecast_clash' and
        'avg_saving_in_cpus_1d_mean'. An empty DataFrame is returned
        when analysis_df is empty.

    Raises:
        KeyError: If one of the required columns is missing.
    '''
    if analysis_df.empty:
        logger.info('The analysis dataframe is empty')
        return pd.DataFrame()

    # Build the working frame without writing into the caller's object:
    # assign() returns a new DataFrame, so parsing 'window_begin' no longer
    # leaks back into analysis_df. The datetime index, sorted ascending, is
    # required by the time-based rolling window below.
    savings_df = (
        analysis_df
        .assign(window_begin=pd.to_datetime(analysis_df['window_begin']))
        .sort_values('window_begin')
        .set_index('window_begin')
    )

    forecast_cpu = savings_df['forecast_sum_cpu_up_and_running']
    forecast_mem = savings_df['forecast_sum_mem_up_and_running']

    # Saving = requested - forecast. A missing request column falls back
    # to np.inf, which propagates as an infinite saving.
    savings_df['forecast_cpu_saving'] = (
        savings_df.get('sum_containers_cpu_request', np.inf) - forecast_cpu
    ).round(3)

    savings_df['forecast_mem_saving_mi'] = np.ceil(
        savings_df.get('sum_containers_mem_request_mi', np.inf)
        - forecast_mem
    )

    # Express the memory saving in CPU terms: divide by 1024 (presumably
    # MiB -> GiB, going by the column and config names), then by the
    # configured COST_OF_GB_IN_CPUS, and add it to the CPU saving.
    savings_df['avg_saving_in_cpus'] = (
        savings_df['forecast_cpu_saving']
        + (
            (savings_df['forecast_mem_saving_mi'] / 1024)
            / config.COST_OF_GB_IN_CPUS
        )
    ).round(2)

    # A clash is any window where observed usage exceeds the forecast
    # for CPU or for memory.
    savings_df['forecast_clash'] = (
        savings_df['sum_containers_cpu_usage'] > forecast_cpu
    ) | (
        savings_df['sum_containers_mem_usage_mi'] > forecast_mem
    )

    # Time-based rolling mean over a trailing 1-day window; min_periods=1
    # yields a value from the very first row onwards.
    savings_df['avg_saving_in_cpus_1d_mean'] = (
        savings_df['avg_saving_in_cpus']
        .rolling(window='1D', min_periods=1)
        .mean()
        .round(2)
    )

    # Move 'window_begin' back to a regular column.
    return savings_df.reset_index()