def calculate_traffic_percentiles()

in scheduler.py [0:0]


def calculate_traffic_percentiles(traffic_dir: str, start_date: datetime, end_date: datetime, debug: bool = False):
    """Compute the 90th, 95th and 99th percentiles of the total traffic rate.

    For every date produced by ``pd.date_range(start_date, end_date)`` the file
    ``<traffic_dir>/traffic_YYYYMMDD.csv`` is read.  Each row's total rate is
    the sum of the four columns ``egress_rate_presto_bps``,
    ``egress_rate_spark_bps``, ``ingress_rate_presto_bps`` and
    ``ingress_rate_spark_bps``.  The samples of all days are pooled and the
    percentiles are taken over the pooled set.

    Args:
        traffic_dir: Directory containing the per-day ``traffic_YYYYMMDD.csv``
            files.
        start_date: First day of the range.
        end_date: Last day of the range.
        debug: When true, print a notice for every file whose row count is
            not 1440.

    Returns:
        A tuple ``(p90, p95, p99)`` of ints (fractional part truncated), or
        ``(None, None, None)`` when no usable sample was found.

    Raises:
        KeyError: If a CSV file lacks one of the four rate columns.
    """
    daily_rates = []

    for single_date in pd.date_range(start_date, end_date):
        traffic_file = os.path.join(traffic_dir, f"traffic_{single_date.strftime('%Y%m%d')}.csv")
        # EAFP: let read_csv report a missing file instead of checking first,
        # which avoids a race between the existence check and the read.
        try:
            df = pd.read_csv(traffic_file)
        except FileNotFoundError:
            print(f"Traffic file not found: {traffic_file}")
            continue

        # 1440 = minutes per day; presumably one sample per minute is
        # expected -- confirm against the producer of these files.
        if debug and len(df) != 1440:
            print(f"Check {traffic_file}: {len(df)}")

        egress = df['egress_rate_presto_bps'] + df['egress_rate_spark_bps']
        ingress = df['ingress_rate_presto_bps'] + df['ingress_rate_spark_bps']
        # A blank cell makes the row's sum NaN.  One NaN would turn every
        # percentile into NaN and make int() raise, so such rows are dropped.
        total = (egress + ingress).dropna()
        # Skip empty results so a header-only file cannot affect the dtype of
        # the pooled array.
        if len(total):
            daily_rates.append(total.to_numpy())

    if not daily_rates:
        return None, None, None  # No data found

    # One pooled array and one percentile call instead of three list scans.
    all_rates = np.concatenate(daily_rates)
    p90, p95, p99 = np.percentile(all_rates, [90, 95, 99])
    return int(p90), int(p95), int(p99)