in scripts/figs.py [0:0]
def verify_traffic_rate(yugong: bool = False) -> None:
    """Print the weekly traffic volume and the P90/P95/P99 traffic rates.

    Reads one ``traffic_YYYYMMDD.csv`` file per day for the seven days
    starting 8 weeks after 2024-10-29. For every row the four columns
    ``egress_rate_presto_bps``, ``egress_rate_spark_bps``,
    ``ingress_rate_presto_bps`` and ``ingress_rate_spark_bps`` are added into
    a single traffic rate. The function then prints:

    * the start date,
    * the weekly traffic (sum of all rates, scaled by ``/ 8 * 60``),
    * the number of rate samples, and
    * the 90th, 95th and 99th percentile of the rates, truncated to int.

    Each file is read exactly once. A missing file is reported and skipped,
    so the printed figures then cover only the days that were found.

    Args:
        yugong: When true, read from ``../yugong_results_rep0.002/c30``;
            otherwise read from ``../sample_1.000_rep0.001/c30``.
    """
    start_date = datetime(year=2024, month=10, day=29) + timedelta(days=7 * 8)
    print("Start date:", start_date)

    # The input directory does not depend on the date, so pick it once.
    if yugong:
        results_dir = "../yugong_results_rep0.002/c30"
    else:
        results_dir = "../sample_1.000_rep0.001/c30"

    traffic_rate = 0
    daily_rates = []
    week = pd.date_range(start=start_date, end=start_date + timedelta(days=6), freq='D')
    for single_date in week:
        traffic_file = os.path.join(results_dir, f"traffic_{single_date.strftime('%Y%m%d')}.csv")
        if not os.path.exists(traffic_file):
            print(f"Traffic file not found: {traffic_file}")
            continue

        df = pd.read_csv(traffic_file)
        egress_rate_bps = df['egress_rate_presto_bps'] + df['egress_rate_spark_bps']
        ingress_rate_bps = df['ingress_rate_presto_bps'] + df['ingress_rate_spark_bps']
        rates = egress_rate_bps + ingress_rate_bps

        # One pass feeds both the weekly total and the percentile samples.
        traffic_rate += rates.sum()
        daily_rates.append(rates.to_numpy())

    # NOTE(review): presumably each row is a one-minute sample in bits per
    # second, so / 8 gives bytes per second and * 60 gives bytes per sample
    # interval -- confirm against the producer of the CSV files.
    weekly_traffic = traffic_rate / 8 * 60
    print("Weekly traffic:",
          human_readable_size(weekly_traffic))

    sample_count = sum(len(rates) for rates in daily_rates)
    print("# traffic rates:", sample_count)
    if sample_count == 0:
        # np.percentile cannot produce a usable value for an empty sample.
        print("No traffic rates available; skipping percentiles.")
        return

    all_traffic_rates = np.concatenate(daily_rates)
    p90, p95, p99 = np.percentile(all_traffic_rates, [90, 95, 99])
    print("P90", int(p90),
          "P95", int(p95),
          "P99", int(p99),)