in cluster-trace-gpu-v2020/analysis/utils.py [0:0]
def get_hourly_task_resource_request(df, metrics='cpu'):
    """Sum the planned resource request per hour, one row per date.

    Args:
        df: Task-level DataFrame (the original inline note names it
            ``dftjkix``). It must contain the ``plan_cpu`` / ``plan_gpu`` /
            ``plan_mem`` column selected by ``metrics``. It is passed through
            ``add_hour_date``, which is presumably what supplies the ``date``
            and ``hour`` columns used below -- confirm against that helper.
        metrics: Which request to aggregate: ``'cpu'``, ``'gpu'`` or ``'mem'``.

    Returns:
        DataFrame with one row per date (row label = the date) and one column
        per hour, holding the summed scaled request. Dates that are missing a
        value for any hour are dropped. Empty input yields an empty DataFrame.

    Raises:
        ValueError: If ``metrics`` is not one of the supported names.

    Note:
        A ``plan_resource`` column is written onto the frame returned by
        ``add_hour_date`` (as the original code did), so the caller's frame
        may gain that column if the helper works in place.
    """
    # metric name -> (source column, divisor applied to the raw plan value)
    scaling = {
        'cpu': ('plan_cpu', 100),
        'gpu': ('plan_gpu', 100),
        'mem': ('plan_mem', 1000),
    }
    if metrics not in scaling:
        # Raise instead of exit(): library code must not kill the interpreter.
        raise ValueError(
            "metrics must be one of %s, got %r" % (sorted(scaling), metrics)
        )
    column, divisor = scaling[metrics]

    df = add_hour_date(df)
    df['plan_resource'] = df[column] / divisor

    daily_rows = []
    # groupby iterates the dates in sorted order in a single pass.
    for date, day_df in df.groupby('date'):
        hourly = day_df.groupby('hour')['plan_resource'].sum()
        # Label the row with its date; the old rename targeted 'job_name',
        # a column that does not exist here, so rows were never labelled.
        daily_rows.append(hourly.to_frame(name=date).T)

    if not daily_rows:
        # pd.concat rejects an empty list; keep the "empty in, empty out" contract.
        return pd.DataFrame()

    out_df = pd.concat(daily_rows)
    # If a day contains hours of NaN, it is not a typical day.
    return out_df.dropna()