in cluster-trace-gpu-v2020/analysis/utils.py [0:0]
def get_dfia(dfi):
    """Aggregate instance-level rows into one row per (job_name, task_name).

    Args:
        dfi: DataFrame with at least the columns ``job_name``, ``task_name``,
            ``status``, ``start_time`` and ``end_time``. It is not modified.

    Returns:
        DataFrame with one row per (job_name, task_name) pair found in
        ``dfi``, carrying:

        * ``status``     - maximum of the pair's distinct ``status`` values
        * ``start_time`` - minimum ``start_time`` over rows with ``start_time > 0``
        * ``end_time``   - maximum ``end_time`` over rows with ``end_time > 0``
        * ``runtime``    - mean of ``end_time - start_time`` over rows where
          both timestamps are ``> 0``

        Pairs with no qualifying rows for a given column get NaN there,
        because every aggregate is left-merged onto the status table.
    """
    keys = ['job_name', 'task_name']

    # Non-positive timestamps are excluded from every time-based aggregate.
    started = dfi.start_time > 0
    ended = dfi.end_time > 0

    # The status table is the merge base: it has a row for every pair.
    status = dfi[keys + ['status']].drop_duplicates().groupby(keys).max()

    first_start = dfi.loc[started, keys + ['start_time']].groupby(keys).min()
    last_end = dfi.loc[ended, keys + ['end_time']].groupby(keys).max()

    # Build the runtime column with assign() so we never write into a
    # filtered slice of `dfi` (avoids pandas' chained-assignment warning),
    # and average only `runtime` rather than every numeric column.
    finished = dfi.loc[started & ended, keys + ['end_time', 'start_time']]
    runtime = (
        finished.assign(runtime=finished.end_time - finished.start_time)
        .groupby(keys)[['runtime']]
        .mean()
        .reset_index()  # kept column-keyed to preserve the original output layout
    )

    dfia = status
    for part in (first_start, last_end, runtime):
        dfia = dfia.merge(part, on=keys, how='left')
    return dfia