scripts/jobtrace_to_yugong.py [21:37]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Walk the date span in 7-day strides; `i` is the day offset of each week's
    # first day relative to `start_date`.
    # NOTE(review): the stop value `number_of_dates - 7` is exclusive, so an
    # offset equal to `number_of_dates - 7` is never visited (14 -> only i=0,
    # 7 -> no iterations). If `number_of_dates` is a plain count of days, the
    # last full week is skipped -- confirm this is intended.
    # NOTE(review): the visible part of this loop is identical to the one in
    # scripts/jobtrace_to_yugong_agg.py; consider sharing a helper.
    for i in range(0, number_of_dates - 7, 7):
        week_start = start_date + timedelta(days=i)
        # +6 days, so week_start..week_end covers 7 calendar days inclusive.
        week_end = week_start + timedelta(days=6)
        print(f"Processing {week_start:%Y%m%d} to {week_end:%Y%m%d}...")

        # Fresh list for every week; how it is filled lies beyond this excerpt
        # (presumably one frame per day -- TODO confirm).
        weekly_dfs = []
        for date in pd.date_range(week_start, week_end): # date_range includes both endpoints
            # Load that day's file, "<csv_folder>/<YYYYMMDD>-Presto.csv", with
            # explicit dtypes for the listed columns. The literal two-character
            # string \N is parsed as NaN on top of pandas' default NA markers
            # (presumably the exporter's NULL marker -- TODO confirm).
            df_Presto = pd.read_csv(f"{csv_folder}/{date.strftime('%Y%m%d')}-Presto.csv", dtype={
                'job_id': str, 'start_time': str, 'duration': float,
                'cputime': float, 'db_name': str, 'table_name': str,
                'uown_names': str, 'inputDataSize': float,
                'outputDataSize': float, 'template_id': str
            }, na_values=['\\N'])
            # Narrow the frame to these six columns; every other column
            # (e.g. job_id, start_time, duration, template_id) is dropped.
            df_Presto = df_Presto[['db_name', 'table_name', 'inputDataSize', 'outputDataSize',
                                   'cputime', 'uown_names']]

            # Rows whose uown_names is missing (NaN). How test_df is used is
            # not visible in this excerpt.
            test_df = df_Presto[df_Presto['uown_names'].isna()]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



scripts/jobtrace_to_yugong_agg.py [15:31]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Walk the date span in 7-day strides; `i` is the day offset of each week's
    # first day relative to `start_date`.
    # NOTE(review): the stop value `number_of_dates - 7` is exclusive, so an
    # offset equal to `number_of_dates - 7` is never visited (14 -> only i=0,
    # 7 -> no iterations). If `number_of_dates` is a plain count of days, the
    # last full week is skipped -- confirm this is intended.
    # NOTE(review): the visible part of this loop is identical to the one in
    # scripts/jobtrace_to_yugong.py; consider sharing a helper.
    for i in range(0, number_of_dates - 7, 7):
        week_start = start_date + timedelta(days=i)
        # +6 days, so week_start..week_end covers 7 calendar days inclusive.
        week_end = week_start + timedelta(days=6)
        print(f"Processing {week_start:%Y%m%d} to {week_end:%Y%m%d}...")

        # Fresh list for every week; how it is filled lies beyond this excerpt
        # (presumably one frame per day -- TODO confirm).
        weekly_dfs = []
        for date in pd.date_range(week_start, week_end): # date_range includes both endpoints
            # Load that day's file, "<csv_folder>/<YYYYMMDD>-Presto.csv", with
            # explicit dtypes for the listed columns. The literal two-character
            # string \N is parsed as NaN on top of pandas' default NA markers
            # (presumably the exporter's NULL marker -- TODO confirm).
            df_Presto = pd.read_csv(f"{csv_folder}/{date.strftime('%Y%m%d')}-Presto.csv", dtype={
                'job_id': str, 'start_time': str, 'duration': float,
                'cputime': float, 'db_name': str, 'table_name': str,
                'uown_names': str, 'inputDataSize': float,
                'outputDataSize': float, 'template_id': str
            }, na_values=['\\N'])
            # Narrow the frame to these six columns; every other column
            # (e.g. job_id, start_time, duration, template_id) is dropped.
            df_Presto = df_Presto[['db_name', 'table_name', 'inputDataSize', 'outputDataSize',
                                   'cputime', 'uown_names']]

            # Rows whose uown_names is missing (NaN). How test_df is used is
            # not visible in this excerpt.
            test_df = df_Presto[df_Presto['uown_names'].isna()]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



