def process_file()

in extract_metrics.py [0:0]


def _mean_after_warmup(values, warmup_steps):
    """Return the rounded integer mean of ``values`` past the warmup prefix.

    Returns ``None`` when nothing remains after dropping the first
    ``warmup_steps`` entries, so the caller never averages an empty slice
    (``np.mean([])`` is NaN, and ``int(round(nan))`` raises ``ValueError``).
    """
    steady = values[warmup_steps:]
    if not steady:
        return None
    return int(round(np.mean(steady)))


def process_file(filepath, warmup_steps=3):
    """Average the per-step metrics found in one log file.

    Every line containing a ``[defaultN]:[rank N]`` prefix is handed to
    ``parse_log_line``; the non-``None`` MFU and tokens/s/GPU values it
    returns are collected in file order. The first ``warmup_steps`` samples
    of each series are discarded as warmup and the rest are averaged and
    rounded to the nearest integer.

    Args:
        filepath: Path of the log file to read.
        warmup_steps: Number of leading samples to drop from each series
            before averaging. Expected to be a non-negative integer.

    Returns:
        A ``(mfu, tokens_s_gpu)`` tuple. Each element is an ``int``, or
        ``None`` when that series has no samples left after warmup. A
        warning is printed when both elements are ``None``.
    """
    # Compile once; the pattern is applied to every line of the file.
    rank_line = re.compile(r'\[default\d+\]:\[rank \d+\]')

    tokens_s_gpu_values = []
    mfu_values = []
    with open(filepath, 'r') as f:
        for line in f:
            if not rank_line.search(line):
                continue
            mfu_value, tokens_s_gpu_value = parse_log_line(line)
            if tokens_s_gpu_value is not None:
                tokens_s_gpu_values.append(tokens_s_gpu_value)
            if mfu_value is not None:
                mfu_values.append(mfu_value)

    # Each series is evaluated on its own: one metric being short must not
    # crash (or discard) the other.
    mfu = _mean_after_warmup(mfu_values, warmup_steps)
    tokens_s_gpu = _mean_after_warmup(tokens_s_gpu_values, warmup_steps)

    if mfu is None and tokens_s_gpu is None:
        print(f"Warning: Not enough data points for {filepath}")

    return mfu, tokens_s_gpu