in extract_metrics.py [0:0]
def aggregate_metrics(input_folder):
    """Create a global_metrics.csv in each top-level directory of *input_folder*.

    For every top-level directory under ``input_folder``, each run subdirectory
    is scanned for a ``metrics.csv`` (throughput numbers) and ``status.txt``
    (run status). One summary row per run is aggregated and written to
    ``<top_dir>/global_metrics.csv``.

    Args:
        input_folder: Path containing top-level experiment directories, each
            of which holds one subdirectory per run.

    Notes:
        Runs whose metrics.csv is missing, empty, or unparsable get ``-1``
        sentinels for ``avg_tokens_s_gpu`` and ``avg_mfu`` instead of failing.
    """
    for top_dir_path in glob.glob(os.path.join(input_folder, '*')):
        aggregated_data = []
        for subdir_path in glob.glob(os.path.join(top_dir_path, '*')):
            folder_name = os.path.basename(subdir_path)
            data = {
                'run_name': folder_name,
                'status': read_status(os.path.join(subdir_path, 'status.txt')),
                **parse_folder_name(folder_name),  # unpack run parameters (dp, tp, ...)
            }
            # EAFP: attempt to read the first data row of metrics.csv; any
            # expected failure (file missing/unreadable, empty file, missing
            # column, non-integer value, malformed CSV) falls back to the -1
            # sentinel for BOTH fields, matching the all-or-nothing fallback.
            metrics_file = os.path.join(subdir_path, 'metrics.csv')
            try:
                # newline='' per the csv module docs for reading csv files.
                with open(metrics_file, 'r', newline='') as f:
                    row = next(csv.DictReader(f))  # StopIteration if no data rows
                # NOTE(review): int() rejects float strings like "42.5" and
                # silently degrades to -1 — confirm metrics are really integers.
                avg_tokens_s_gpu = int(row['avg_tokens_s_gpu'])
                avg_mfu = int(row['avg_mfu'])
            except (OSError, StopIteration, KeyError, ValueError, TypeError, csv.Error):
                avg_tokens_s_gpu = -1
                avg_mfu = -1
            data['avg_tokens_s_gpu'] = avg_tokens_s_gpu
            data['avg_mfu'] = avg_mfu
            aggregated_data.append(data)

        # Write the per-top-dir summary file.
        output_file = os.path.join(top_dir_path, 'global_metrics.csv')
        fieldnames = ['run_name', 'status', 'dp', 'tp', 'pp', 'micro_batch_size',
                      'grad_acc', 'seq_len', 'avg_tokens_s_gpu', 'avg_mfu']
        with open(output_file, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(aggregated_data)
        print(f"Created global_metrics.csv with {len(aggregated_data)} entries")