in scripts/scaling_benchmarks.py
def save_experiment_configs(configs, output_path, job_ids=None):
    """Save the core experiment configurations to a CSV file for tracking."""
    records = []
    for i, config in enumerate(configs):
        # Heuristic: smaller models (intermediate_size below 10k) tie their word embeddings.
        tie_word_embeddings = config["model"]["model_config"]["intermediate_size"] < 10_000
        # Estimate the total parameter count (the return value is not stored in the record).
        estimate_num_params(
            config["model"]["model_config"]["num_hidden_layers"],
            config["model"]["model_config"]["hidden_size"],
            config["model"]["model_config"]["num_attention_heads"],
            config["model"]["model_config"]["intermediate_size"],
            tie_word_embeddings,
            config["model"]["model_config"]["vocab_size"],
            NUM_KEY_VALUE_HEADS,
        )
        record = {
            "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "name": config["general"]["run"],
            # World size divided by 8, assuming 8 GPUs per node.
            "nodes": config["parallelism"]["dp"] * config["parallelism"]["tp"] * config["parallelism"]["pp"] / 8,
            "seq_len": config["tokens"]["sequence_length"],
            "mbs": config["tokens"]["micro_batch_size"],
            "batch_accum": config["tokens"]["batch_accumulation_per_replica"],
            # Global batch size in tokens: seq_len * micro batch size * gradient accumulation * dp.
            "gbs": config["tokens"]["sequence_length"] * config["tokens"]["micro_batch_size"] * config["tokens"]["batch_accumulation_per_replica"] * config["parallelism"]["dp"],
            "dp": config["parallelism"]["dp"],
            "pp": config["parallelism"]["pp"],
            "tp": config["parallelism"]["tp"],
            "tp_mode": str(config["parallelism"]["tp_mode"]),
            "hidden_size": config["model"]["model_config"]["hidden_size"],
            "num_layers": config["model"]["model_config"]["num_hidden_layers"],
            "num_heads": config["model"]["model_config"]["num_attention_heads"],
            "vocab_size": config["model"]["model_config"]["vocab_size"],
            "zero_stage": config["optimizer"]["zero_stage"],
            "job_id": job_ids[i] if job_ids else None,
        }
        records.append(record)

    # Append to the CSV if it already exists, otherwise create it.
    if os.path.exists(output_path):
        existing_df = pd.read_csv(output_path)
        df = pd.concat([existing_df, pd.DataFrame(records)], ignore_index=True)
    else:
        df = pd.DataFrame(records)
    df.to_csv(output_path, index=False)
    print(f"Saved {len(records)} experiment configurations to {output_path}")
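For context, estimate_num_params is defined elsewhere in the repository and its formula is not shown in this excerpt. The sketch below is a minimal, hypothetical stand-in (estimate_num_params_sketch is an illustrative name, not the repository's function) that matches the argument order used above and counts parameters for a LLaMA-style decoder with grouped-query attention, a gated MLP, and RMSNorm; the actual implementation may count differently.

# Minimal sketch of a parameter-count estimate for a LLaMA-style decoder.
# Assumption for illustration only; the real estimate_num_params may use a different formula.
def estimate_num_params_sketch(
    num_hidden_layers: int,
    hidden_size: int,
    num_attention_heads: int,
    intermediate_size: int,
    tie_word_embeddings: bool,
    vocab_size: int,
    num_key_value_heads: int,
) -> int:
    head_dim = hidden_size // num_attention_heads
    kv_dim = num_key_value_heads * head_dim
    # Attention: Q and O projections are hidden x hidden; K and V are hidden x kv_dim (GQA).
    attn = 2 * hidden_size * hidden_size + 2 * hidden_size * kv_dim
    # Gated MLP (SwiGLU): gate, up, and down projections.
    mlp = 3 * hidden_size * intermediate_size
    # Two RMSNorm weight vectors per layer.
    norms = 2 * hidden_size
    per_layer = attn + mlp + norms
    # Input embeddings, plus a separate LM head unless embeddings are tied.
    embeddings = vocab_size * hidden_size * (1 if tie_word_embeddings else 2)
    # Final norm adds one more hidden_size-sized weight vector.
    return num_hidden_layers * per_layer + embeddings + hidden_size

# Example: estimate_num_params_sketch(16, 2048, 32, 8192, True, 32768, 8)
# returns roughly 1.0e9 parameters under these assumptions.

Whatever the exact formula, each config passed to save_experiment_configs is expected to carry the nested keys read above (general.run, tokens.sequence_length, tokens.micro_batch_size, tokens.batch_accumulation_per_replica, parallelism.dp/pp/tp/tp_mode, model.model_config.*, optimizer.zero_stage), and job_ids, when provided, must align index-for-index with configs.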