# method_comparison/processing.py
def preprocess(rows, task_name: str, print_fn=print):
    """Flatten raw benchmark result rows into one record per successful run.

    Rows whose training status is not "success" are skipped, and a summary of
    how many were skipped is reported via ``print_fn``.
    """
    results = []
    skipped = 0
    for row in rows:
        run_info = row["run_info"]
        train_info = row["train_info"]
        meta_info = row["meta_info"]
        # runs without a PEFT config correspond to full fine-tuning
        if run_info["peft_config"]:
            peft_type = run_info["peft_config"]["peft_type"]
        else:
            peft_type = "full-finetuning"
        if train_info["status"] != "success":
            skipped += 1
            continue
        # use the metrics recorded at the last training step
        train_metrics = train_info["metrics"][-1]
        # extract the fields that make the most sense for comparison
        dct = {
            "task_name": task_name,
            "experiment_name": run_info["experiment_name"],
            "model_id": run_info["train_config"]["model_id"],
            "train_config": run_info["train_config"],
            "peft_type": peft_type,
            "peft_config": run_info["peft_config"],
            "cuda_memory_reserved_avg": train_info["cuda_memory_reserved_avg"],
            "cuda_memory_max": train_info["cuda_memory_max"],
            "cuda_memory_reserved_99th": train_info["cuda_memory_reserved_99th"],
            "total_time": run_info["total_time"],
            "train_time": train_info["train_time"],
            "file_size": train_info["file_size"],
            "test_accuracy": train_metrics["test accuracy"],
            "train_loss": train_metrics["train loss"],
            "train_samples": train_metrics["train samples"],
            "train_total_tokens": train_metrics["train total tokens"],
            "peft_version": meta_info["package_info"]["peft-version"],
            "peft_branch": run_info["peft_branch"],
            "transformers_version": meta_info["package_info"]["transformers-version"],
            "datasets_version": meta_info["package_info"]["datasets-version"],
            "torch_version": meta_info["package_info"]["torch-version"],
            "bitsandbytes_version": meta_info["package_info"]["bitsandbytes-version"],
            "package_info": meta_info["package_info"],
            "system_info": meta_info["system_info"],
            "created_at": run_info["created_at"],
        }
        results.append(dct)
    if skipped:
        print_fn(f"Skipped {skipped} of {len(rows)} entries because the train status != success")
    return results
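

# A minimal usage sketch (hypothetical: the results/ directory, file layout, and
# task name below are assumptions, not part of this module; it assumes each row
# was serialized as one JSON file containing the "run_info"/"train_info"/
# "meta_info" keys read above, and that pandas is installed).
if __name__ == "__main__":
    import json
    from pathlib import Path

    import pandas as pd

    # load one raw result row per JSON file
    rows = [json.loads(path.read_text()) for path in Path("results").glob("*.json")]
    records = preprocess(rows, task_name="example-task")
    df = pd.DataFrame(records)  # one flat record per successful run
    print(df[["experiment_name", "peft_type", "test_accuracy"]])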