experiments/make_submission.py (84 lines of code) (raw):

import argparse
import json
from logging import Logger
from pathlib import Path

import pandas as pd


def main(
    metadata_path: Path,
    output: Path,
    rel_log_path: Path,
    rel_code_path: Path,
    logger: Logger,
):
    """Build a ``submission.jsonl`` for ``mlebench grade`` from a run-group metadata file.

    For each run id listed under ``"runs"`` in ``metadata.json``, checks whether
    the run directory contains a log file, a code file, and a
    ``submission/submission.csv``, logs a status table, and writes one jsonl
    record per run to *output*.

    Args:
        metadata_path: Path to the run group's ``metadata.json``; its ``"runs"``
            entry lists run ids of the form ``f"{comp_id}_{suffix}"``.
        output: Destination path for the generated jsonl file.
        rel_log_path: Log file path relative to each run directory.
        rel_code_path: Code file path relative to each run directory.
        logger: Logger used for the status table and completion message.
    """
    run_statuses = []
    submission_lines = []

    with open(metadata_path, "r") as f:
        metadata = json.load(f)

    for run_id in metadata["runs"]:
        run_dir = metadata_path.parent / run_id
        # run_id is f"{comp_id}_{suffix}"; rsplit on the LAST underscore so
        # competition ids that themselves contain underscores stay intact.
        comp_id = run_id.rsplit("_", 1)[0]

        log_path = run_dir / rel_log_path
        has_log = log_path.exists()
        code_path = run_dir / rel_code_path
        has_code = code_path.exists()
        submission_path = run_dir / "submission/submission.csv"
        submitted = submission_path.exists()

        # Absent artifacts are recorded as None so `mlebench grade` can skip them.
        submission_lines.append(
            {
                "competition_id": comp_id,
                "submission_path": submission_path.as_posix() if submitted else None,
                "logs_path": log_path.as_posix() if has_log else None,
                "code_path": code_path.as_posix() if has_code else None,
            }
        )
        run_statuses.append(
            {
                # Truncated for a readable status table; the full id is in the jsonl.
                "competition_id": comp_id[:20],
                "has_log": has_log,
                "has_code": has_code,
                "submitted": submitted,
            }
        )

    status_df = pd.DataFrame(run_statuses)
    logger.info(f"All runs:\n{status_df.to_string()}")

    # Create submission.jsonl: one JSON record per line.
    with open(output, "w") as f:
        for line in submission_lines:
            f.write(f"{json.dumps(line)}\n")
    logger.info(f"Written submission to {output}")


if __name__ == "__main__":
    # Imported lazily so this module can be imported without mlebench installed.
    from mlebench.utils import get_logger

    parser = argparse.ArgumentParser(
        description="Makes a submission.jsonl for mlebench grade from a mlebench run group"
    )
    parser.add_argument(
        "--metadata",
        type=str,
        help="Path to metadata.json file",
        required=True,
    )
    parser.add_argument(
        "--output",
        type=str,
        help="Path to the output jsonl file which can be used for `mlebench grade`",
        default="submission.jsonl",
    )
    parser.add_argument(
        "--rel-log-path",
        type=str,
        help=(
            "Path to logfile for analysis, relative to a run checkpoint. "
            "For example, if your logs are at "
            "`{runs_dir}/{run_id}/{checkpoint}/logs/agent.log`, "
            "this should be `logs/agent.log`."
        ),
        default="logs/agent.log",
    )
    parser.add_argument(
        "--rel-code-path",
        type=str,
        help=(
            "Path to code file for analysis, relative to a run checkpoint. "
            "For example, if your code is at "
            "`{runs_dir}/{run_id}/{checkpoint}/code/train.py`, "
            "this should be `code/train.py`."
        ),
        default="code/train.py",
    )
    args = parser.parse_args()
    logger = get_logger(__name__)
    main(
        metadata_path=Path(args.metadata),
        output=Path(args.output),
        rel_log_path=Path(args.rel_log_path),
        rel_code_path=Path(args.rel_code_path),
        logger=logger,
    )