ablations/evaluation/launch_random_evals.py:
import argparse
import os
import subprocess
import tempfile
from loguru import logger
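
# User name, used to build home and scratch paths inside the generated sbatch script.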
USER = os.environ["USER"]


def launch_slurm_job(launch_file_contents, *args):
    """
    Small helper function to save an sbatch script to a temporary file and submit it.

    Args:
        launch_file_contents: Contents of the sbatch script
        *args: any other arguments to pass to the sbatch command

    Returns: the id of the launched slurm job
    """
    with tempfile.NamedTemporaryFile("w") as f:
        f.write(launch_file_contents)
        f.flush()
        try:
            return subprocess.check_output(["sbatch", *args, f.name]).decode("utf-8").split()[-1]
        except Exception:
            # Dump the script that failed to submit before re-raising.
            print(launch_file_contents, flush=True)
            raise
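

# Example use of launch_slurm_job: extra arguments are forwarded to sbatch, e.g. (hypothetical qos name)
#   job_id = launch_slurm_job(script_text, "--qos=high")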
parser = argparse.ArgumentParser(
    description="Launch evals for a set of checkpoints in a given language."
)
parser.add_argument(
    "language", type=str, help="Language to run evals for. Example: zh"
)
parser.add_argument(
    "checkpoints", type=str, help="Checkpoints to run evals for. Example: 0,1,2", default=0
)
parser.add_argument(
    "--offline-datasets", action="store_true", help="Turns off datasets downloading"
)
parser.add_argument("--qos", type=str, default="normal", help="qos to use")
parser.add_argument("--time_limit", type=str, default="01:20:00", help="slurm time limit. 01:20:00 by default")
parser.add_argument("--parallel", "-p", type=int, default=100, help="How many eval tasks to run simultaneously")
parser.add_argument("--logging_dir", type=str, default="/path/to/eval-results", help="Repo to push results to")
parser.add_argument("-d", help="dependency job", type=str, default=None)
parser.add_argument(
    "--overwrite", "-ow", action="store_true",
    help="Overwrite existing eval results. Will skip completed checkpoints by default",
)
parser.add_argument("--tasks", type=str, default="early-signals", help="Tasks to run. Example: all,m3exam")
parser.add_argument("--tokenizer", type=str, default="google/gemma-7b", help="Tokenizer to use for the model")
if __name__ == "__main__":
    args = parser.parse_args()
    job_id = None
    model_name = f"dummy-{args.language}-"
    selected_checkpoints = args.checkpoints.split(",")
    # Bash array literal of checkpoint steps, newest first, e.g. "0,1000,2000" -> ("2000" "1000" "0")
    bash_ckpts_list = "(" + " ".join(f'"{item}"' for item in sorted(map(int, selected_checkpoints), reverse=True)) + ")"
    os.makedirs(f"/path/to/eval-logs/{model_name}/{args.language}", exist_ok=True)
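    # Optional SLURM dependencies: afterok on the job passed via -d, afterany on a previously
    # recorded job id (if any).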
    deps = []
    if args.d:
        deps.append(f"afterok:{args.d}")
    if job_id:
        deps.append(f"afterany:{job_id}")
    launch_script = f"""#!/bin/bash
#SBATCH --job-name=eval-{model_name}-{args.language}
#SBATCH --tasks=1
#SBATCH --partition=partition
#SBATCH --qos={args.qos}
#SBATCH --array=0-{len(selected_checkpoints)-1}%{args.parallel}
#SBATCH --time={args.time_limit}
#SBATCH --cpus-per-task=4
#SBATCH --output=/path/to/logs/train/multilingual/eval-logs/{model_name}/{args.language}/eval-%A_%a.out
#SBATCH --error=/path/to/logs/train/multilingual/eval-logs/{model_name}/{args.language}/eval-%A_%a.out
{"#SBATCH --dependency=" + ",".join(deps) if deps else ""}
#SBATCH --requeue
###########################################
# [BEGINNING] ADAPT TO YOUR ENVIRONMENT
source /admin/home/{USER}/.bashrc
source /path/to/miniconda3/etc/profile.d/conda.sh
conda activate /path/to/miniconda3/envs/exp/
LIGHTEVAL_FOLDER=/path/to/ml-lighteval
export HUGGINGFACE_HUB_CACHE=/path/to/.cache/huggingface
export HF_DATASETS_CACHE=/path/to/.cache/huggingface
export HF_MODULES_CACHE=/path/to/.cache/huggingface
export HF_HOME=/path/to/.cache/huggingface
export HF_DATASETS_OFFLINE={1 if args.offline_datasets else 0}
# [END] ADAPT TO YOUR ENVIRONMENT
###########################################
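# Echo each command and stop on the first error.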
set -x -e
echo "START TIME: $(date)"
echo python3 version = `python3 --version`
# SLURM stuff
export TMPDIR=/scratch/{USER}/{model_name}/{args.language}
mkdir -p $TMPDIR
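# Map this array task to a checkpoint step: $SLURM_ARRAY_TASK_ID indexes the descending list below.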
CHECKPOINTS_LIST={bash_ckpts_list}
NSTEP=$((SLURM_ARRAY_TASK_ID))
STEP=${{CHECKPOINTS_LIST[$NSTEP]}}
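# Arguments for lighteval's run_evals_accelerate.py: a dummy model entry pointing at checkpoint
# step $STEP, capped at 1000 samples per task, with results saved under --logging_dir.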
launch_args="$LIGHTEVAL_FOLDER/run_evals_accelerate.py --model_args='dummy,name=dummy-{args.language}-/${{STEP}},tokenizer={args.tokenizer}' --max_samples=1000 --custom_tasks=lighteval.community_tasks.multilingual.configs.{args.language} --tasks={args.tasks} --save_results --logging_dir={args.logging_dir}"
sleep $((RANDOM % 60))
srun -u bash -c "python3 -u ${{launch_args}}" """
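
    # Submit the generated sbatch script and record the resulting job id.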
    launched_id = launch_slurm_job(launch_script)
    logger.success(f"{model_name} evals launched with id={launched_id}. Logs: /path/to/logs/train/multilingual/eval-logs/{model_name}/{args.language}")
    job_id = launched_id