# create_slurm_script() — from generate/run_ioi_slurm.py


def create_slurm_script(args, logs_dir):
    """Build a SLURM batch script that serves a model via SGLang and runs the IOI eval.

    Args:
        args: Parsed CLI namespace; reads .model, .revision, .concurrency,
            .partition, .qos, .time, .startup_delay, .uv_env and .eval_args.
        logs_dir: Base log directory (a pathlib.Path — a per-job subdirectory
            is created under it).

    Returns:
        tuple[str, str]: (slurm_script, job_name) — the sbatch script text and
        the sanitized job name used for the log directory and SLURM job name.
    """
    # Model-specific serving parameters; CLI values override the defaults.
    concurrency = get_concurrency(args.model, args.concurrency)
    tp = get_tp(args.model, args.revision)
    context_length = get_context_length(args.model, args.revision)

    # Slashes in HF model ids are not valid in SLURM job names or paths.
    job_name = f"ioi-eval-{args.model.replace('/', '-')}"

    log_dir = logs_dir / job_name
    log_dir.mkdir(parents=True, exist_ok=True)

    # One node per 8 GPUs of tensor parallelism; one server task per node.
    n_nodes = ceil(tp / 8)
    tasks = n_nodes

    # Empty string when no revision is given — the blank continuation line in
    # the generated bash is harmless.
    revision_arg = f"--revision {args.revision}" if args.revision else ""

    slurm_script = f"""#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --partition={args.partition}
#SBATCH --qos={args.qos}
#SBATCH --nodes={n_nodes}
#SBATCH --gpus-per-node=8
#SBATCH --exclusive
#SBATCH --output={log_dir}/%j-%x.out
#SBATCH --error={log_dir}/%j-%x.out
#SBATCH --time={args.time}
#SBATCH --ntasks-per-node=1

set -exuo pipefail

SERVER_PORT=39877
DIST_PORT=45000

# random sleep (0-100) to prevent ddosing server
sleep $((RANDOM % 100 + 1))

# Environment configuration
export OUTLINES_CACHE_DIR=/scratch/serve_r1/ocache/
export TRITON_HOME=/scratch/serve_r1/triton/
export GLOO_SOCKET_IFNAME="enp71s0"
export NCCL_SOCKET_IFNAME="enp71s0"

# Evaluation script path
EVAL_SCRIPT_PATH="/fsx/hynek_kydlicek/projects/ioi/generate/evaluate.py"

module load cuda/12.4
source ~/.bashrc

# Activate uv
source {args.uv_env or UV_ENV}/bin/activate

FIRST_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n1)
FIRST_NODE_IP=$(srun --nodes=1 --ntasks=1 -w "$FIRST_NODE" hostname --ip-address)

# Launch servers synchronously across all nodes
srun --nodes={n_nodes} --ntasks={tasks} --ntasks-per-node=1 \\
    bash -c "python -m sglang.launch_server \\
        --model-path '{args.model}' \\
        --tp {tp} \\
        --dist-init-addr '$FIRST_NODE_IP:$DIST_PORT' \\
        {revision_arg} \\
        --nnodes {n_nodes} \\
        --node-rank \\$SLURM_PROCID \\
        --port '$SERVER_PORT' \\
        --host 0.0.0.0 \\
        --trust-remote-code \\
        --max-running-requests {concurrency} \\
        --context-length {context_length}" &

# Wait for server with timeout
TIMEOUT={args.startup_delay}  # 1h, but model loading should take ~30min
START_TIME=$(date +%s)
echo "Waiting for SGLang server (http://$FIRST_NODE_IP:$SERVER_PORT)..."

while true; do
    # Require an actual HTTP 200 from /health: curl exits 0 for ANY HTTP
    # response (even 5xx while the model is still loading), so checking its
    # exit status alone would break out of this loop too early.
    STATUS=$(curl -s -o /dev/null -w "%{{http_code}}" "http://$FIRST_NODE_IP:$SERVER_PORT/health" 2>/dev/null || true)
    if [ "$STATUS" = "200" ]; then
        echo "Server is ready at http://$FIRST_NODE_IP:$SERVER_PORT"
        break
    fi

    CURRENT_TIME=$(date +%s)
    if [ $((CURRENT_TIME - START_TIME)) -gt $TIMEOUT ]; then
        echo "Error: Server failed to start within $TIMEOUT seconds"
        exit 1
    fi

    echo "Still waiting... ($(($CURRENT_TIME - $START_TIME)) seconds elapsed)"
    sleep 60
done

echo "Checking available models..."
curl "http://$FIRST_NODE_IP:$SERVER_PORT/v1/models"
sleep 10

echo "Executing sanity check..."
curl "http://$FIRST_NODE_IP:$SERVER_PORT/v1/completions" \\
    -H "Content-Type: application/json" \\
    -d '{{
        "model": "default",
        "prompt": "hi, how are you?",
        "max_tokens": 2048,
        "temperature": 0.6
    }}'

# The batch script runs on the first allocated node, where the rank-0 server
# listens — so localhost reaches it.
python "$EVAL_SCRIPT_PATH" \\
    --model_id "sglang/{args.model}" \\
    {revision_arg} \\
    --api_base "http://localhost:$SERVER_PORT/v1" \\
    --concurrency {concurrency} \\
    {args.eval_args}

# Kill the server and exit
pkill -f "python -m sglang.launch_server"
exit 0
"""

    return slurm_script, job_name