in k8s-bench/eval.go [137:205]
// evaluateTask runs one benchmark task against one LLM configuration and
// returns the recorded outcome.
//
// Steps, in order: resolve the task directory to an absolute path, run the
// task's setup, run the agent, and, when task.Verifier is non-empty, execute
// the verifier script found inside the task directory. Cleanup is deferred as
// soon as the task directory is resolved, so it runs whether or not setup,
// the agent, or the verifier succeed.
//
// How the returned TaskResult is filled in:
//   - Result "success": the verifier ran and returned no error.
//   - Result "fail": the verifier exited with a non-zero status, or the task
//     directory could not be made absolute (Error is also set in that case).
//   - Result unset, Error set: setup, the agent, or the verifier hit an
//     unexpected error, or ctx ended while the verifier was running.
//   - Result and Error both unset: the task declares no verifier.
func evaluateTask(ctx context.Context, config EvalConfig, taskID string, task Task, llmConfig model.LLMConfig, log io.Writer) model.TaskResult {
	result := model.TaskResult{
		Task:      taskID,
		LLMConfig: llmConfig,
	}

	taskOutputDir := filepath.Join(config.OutputDir, taskID, llmConfig.ID)

	x := &TaskExecution{
		AgentBin:      config.AgentBin,
		kubeConfig:    config.KubeConfig,
		result:        &result,
		llmConfig:     llmConfig,
		log:           log,
		task:          &task,
		taskID:        taskID,
		taskOutputDir: taskOutputDir,
	}

	// The verifier path below is built from taskDir, so make it absolute up
	// front rather than depending on the process working directory later.
	taskDir, err := filepath.Abs(filepath.Join(config.TasksDir, taskID))
	if err != nil {
		result.Result = "fail"
		result.Error = err.Error()
		return result
	}
	x.taskDir = taskDir

	// Registered before setup so a partially completed setup is still cleaned up.
	// NOTE(review): cleanup receives the same ctx as the task; if ctx is already
	// cancelled at this point, whether cleanup can still do its work depends on
	// runCleanup - confirm.
	defer func() {
		if err := x.runCleanup(ctx); err != nil {
			fmt.Printf("Warning: cleanup failed for task %s: %v\n", taskID, err)
		}
	}()

	if err := x.runSetup(ctx); err != nil {
		// Unexpected error: leave Result unset so it is not counted as a verdict.
		result.Error = err.Error()
		return result
	}

	// Run the agent
	if err := x.runAgent(ctx); err != nil {
		// Unexpected error: leave Result unset so it is not counted as a verdict.
		result.Error = err.Error()
		return result
	}

	// Without a verifier there is nothing to judge; Result stays unset.
	if task.Verifier == "" {
		return result
	}

	verifierPath := filepath.Join(taskDir, task.Verifier)
	cmd := exec.CommandContext(ctx, verifierPath)
	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", x.kubeConfig))

	fmt.Printf("\nRunning verifier for task %s\n", taskID)

	verifyErr := x.runCommand(cmd)
	// A comma-ok assertion on a nil error is safe and yields false.
	// NOTE(review): this only matches an unwrapped *exec.ExitError; if
	// runCommand ever wraps its error, switch to errors.As.
	_, scriptFailed := verifyErr.(*exec.ExitError)

	switch {
	case verifyErr == nil:
		result.Result = "success"
	case ctx.Err() != nil:
		// exec.CommandContext kills the process when ctx ends, which surfaces
		// as an *exec.ExitError. That is an interrupted run, not a verdict on
		// the task, so report it as an error instead of "fail".
		result.Error = fmt.Sprintf("verifier did not complete: %v", ctx.Err())
	case scriptFailed:
		// "Normal" script failure: the verifier ran and exited non-zero.
		result.Result = "fail"
	default:
		// Unexpected error (e.g. the verifier could not be started).
		result.Error = verifyErr.Error()
	}

	return result
}