in k8s-bench/eval.go [33:81]
// runEvaluation evaluates every task returned by loadTasks against every
// entry in config.LLMConfigs and then prints the collected results.
//
// For each (task, LLM config) pair it creates the directory
// <OutputDir>/<taskID>/<llmConfig.ID>, hands evaluateTask a writer backed by
// log.txt in that directory, and writes the returned result to results.yaml
// in the same directory.
//
// It returns an error if config.OutputDir is empty, if the tasks cannot be
// loaded, or if an output directory or file cannot be created or written.
// The run stops at the first such error and printResults is not called.
func runEvaluation(ctx context.Context, config EvalConfig) error {
	if config.OutputDir == "" {
		return fmt.Errorf("must set OutputDir")
	}

	tasks, err := loadTasks(config)
	if err != nil {
		return fmt.Errorf("failed to load tasks: %w", err)
	}

	var allResults []model.TaskResult
	// NOTE(review): if tasks is a map, the iteration order (and therefore the
	// order of allResults) is unspecified — confirm whether callers care.
	for taskID, task := range tasks {
		fmt.Printf("Evaluating task: %s\n", taskID)

		for _, llmConfig := range config.LLMConfigs {
			// Each pair runs inside its own function literal so the deferred
			// Close fires at the end of the iteration. A defer placed directly
			// in the loop body would keep every log file open until
			// runEvaluation returns.
			if err := func() error {
				// OutputDir is known to be non-empty here, so the joined path
				// is never empty and output is always written.
				taskOutputDir := filepath.Join(config.OutputDir, taskID, llmConfig.ID)
				if err := os.MkdirAll(taskOutputDir, 0755); err != nil {
					return fmt.Errorf("creating directory %q: %w", taskOutputDir, err)
				}

				logPath := filepath.Join(taskOutputDir, "log.txt")
				logFile, err := os.Create(logPath)
				if err != nil {
					return fmt.Errorf("creating log file %q: %w", logPath, err)
				}
				// The Close error is intentionally ignored, as before: a
				// failure to close the log must not discard the result.
				defer logFile.Close()

				var log io.Writer = logFile
				result := evaluateTask(ctx, config, taskID, task, llmConfig, log)

				if err := writeToYAMLFile(filepath.Join(taskOutputDir, "results.yaml"), result); err != nil {
					return fmt.Errorf("writing results to file: %w", err)
				}

				allResults = append(allResults, result)
				return nil
			}(); err != nil {
				return err
			}
		}
	}

	printResults(allResults)
	return nil
}