func runEvaluation()

in k8s-bench/eval.go [33:81]


// runEvaluation evaluates every loaded task against every entry in
// config.LLMConfigs, then prints the collected results.
//
// For each (task, LLM) pair it creates the directory
// <OutputDir>/<taskID>/<llmConfig.ID>, streams the evaluation log to log.txt
// in that directory, and writes the result to results.yaml next to it.
//
// It returns an error if config.OutputDir is empty, if the tasks cannot be
// loaded, or if any output directory, log file, or results file cannot be
// created/written. On error the function returns immediately and the results
// gathered so far are not printed.
func runEvaluation(ctx context.Context, config EvalConfig) error {
	if config.OutputDir == "" {
		return fmt.Errorf("must set OutputDir")
	}

	tasks, err := loadTasks(config)
	if err != nil {
		return fmt.Errorf("failed to load tasks: %w", err)
	}

	var allResults []model.TaskResult

	// NOTE(review): if tasks is a map, iteration order (and therefore the
	// order of allResults) varies between runs — confirm whether printResults
	// needs a stable order.
	for taskID, task := range tasks {
		fmt.Printf("Evaluating task: %s\n", taskID)

		for _, llmConfig := range config.LLMConfigs {
			// Run each evaluation inside its own function scope so the
			// deferred Close fires as soon as this evaluation finishes.
			// A defer placed directly in the loop body would keep every log
			// file open until runEvaluation itself returns.
			runErr := func() error {
				// OutputDir was validated above, so the per-run directory
				// is always created.
				taskOutputDir := filepath.Join(config.OutputDir, taskID, llmConfig.ID)
				if err := os.MkdirAll(taskOutputDir, 0755); err != nil {
					return fmt.Errorf("creating directory %q: %w", taskOutputDir, err)
				}

				logPath := filepath.Join(taskOutputDir, "log.txt")
				logFile, err := os.Create(logPath)
				if err != nil {
					return fmt.Errorf("creating log file %q: %w", logPath, err)
				}
				defer logFile.Close()

				var log io.Writer = logFile
				result := evaluateTask(ctx, config, taskID, task, llmConfig, log)

				if err := writeToYAMLFile(filepath.Join(taskOutputDir, "results.yaml"), result); err != nil {
					return fmt.Errorf("writing results to file: %w", err)
				}
				allResults = append(allResults, result)
				return nil
			}()
			if runErr != nil {
				return runErr
			}
		}
	}

	printResults(allResults)
	return nil
}