in k8s-bench/main.go [146:225]
// runEvals implements the 'run' subcommand. It parses the command-line flags
// into an EvalConfig, resolves the kubeconfig path, builds one model.LLMConfig
// per (provider, model) pair to evaluate, and hands the result to
// runEvaluation.
//
// Model selection:
//   - --models given: the comma-separated names are evaluated against
//     --llm-provider (which must be non-empty). Whitespace around names is
//     trimmed and empty entries are skipped.
//   - --models omitted: the built-in default models for --llm-provider are
//     used; an error is returned if that provider has no defaults. An
//     explicitly empty --llm-provider selects every provider's defaults.
//
// It returns an error if flag values are inconsistent, the kubeconfig path
// cannot be expanded, or runEvaluation fails.
func runEvals(ctx context.Context) error {
	config := EvalConfig{
		TasksDir: "./tasks",
	}

	// Set custom usage for 'run' subcommand
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s run [options]\n\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "Run K8s-bench evaluation benchmarks.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		flag.PrintDefaults()
	}

	llmProvider := "gemini"
	modelList := ""
	defaultKubeConfig := "~/.kube/config"
	enableToolUseShim := true
	quiet := true

	flag.StringVar(&config.TasksDir, "tasks-dir", config.TasksDir, "Directory containing evaluation tasks")
	flag.StringVar(&config.KubeConfig, "kubeconfig", config.KubeConfig, "Path to kubeconfig file")
	flag.StringVar(&config.TaskPattern, "task-pattern", config.TaskPattern, "Pattern to filter tasks (e.g. 'pod' or 'redis')")
	flag.StringVar(&config.AgentBin, "agent-bin", config.AgentBin, "Path to kubernetes agent binary")
	flag.StringVar(&llmProvider, "llm-provider", llmProvider, "Specific LLM provider to evaluate (e.g. 'gemini' or 'ollama')")
	flag.StringVar(&modelList, "models", modelList, "Comma-separated list of models to evaluate (e.g. 'gemini-1.0,gemini-2.0')")
	flag.BoolVar(&enableToolUseShim, "enable-tool-use-shim", enableToolUseShim, "Enable tool use shim")
	flag.BoolVar(&quiet, "quiet", quiet, "Quiet mode (non-interactive mode)")
	flag.StringVar(&config.OutputDir, "output-dir", config.OutputDir, "Directory to write results to")
	flag.Parse()

	// The default is applied after parsing so that the unexpanded "~" path
	// is not baked into the flag's help text as an already-set value.
	if config.KubeConfig == "" {
		config.KubeConfig = defaultKubeConfig
	}
	expandedKubeconfig, err := expandPath(config.KubeConfig)
	if err != nil {
		return fmt.Errorf("failed to expand kubeconfig path %q: %w", config.KubeConfig, err)
	}
	config.KubeConfig = expandedKubeconfig

	defaultModels := map[string][]string{
		"gemini": {"gemini-2.5-pro-preview-03-25"},
	}

	var modelsByProvider map[string][]string
	switch {
	case modelList != "":
		if llmProvider == "" {
			return fmt.Errorf("--llm-provider is required when --models is specified")
		}
		// Tolerate "a, b" and trailing commas: a model ID with stray
		// whitespace or an empty ID would never match a real model.
		var modelIDs []string
		for _, name := range strings.Split(modelList, ",") {
			if name = strings.TrimSpace(name); name != "" {
				modelIDs = append(modelIDs, name)
			}
		}
		if len(modelIDs) == 0 {
			return fmt.Errorf("--models %q does not contain any model names", modelList)
		}
		modelsByProvider = map[string][]string{llmProvider: modelIDs}
	case llmProvider == "":
		// No provider filter requested: evaluate every provider's defaults.
		modelsByProvider = defaultModels
	default:
		// Honor --llm-provider even without --models instead of silently
		// evaluating a different provider's defaults.
		providerDefaults, ok := defaultModels[llmProvider]
		if !ok {
			return fmt.Errorf("no default models for --llm-provider %q; specify --models", llmProvider)
		}
		modelsByProvider = map[string][]string{llmProvider: providerDefaults}
	}

	// The shim label is part of every config ID and does not vary per model.
	toolUseShimStr := "shim_disabled"
	if enableToolUseShim {
		toolUseShimStr = "shim_enabled"
	}

	for providerID, modelIDs := range modelsByProvider {
		for _, modelID := range modelIDs {
			config.LLMConfigs = append(config.LLMConfigs, model.LLMConfig{
				ID:                fmt.Sprintf("%s-%s-%s", toolUseShimStr, providerID, modelID),
				ProviderID:        providerID,
				ModelID:           modelID,
				EnableToolUseShim: enableToolUseShim,
				Quiet:             quiet,
			})
		}
	}

	if err := runEvaluation(ctx, config); err != nil {
		return fmt.Errorf("running evaluation: %w", err)
	}
	return nil
}