func()

in k8s-bench/eval.go [306:379]


// runAgent runs the agent binary (x.AgentBin) for this task, feeds it the
// prompts from x.task.Script on stdin (one per line), and then checks
// x.task.Expect against the trace file the agent was asked to write.
//
// The agent's stdout and stderr are forwarded to this process's stdout and
// stderr, and additionally to x.log when it is non-nil. The command is bound
// to ctx via exec.CommandContext.
//
// A non-nil error is returned when the agent command fails or when the trace
// file cannot be parsed. Unmet expectations are not returned as errors; they
// are recorded through x.result.AddFailure.
func (x *TaskExecution) runAgent(ctx context.Context) error {
	tracePath := filepath.Join(x.taskOutputDir, "trace.yaml")

	args := []string{
		"--kubeconfig", x.kubeConfig,
		"--llm-provider", x.llmConfig.ProviderID,
		fmt.Sprintf("--enable-tool-use-shim=%t", x.llmConfig.EnableToolUseShim),
		fmt.Sprintf("--quiet=%t", x.llmConfig.Quiet),
		"--model", x.llmConfig.ModelID,
		"--trace-path", tracePath,
		"--skip-permissions",
	}

	stdinReader, stdinWriter := io.Pipe()
	// Closing the read half once the command is done makes any pending or
	// later write on stdinWriter fail with io.ErrClosedPipe. Without this the
	// prompt-writing goroutine below would block forever if the agent fails
	// to start or exits before consuming every prompt.
	defer stdinReader.Close()

	cmd := exec.CommandContext(ctx, x.AgentBin, args...)
	cmd.Stdin = stdinReader
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if x.log != nil {
		cmd.Stdout = io.MultiWriter(cmd.Stdout, x.log)
		cmd.Stderr = io.MultiWriter(cmd.Stderr, x.log)
	}

	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", x.kubeConfig))

	go func() {
		// Closing the write half signals EOF on the agent's stdin.
		defer stdinWriter.Close()

		// TODO: Wait for idle between sending steps?
		for _, step := range x.task.Script {
			if _, err := fmt.Fprintf(stdinWriter, "%s\n", step.Prompt); err != nil {
				// The read half is gone (the command finished or never
				// started); the remaining prompts cannot be delivered.
				return
			}
		}
	}()

	if err := cmd.Run(); err != nil {
		return fmt.Errorf("running agent %q: %w", x.AgentBin, err)
	}

	// Expectations are optional; without any there is nothing to verify.
	if len(x.task.Expect) == 0 {
		return nil
	}

	events, err := journal.ParseEventsFromFile(tracePath)
	if err != nil {
		return fmt.Errorf("parsing trace %q: %w", tracePath, err)
	}

	// Expectations are checked against the last ActionUIRender event only.
	var lastEvent *journal.Event
	for _, event := range events {
		if event.Action == journal.ActionUIRender {
			lastEvent = event
		}
	}
	if lastEvent == nil {
		x.result.AddFailure("did not found ui.render event in trace")
		return nil
	}

	lastOutput, ok := lastEvent.GetString("text")
	if !ok {
		x.result.AddFailure("did not found 'text' key in event %+v", lastEvent)
		// There is no output to compare against; checking expectations
		// against an empty string would only add misleading failures.
		return nil
	}

	for _, expect := range x.task.Expect {
		if expect.Contains == "" {
			continue
		}
		if !strings.Contains(lastOutput, expect.Contains) {
			x.result.AddFailure("expected value %q not found in output %q", expect.Contains, lastOutput)
		}
	}

	return nil
}