in k8s-bench/eval.go [306:379]
// runAgent launches the agent binary for this task, feeds it the scripted
// prompts over stdin, and then checks the task's expectations against the
// trace file the agent is asked to write (trace.yaml in x.taskOutputDir).
//
// The agent's stdout and stderr are forwarded to this process's stdout and
// stderr and, when x.log is non-nil, are also copied to x.log.
//
// A non-nil error is returned when the agent command fails (cmd.Run reports
// an error) or when the trace file cannot be parsed. Unmet expectations are
// not returned as errors; they are recorded on x.result via AddFailure.
func (x *TaskExecution) runAgent(ctx context.Context) error {
	tracePath := filepath.Join(x.taskOutputDir, "trace.yaml")

	args := []string{
		"--kubeconfig", x.kubeConfig,
		"--llm-provider", x.llmConfig.ProviderID,
		fmt.Sprintf("--enable-tool-use-shim=%t", x.llmConfig.EnableToolUseShim),
		fmt.Sprintf("--quiet=%t", x.llmConfig.Quiet),
		"--model", x.llmConfig.ModelID,
		"--trace-path", tracePath,
		"--skip-permissions",
	}

	stdinReader, stdinWriter := io.Pipe()
	// Closing the read side when we return makes any pending or later write
	// on stdinWriter fail with io.ErrClosedPipe. Without this, the
	// prompt-feeding goroutine below would block forever (and leak) whenever
	// the agent stops reading before all prompts were delivered, e.g. because
	// it failed to start, exited early, or was cancelled through ctx.
	defer stdinReader.Close()

	cmd := exec.CommandContext(ctx, x.AgentBin, args...)
	cmd.Stdin = stdinReader
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if x.log != nil {
		cmd.Stdout = io.MultiWriter(cmd.Stdout, x.log)
		cmd.Stderr = io.MultiWriter(cmd.Stderr, x.log)
	}
	// The kubeconfig is passed both as a flag and through the environment.
	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", x.kubeConfig))

	go func() {
		// Closing the writer signals EOF on the agent's stdin.
		defer stdinWriter.Close()

		// TODO: Wait for idle between sending steps?
		for _, step := range x.task.Script {
			if _, err := fmt.Fprintf(stdinWriter, "%s\n", step.Prompt); err != nil {
				// The read side is gone; no further prompt can be delivered.
				return
			}
		}
	}()

	if err := cmd.Run(); err != nil {
		return fmt.Errorf("running agent %q: %w", x.AgentBin, err)
	}

	// Run expectations if specified.
	if len(x.task.Expect) == 0 {
		return nil
	}

	events, err := journal.ParseEventsFromFile(tracePath)
	if err != nil {
		return fmt.Errorf("parsing trace %q: %w", tracePath, err)
	}

	// Expectations are evaluated against the last ui.render event in the trace.
	var lastEvent *journal.Event
	for _, event := range events {
		if event.Action == journal.ActionUIRender {
			lastEvent = event
		}
	}
	if lastEvent == nil {
		x.result.AddFailure("did not found ui.render event in trace")
		return nil
	}

	lastOutput, ok := lastEvent.GetString("text")
	if !ok {
		x.result.AddFailure("did not found 'text' key in event %+v", lastEvent)
		// There is no output to compare against; checking the expectations
		// against an empty string would only add misleading follow-up failures.
		return nil
	}

	for _, expect := range x.task.Expect {
		if expect.Contains != "" && !strings.Contains(lastOutput, expect.Contains) {
			x.result.AddFailure("expected value %q not found in output %q", expect.Contains, lastOutput)
		}
	}
	return nil
}