in pkg/api/internal/jobs/jobs.go [122:157]
// onRunCompletion records the outcome of a finished run on the job: it stamps
// j.finishTime and derives j.status and j.err from stepResult and err. The
// whole update happens while holding j.mux.
//
// Status mapping:
//   - unspecified / running: the run never reached a terminal state, so the
//     job is recorded as a failure with a descriptive error.
//   - failure: recorded as cancelled when the failure was caused by the job's
//     context being cancelled or timing out; otherwise recorded as-is.
//   - anything else: status and err are recorded unchanged.
//
// A nil stepResult is handled like an unspecified status, and a nil err never
// causes a panic.
func (j *Job) onRunCompletion(stepResult *proto.StepResult, err error) {
	j.mux.Lock()
	defer j.mux.Unlock()
	j.finishTime = time.Now()

	// A missing result carries no status at all; treat it as "unspecified"
	// rather than dereferencing a nil pointer.
	// NOTE(review): assumes callers would want a nil result reported as
	// "did not start running" — confirm against Step.Run().
	status := proto.StepResult_unspecified
	if stepResult != nil {
		status = stepResult.Status
	}

	// incomplete builds the error for a run that never reached a terminal
	// state. The underlying error is only wrapped when there is one, so the
	// message never ends in "%!w(<nil>)".
	incomplete := func(what string) error {
		if err == nil {
			return fmt.Errorf("job %q %s", j.ID, what)
		}
		return fmt.Errorf("job %q %s: %w", j.ID, what, err)
	}

	switch status {
	case proto.StepResult_unspecified:
		j.err = incomplete("did not start running")
		j.status = proto.StepResult_failure
	case proto.StepResult_running:
		j.err = incomplete("did not finish running")
		j.status = proto.StepResult_failure
	case proto.StepResult_failure:
		// When a job is cancelled (by calling `Job.Close()`) or times out (both
		// of which cancel the context passed to `exec.CommandContext()`), the
		// returned error can:
		// * be one of context.Canceled or context.DeadlineExceeded.
		// * be another error type that ends with the string "signal: killed".
		//
		// In both cases the `StepResult_Status` returned by `Step.Run()` is
		// `failure`, but we want it to be `cancelled`. The latter can also
		// happen when the process is otherwise killed (e.g. OOM killer), so we
		// also have to check that the context was actually cancelled.
		//
		// err may be nil for a plain failure; errors.Is tolerates that, but
		// err.Error() does not, hence the explicit nil guard.
		killedByCancel := err != nil && j.Ctx.Err() != nil &&
			strings.HasSuffix(err.Error(), "signal: killed")
		if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || killedByCancel {
			j.err = fmt.Errorf("job %q cancelled: %w", j.ID, err)
			j.status = proto.StepResult_cancelled
			return
		}
		fallthrough
	default:
		j.err = err
		j.status = status
	}
}