in main/vmWatch.go [109:181]
// executeVMWatchHelper runs one attempt of the VMWatch process and blocks until
// that attempt is over.
//
// It builds the command via setupVMWatchCommand, starts it with stdout and
// stderr captured into a single in-memory buffer, applies resource governance
// when setupVMWatchCommand reports it is still required, and then waits for
// both the process-exit watcher and the heartbeat monitor to finish.
//
// Parameters:
//   - lg: logger handed to applyResourceGovernance and monitorHeartBeat.
//   - attempt: attempt number, used only in telemetry and error messages.
//   - vmWatchSettings: settings used to build the command and apply governance.
//   - hEnv: handler environment used to build the command and to locate the
//     heartbeat file.
//
// The returned error is never nil: every path (setup failure, start failure,
// governance failure, process exit, recovered panic) produces an error that
// describes why this attempt ended.
func executeVMWatchHelper(lg *slog.Logger, attempt int, vmWatchSettings *vmWatchSettings, hEnv *handlerenv.HandlerEnvironment) (err error) {
	pid := -1
	defer func() {
		if r := recover(); r != nil {
			// Only wrap the pending error when there is one; formatting a nil
			// error with %w would yield "%!w(<nil>)" in the message.
			if err == nil {
				err = fmt.Errorf("error: recovered from panic: %+v", r)
			} else {
				err = fmt.Errorf("error: %w\n Additonal Details: %+v", err, r)
			}
			telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Recovered %+v", r))
		}
	}()

	// Setup command
	var resourceGovernanceRequired bool
	vmWatchCommand, resourceGovernanceRequired, err = setupVMWatchCommand(vmWatchSettings, hEnv)
	if err != nil {
		err = fmt.Errorf("[%v][PID -1] Attempt %d: VMWatch setup failed. Error: %w", time.Now().UTC().Format(time.RFC3339), attempt, err)
		telemetry.SendEvent(telemetry.ErrorEvent, telemetry.SetupVMWatchTask, err.Error())
		return err
	}

	telemetry.SendEvent(telemetry.InfoEvent, telemetry.SetupVMWatchTask,
		fmt.Sprintf("Attempt %d: Setup VMWatch command: %s\nArgs: %v\nDir: %s\nEnv: %v\n",
			attempt, vmWatchCommand.Path, vmWatchCommand.Args, vmWatchCommand.Dir, vmWatchCommand.Env),
	)

	// TODO: Combined output may get excessively long, especially since VMWatch is a long running process
	// We should trim the output or only get from Stderr
	combinedOutput := &bytes.Buffer{}
	vmWatchCommand.Stdout = combinedOutput
	vmWatchCommand.Stderr = combinedOutput
	// Request SIGTERM for the child when its parent goes away.
	// NOTE(review): this replaces any SysProcAttr set by setupVMWatchCommand — confirm that is intended.
	vmWatchCommand.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGTERM}

	// Start command (assign to the named result instead of shadowing it)
	if err = vmWatchCommand.Start(); err != nil {
		err = fmt.Errorf("[%v][PID -1] Attempt %d: VMWatch failed to start. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), attempt, err, combinedOutput.String())
		telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StartVMWatchTask, err.Error(), "error", err)
		return err
	}
	pid = vmWatchCommand.Process.Pid // cmd.Process should be populated on success
	telemetry.SendEvent(telemetry.InfoEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Attempt %d: Started VMWatch with PID %d", attempt, pid))

	if !resourceGovernanceRequired {
		telemetry.SendEvent(telemetry.InfoEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Resource governance was already applied at process launch of PID %d", pid))
	} else {
		err = applyResourceGovernance(lg, vmWatchSettings, vmWatchCommand)
		if err != nil {
			// if this has failed we have already killed the process as we failed to assign to cgroup so log the appropriate error
			err = fmt.Errorf("[%v][PID %d] Attempt %d: VMWatch process exited. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), pid, attempt, err, combinedOutput.String())
			telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StopVMWatchTask, err.Error(), "error", err)
			return err
		}
	}

	// Buffered with capacity 1 so the exit watcher below can always deliver its
	// signal and finish, even if the heartbeat monitor has already returned
	// without receiving. With an unbuffered channel that case would block the
	// send forever and wg.Wait() would never return.
	processDone := make(chan bool, 1)

	// create a waitgroup to coordinate the goroutines
	var wg sync.WaitGroup

	// waitErr is written only by the exit watcher and read only after
	// wg.Wait(), which orders the write before the read.
	var waitErr error

	// add a task to wait for process completion
	wg.Add(1)
	go func() {
		defer wg.Done()
		waitErr = vmWatchCommand.Wait()
		processDone <- true
		close(processDone)
	}()

	// add a task to monitor heartbeat
	wg.Add(1)
	go func() {
		defer wg.Done()
		monitorHeartBeat(lg, GetVMWatchHeartbeatFilePath(hEnv), processDone, vmWatchCommand)
	}()

	wg.Wait()

	// Wait returns nil when the process ended without a reported failure; %w
	// needs a real error, so substitute a descriptive one to keep the message
	// well-formed.
	if waitErr == nil {
		waitErr = fmt.Errorf("process exited without reporting an error")
	}
	err = fmt.Errorf("[%v][PID %d] Attempt %d: VMWatch process exited. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), pid, attempt, waitErr, combinedOutput.String())
	telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StopVMWatchTask, err.Error(), "error", err)
	return err
}