func executeVMWatchHelper()

in main/vmWatch.go [109:181]


func executeVMWatchHelper(lg *slog.Logger, attempt int, vmWatchSettings *vmWatchSettings, hEnv *handlerenv.HandlerEnvironment) (err error) {
	pid := -1
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("error: %w\n Additonal Details: %+v", err, r)
			telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Recovered %+v", r))
		}
	}()

	// Setup command
	var resourceGovernanceRequired bool
	vmWatchCommand, resourceGovernanceRequired, err = setupVMWatchCommand(vmWatchSettings, hEnv)
	if err != nil {
		err = fmt.Errorf("[%v][PID -1] Attempt %d: VMWatch setup failed. Error: %w", time.Now().UTC().Format(time.RFC3339), attempt, err)
		telemetry.SendEvent(telemetry.ErrorEvent, telemetry.SetupVMWatchTask, err.Error())
		return err
	}

	telemetry.SendEvent(telemetry.InfoEvent, telemetry.SetupVMWatchTask,
		fmt.Sprintf("Attempt %d: Setup VMWatch command: %s\nArgs: %v\nDir: %s\nEnv: %v\n",
			attempt, vmWatchCommand.Path, vmWatchCommand.Args, vmWatchCommand.Dir, vmWatchCommand.Env),
	)
	// TODO: Combined output may get excessively long, especially since VMWatch is a long running process
	// We should trim the output or only get from Stderr
	combinedOutput := &bytes.Buffer{}
	vmWatchCommand.Stdout = combinedOutput
	vmWatchCommand.Stderr = combinedOutput
	vmWatchCommand.SysProcAttr = &syscall.SysProcAttr{Pdeathsig: syscall.SIGTERM}

	// Start command
	if err := vmWatchCommand.Start(); err != nil {
		err = fmt.Errorf("[%v][PID -1] Attempt %d: VMWatch failed to start. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), attempt, err, combinedOutput.String())
		telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StartVMWatchTask, err.Error(), "error", err)
		return err
	}
	pid = vmWatchCommand.Process.Pid // cmd.Process should be populated on success

	telemetry.SendEvent(telemetry.InfoEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Attempt %d: Started VMWatch with PID %d", attempt, pid))
	if !resourceGovernanceRequired {
		telemetry.SendEvent(telemetry.InfoEvent, telemetry.StartVMWatchTask, fmt.Sprintf("Resource governance was already applied at process launch of PID %d", pid))
	} else {
		err = applyResourceGovernance(lg, vmWatchSettings, vmWatchCommand)
		if err != nil {
			// if this has failed we have already killed the process as we failed to assign to cgroup so log the appropriate error
			err = fmt.Errorf("[%v][PID %d] Attempt %d: VMWatch process exited. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), pid, attempt, err, combinedOutput.String())
			telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StopVMWatchTask, err.Error(), "error", err)
			return err
		}
	}

	processDone := make(chan bool)

	// create a waitgroup to coordinate the goroutines
	var wg sync.WaitGroup
	// add a task to wait for process completion
	wg.Add(1)
	go func() {
		defer wg.Done()
		err = vmWatchCommand.Wait()
		processDone <- true
		close(processDone)
	}()
	// add a task to monitor heartbeat
	wg.Add(1)
	go func() {
		defer wg.Done()
		monitorHeartBeat(lg, GetVMWatchHeartbeatFilePath(hEnv), processDone, vmWatchCommand)
	}()
	wg.Wait()
	err = fmt.Errorf("[%v][PID %d] Attempt %d: VMWatch process exited. Error: %w\nOutput: %s", time.Now().UTC().Format(time.RFC3339), pid, attempt, err, combinedOutput.String())
	telemetry.SendEvent(telemetry.ErrorEvent, telemetry.StopVMWatchTask, err.Error(), "error", err)
	return err
}