public override int Execute()

in src/devhostagent.restorationjob/RestorationJobApp.cs [73:195]


        public override int Execute(string[] args, CancellationToken cancellationToken)
        {
            // Synchronous entry point: blocks on the asynchronous implementation through
            // AsyncHelpers.RunSync and reports its result as an integer. The args parameter is not read here.
            var exitCode = AsyncHelpers.RunSync(() => this.ExecuteInnerAsync(cancellationToken));
            return (int)exitCode;
        }

        /// <summary>
        /// Asynchronous body of the restoration job. Pings the agent once per ping interval and restores the
        /// patched workload once the agent has reported no connected sessions for longer than the configured
        /// restore timeout.
        /// </summary>
        /// <param name="cancellationToken">Token observed by the delays, the agent calls and the restore.</param>
        /// <returns>
        /// <see cref="ExitCode.Success"/> after a restore followed by job cleanup;
        /// <see cref="ExitCode.Cancel"/> when cancellation was requested;
        /// <see cref="ExitCode.Fail"/> after too many consecutive failed pings or on any other exception.
        /// </returns>
        private async Task<ExitCode> ExecuteInnerAsync(CancellationToken cancellationToken)
        {
            try
            {
                // Both values are used for the job cleanup at the end, so validate them before doing any work.
                AssertHelper.NotNullOrEmpty(_restorationJobEnvironmentVariables.Namespace, nameof(_restorationJobEnvironmentVariables.Namespace));
                AssertHelper.NotNullOrEmpty(_restorationJobEnvironmentVariables.InstanceLabelValue, nameof(_restorationJobEnvironmentVariables.InstanceLabelValue));

                // Describes the workload that has to be restored
                var patchState = this._ParsePatchState();
                _log.Info("Waiting to restore previous state on {0} {1}/{2}...", patchState.KubernetesType.GetStringValue(), new PII(patchState.Namespace), new PII(patchState.Name));

                // One additional interval up front (on top of the per-iteration delay) to allow things to initialize
                await Task.Delay(_restorationJobEnvironmentVariables.PingInterval, cancellationToken);

                int consecutiveFailures = 0;
                // Time of the most recent ping that reported at least one connected session
                DateTimeOffset? lastSeenWithSessions = null;
                // Time of the first session-less ping; only used while no session has ever been observed
                DateTimeOffset? firstSeenWithoutSessions = null;
                bool restored = false;

                while (!(cancellationToken.IsCancellationRequested || restored))
                {
                    // Give up once the agent has been unreachable too many times in a row
                    if (consecutiveFailures >= _restorationJobEnvironmentVariables.NumFailedPingsBeforeExit)
                    {
                        _log.Error($"Failed to ping agent {consecutiveFailures} times. Exiting...");
                        return ExitCode.Fail;
                    }

                    // Wait one interval between pings
                    await Task.Delay(_restorationJobEnvironmentVariables.PingInterval, cancellationToken);

                    _log.Verbose("Pinging...");
                    var pingProperties = new Dictionary<string, object>()
                    {
                        { RestorePerformed, false },
                        { NumFailedPings, consecutiveFailures },
                        { HasConnectedClients, "" }
                    };

                    // Leaving this scope through 'continue' disposes the performance logger without marking it succeeded.
                    using (var pingPerf = _log.StartPerformanceLogger(
                        Events.RestorationJob.AreaName,
                        Events.RestorationJob.Operations.AgentPing,
                        pingProperties))
                    {
                        // Resolve where the agent can be reached
                        Uri agentEndpoint = await this._GetAgentEndpointAsync((dynamic)patchState, cancellationToken);
                        if (agentEndpoint == null)
                        {
                            _log.Verbose("Couldn't get agent endpoint");
                            consecutiveFailures++;
                            continue;
                        }

                        // Ask the agent how many sessions are connected
                        var pingResult = await this._PingAgentAsync(agentEndpoint, cancellationToken);
                        if (pingResult == null)
                        {
                            _log.Verbose("Failed to ping agent");
                            consecutiveFailures++;
                            continue;
                        }

                        if (pingResult.NumConnectedSessions > 0)
                        {
                            _log.Verbose($"Agent has {pingResult.NumConnectedSessions} connected sessions");
                            lastSeenWithSessions = DateTimeOffset.Now;
                            pingPerf.SetProperty(HasConnectedClients, true);
                        }
                        else
                        {
                            pingPerf.SetProperty(HasConnectedClients, false);

                            TimeSpan? idleFor;
                            if (lastSeenWithSessions != null)
                            {
                                // Sessions were seen before: measure from the last ping that reported any.
                                idleFor = DateTimeOffset.Now - lastSeenWithSessions;
                            }
                            else
                            {
                                // No session has ever been seen: the first session-less ping starts the clock,
                                // and later iterations keep that same starting point.
                                firstSeenWithoutSessions = firstSeenWithoutSessions ?? DateTimeOffset.Now;
                                idleFor = DateTimeOffset.Now - firstSeenWithoutSessions;
                            }

                            if (idleFor.HasValue && idleFor.Value > _restorationJobEnvironmentVariables.RestoreTimeout)
                            {
                                // Nobody has been connected for longer than the restore timeout: put the workload back
                                _log.Info($"Agent has no connected sessions for {idleFor.Value:g}. Restoring...");
                                await this._RestoreAsync((dynamic)patchState, cancellationToken);
                                _log.Info("Restored {0} {1}/{2}.", patchState.KubernetesType.GetStringValue(), new PII(patchState.Namespace), new PII(patchState.Name));
                                pingPerf.SetProperty(RestorePerformed, true);
                                restored = true;
                            }
                        }

                        // The agent answered, so the consecutive-failure streak is over
                        consecutiveFailures = 0;
                        pingPerf.SetSucceeded();
                    }
                }

                if (!restored)
                {
                    // The loop only ends without a restore when cancellation was requested.
                    return ExitCode.Cancel;
                }

                // Remove the restoration job itself.
                // The cancellationToken is deliberately not passed, to avoid a race condition with Kubernetes killing the pod.
                await _remoteRestoreJobCleaner.CleanupRemoteRestoreJobByInstanceLabelAsync(_restorationJobEnvironmentVariables.InstanceLabelValue, _restorationJobEnvironmentVariables.Namespace, default(CancellationToken));
                return ExitCode.Success;
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                _log.Info("Restoration job cancelled");
                return ExitCode.Cancel;
            }
            catch (Exception ex)
            {
                // Message goes to the non-telemetry logger, the full exception to the non-console logger
                _log.WithoutTelemetry.Error($"Encountered exception: {ex.Message}");
                _log.WithoutConsole.Exception(ex);
                return ExitCode.Fail;
            }
        }