internal async Task TimeoutRepairTasks()

in src/PatchOrchestrationApplication/CoordinatorService/src/RepairManagerHelper.cs [646:718]


        /// <summary>
        /// Looks at every repair task owned by this executor that is in the Approved or Executing
        /// state and handles the ones that have been running for longer than their executor timeout
        /// plus <c>GraceTimeForNtService</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>ManageRepairTasksOnTimeout</c> is false. For a timed-out task:
        /// if the target node is no longer returned by the node query the task is cancelled,
        /// otherwise the task is moved to <see cref="RepairTaskState.Restoring"/>.
        /// </remarks>
        /// <param name="cancellationToken">Token passed to the repair manager / query calls and to the state update.</param>
        /// <returns>A task that completes once all listed repair tasks have been examined.</returns>
        internal async Task TimeoutRepairTasks(CancellationToken cancellationToken)
        {
            if (!this.ManageRepairTasksOnTimeout)
            {
                return;
            }

            // Get repair tasks which have been approved and are still under execution by POA
            RepairTaskList repairTasks = await this.fabricClient.RepairManager.GetRepairTaskListAsync(
                TaskIdPrefix,
                RepairTaskStateFilter.Approved | RepairTaskStateFilter.Executing,
                ExecutorName,
                this.DefaultTimeoutForOperation,
                cancellationToken);

            foreach (var task in repairTasks)
            {
                ExecutorDataForRmTask executorData =
                    SerializationUtility.Deserialize<ExecutorDataForRmTask>(task.ExecutorData);

                Debug.Assert(task.ApprovedTimestamp != null, "ApprovedTimestamp of an approved repair task can never be null");
                if (!task.ApprovedTimestamp.HasValue)
                {
                    // Debug.Assert is compiled out of release builds; without this guard a single task
                    // with no approval timestamp would throw and stop processing of all remaining tasks.
                    ServiceEventSource.Current.InfoMessage(
                        string.Format(
                            "Skipping timeout check for repair task {0} in {1} state as it has no ApprovedTimestamp.",
                            task.TaskId,
                            task.State));
                    continue;
                }

                TimeSpan elapsedTime = DateTime.UtcNow.Subtract(task.ApprovedTimestamp.Value);
                TimeSpan allowedTime = TimeSpan.FromMinutes(executorData.ExecutorTimeoutInMinutes) + GraceTimeForNtService;
                if (elapsedTime <= allowedTime)
                {
                    // Task is still within its timeout (plus grace period); nothing to do yet.
                    continue;
                }

                // The task has timed out. First check whether its target node still exists.
                bool nodeExists = false;
                string nodeName = this.GetNodeNameFromRepairTask(task);
                NodeList nodeList = await this.fabricClient.QueryManager.GetNodeListAsync(nodeName, null, this.DefaultTimeoutForOperation, cancellationToken);
                foreach (var node in nodeList)
                {
                    // Ordinal comparison, same semantics as the parameterless string.Equals.
                    if (node.NodeName.Equals(nodeName, StringComparison.Ordinal))
                    {
                        nodeExists = true;
                        break;
                    }
                }

                if (!nodeExists)
                {
                    // If node does not exist now, there is no point in waiting on the task.
                    ServiceEventSource.Current.VerboseMessage("Cancelling repair task {0} which is in {1} state as the node {2} does not exist anymore.", task.TaskId, task.State, nodeName);
                    await this.CancelRepairTask(task);
                    continue;
                }

                switch (executorData.ExecutorSubState)
                {
                    // Sub-states in which installation has already started or finished on the node.
                    // The task is moved to Restoring so that the node can be enabled again.
                    case NodeAgentSfUtilityExitCodes.RestartRequested:
                    case NodeAgentSfUtilityExitCodes.RestartCompleted:
                    case NodeAgentSfUtilityExitCodes.InstallationCompleted:
                        {
                            string message =
                                string.Format(
                                    "Repair Task {0} did not complete within the Timeout period for node {1}.  Since Installation was already started, updating Repair Task state to further proceed with Node enabling",
                                    task.TaskId,
                                    nodeName);
                            ServiceEventSource.Current.InfoMessage(message);
                            await UpdateRepairTaskState(task, nodeName, RepairTaskState.Restoring, executorData.ExecutorTimeoutInMinutes, cancellationToken);
                            break;
                        }

                    // Any other sub-state: the task has still exceeded its timeout, so it is moved
                    // to Restoring as well; only the logged message differs.
                    default:
                        {
                            string message =
                                string.Format(
                                    "Repair Task {0} did not complete within the Timeout period for node {1}. Updating Repair Task state to further proceed with Node enabling",
                                    task.TaskId,
                                    nodeName);
                            ServiceEventSource.Current.InfoMessage(message);
                            await UpdateRepairTaskState(task, nodeName, RepairTaskState.Restoring, executorData.ExecutorTimeoutInMinutes, cancellationToken);
                            break;
                        }
                }
            }
        }