in src/PatchOrchestrationApplication/CoordinatorService/src/RepairManagerHelper.cs [646:718]
/// <summary>
/// Looks at the repair tasks owned by this executor that are in the Approved or Executing state and
/// handles the ones that have exceeded their executor timeout plus <c>GraceTimeForNtService</c>.
/// A timed-out task whose node no longer exists is cancelled; any other timed-out task is moved to
/// <c>RepairTaskState.Restoring</c>. Does nothing when <c>ManageRepairTasksOnTimeout</c> is false.
/// </summary>
/// <param name="cancellationToken">Token passed to the repair-manager and query calls.</param>
/// <returns>A task that completes once every fetched repair task has been examined.</returns>
internal async Task TimeoutRepairTasks(CancellationToken cancellationToken)
{
    if (!this.ManageRepairTasksOnTimeout)
    {
        return;
    }

    // Get repair tasks which have been approved and are still under execution by POA
    RepairTaskList repairTasks = await this.fabricClient.RepairManager.GetRepairTaskListAsync(
        TaskIdPrefix,
        RepairTaskStateFilter.Approved | RepairTaskStateFilter.Executing,
        ExecutorName,
        this.DefaultTimeoutForOperation,
        cancellationToken);

    foreach (var task in repairTasks)
    {
        ExecutorDataForRmTask executorData =
            SerializationUtility.Deserialize<ExecutorDataForRmTask>(task.ExecutorData);

        Debug.Assert(task.ApprovedTimestamp != null, "ApprovedTimestamp of an approved repair task can never be null");
        if (task.ApprovedTimestamp == null)
        {
            // Debug.Assert is compiled out of release builds; without this guard ".Value" would throw
            // and stop the processing of every remaining repair task.
            ServiceEventSource.Current.InfoMessage(
                string.Format(
                    "Skipping timeout check for repair task {0} in {1} state as it has no ApprovedTimestamp.",
                    task.TaskId,
                    task.State));
            continue;
        }

        TimeSpan elapsedTime = DateTime.UtcNow.Subtract(task.ApprovedTimestamp.Value);
        TimeSpan allowedTime = TimeSpan.FromMinutes(executorData.ExecutorTimeoutInMinutes) + GraceTimeForNtService;
        if (elapsedTime <= allowedTime)
        {
            // Still within the allowed window, nothing to do for this task.
            continue;
        }

        // Check whether the node targeted by the repair task is still returned by the cluster query.
        string nodeName = this.GetNodeNameFromRepairTask(task);
        NodeList nodeList = await this.fabricClient.QueryManager.GetNodeListAsync(nodeName, null, this.DefaultTimeoutForOperation, cancellationToken);
        bool nodeExists = false;
        foreach (var node in nodeList)
        {
            if (string.Equals(node.NodeName, nodeName, StringComparison.Ordinal))
            {
                nodeExists = true;
                break;
            }
        }

        if (!nodeExists)
        {
            // If node does not exist now, there is no point in waiting on the task.
            ServiceEventSource.Current.VerboseMessage("Cancelling repair task {0} which is in {1} state as the node {2} does not exist anymore.", task.TaskId, task.State, nodeName);
            await this.CancelRepairTask(task);
            continue;
        }

        // Both outcomes move the task to Restoring; the sub-state only decides which message is logged.
        string message;
        switch (executorData.ExecutorSubState)
        {
            // Sub-states reported once installation had already started on the node.
            case NodeAgentSfUtilityExitCodes.RestartRequested:
            case NodeAgentSfUtilityExitCodes.RestartCompleted:
            case NodeAgentSfUtilityExitCodes.InstallationCompleted:
                message = string.Format(
                    "Repair Task {0} did not complete within the Timeout period for node {1}. Since Installation was already started, updating Repair Task state to further proceed with Node enabling",
                    task.TaskId,
                    nodeName);
                break;

            default:
                // This branch is only reached after the timeout has elapsed, so the task did NOT complete in time.
                message = string.Format(
                    "Repair Task {0} did not complete within the Timeout period for node {1}. Updating Repair Task state to further proceed with Node enabling",
                    task.TaskId,
                    nodeName);
                break;
        }

        ServiceEventSource.Current.InfoMessage(message);
        await UpdateRepairTaskState(task, nodeName, RepairTaskState.Restoring, executorData.ExecutorTimeoutInMinutes, cancellationToken);
    }
}