in src/PatchOrchestrationApplication/CoordinatorService/src/RepairManagerHelper.cs [320:381]
/// <summary>
/// Evaluates the current state of claimed and in-progress repair tasks and posts a matching
/// health report or warning describing the cluster patching status.
/// </summary>
/// <remarks>
/// Decision table:
/// <list type="bullet">
/// <item>Claimed tasks, none processing, cluster health is Error: post an "unhealthy cluster" warning.</item>
/// <item>Claimed tasks, none processing, cluster not in Error: count the occurrence and, once the
/// count exceeds the threshold, post a "not patching" warning.</item>
/// <item>Any processing tasks (with or without claimed tasks): post the repair-task node update.</item>
/// <item>No claimed and no processing tasks: post an Ok health report.</item>
/// </list>
/// This method is best-effort: every exception (including cancellation) is logged and swallowed,
/// so nothing is propagated to the caller.
/// </remarks>
/// <param name="cancellationToken">Token flowed to the underlying Service Fabric queries.</param>
/// <returns>A task that completes once the status has been evaluated and posted.</returns>
public async Task PostClusterPatchingStatus(CancellationToken cancellationToken)
{
    // Number of consecutive "claimed but nothing processing" observations (on a cluster that is
    // not in Error state) tolerated before a warning is raised.
    const int MaxUpdatesWithoutApproval = 60;

    try
    {
        NodeList nodeList = await this.fabricClient.QueryManager.GetNodeListAsync(null, null, this.DefaultTimeoutForOperation, cancellationToken);
        IList<RepairTask> claimedTaskList = await this.GetClaimedRepairTasks(nodeList, cancellationToken);
        RepairTaskList processingTaskList = await this.GetRepairTasksUnderProcessing(cancellationToken);
        cancellationToken.ThrowIfCancellationRequested();

        if (claimedTaskList.Any())
        {
            if (!processingTaskList.Any())
            {
                // This means that repair tasks are not getting approved.
                // Use the same timeout and cancellation token as the other queries so this call
                // cannot outlive a cancellation request or block on the client's default timeout.
                ClusterHealth clusterHealth = await this.fabricClient.HealthManager.GetClusterHealthAsync(this.DefaultTimeoutForOperation, cancellationToken);
                if (clusterHealth.AggregatedHealthState == HealthState.Error)
                {
                    // Reset count: the lack of progress is explained by the unhealthy cluster.
                    postUpdateCount = 0;
                    string warningDescription = " Cluster is currently unhealthy. Nodes are currently not getting patched by Patch Orchestration Application. Please ensure the cluster becomes healthy for patching to continue.";

                    // NOTE(review): the second argument is opaque from here (presumably a time-to-live) - confirm against PostWarningOnCoordinatorService.
                    await PostWarningOnCoordinatorService(warningDescription, 1);
                }
                else
                {
                    postUpdateCount++;
                    if (postUpdateCount > MaxUpdatesWithoutApproval)
                    {
                        // Reset count and post a warning on the service: the reason is unknown,
                        // but POA is not approving tasks.
                        postUpdateCount = 0;
                        string warningDescription = "Patch Orchestration Application is currently not patching nodes. This could be possible if there is some node which is stuck in disabling state for long time.";

                        // NOTE(review): the second argument is opaque from here (presumably a time-to-live) - confirm against PostWarningOnCoordinatorService.
                        await PostWarningOnCoordinatorService(warningDescription, 61);
                    }
                }
            }
            else
            {
                // Tasks are being processed, so patching is progressing. Reset count.
                postUpdateCount = 0;
                await PostRMTaskNodeUpdate(cancellationToken);
            }
        }
        else
        {
            // Nothing is waiting for approval. Reset count.
            postUpdateCount = 0;
            if (processingTaskList.Any())
            {
                await PostRMTaskNodeUpdate(cancellationToken);
            }
            else
            {
                // Post the health event saying that there is no repair task and things are working fine.
                string description = "No claimed tasks and no processing tasks are found.";

                // NOTE(review): the meaning of the trailing argument (2) is not visible here - confirm against HealthManagerHelper.PostNodeHealthReport.
                HealthManagerHelper.PostNodeHealthReport(this.fabricClient, this.context.ServiceName, ClusterPatchingStatusProperty, description, HealthState.Ok, 2);
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: log and swallow so a failed status post never reaches the caller.
        ServiceEventSource.Current.ErrorMessage("PostClusterPatchingStatus failed with exception {0}", ex.ToString());
    }
}