in src/DurableTask.AzureServiceFabric/Remote/RemoteOrchestrationServiceClient.cs [346:402]
/// <summary>
/// Resolves the endpoint for <paramref name="instanceId"/>, issues the request against it and, when the
/// outcome looks like it was produced by a stale endpoint, refreshes the partition endpoint and tries again.
/// </summary>
/// <param name="instanceId">Instance id used to look up (and refresh) the partition endpoint.</param>
/// <param name="requestAsync">Delegate that performs the HTTP request against the resolved endpoint URI.</param>
/// <param name="cancellationToken">
/// Token checked before every attempt, passed to the endpoint lookup/refresh calls and observed during the retry delay.
/// </param>
/// <returns>
/// The first response that is successful, carries the activity id header, or has a status other than
/// 404/503; otherwise the response of the final attempt once the retry budget is used up.
/// </returns>
/// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="cancellationToken"/>.</exception>
/// <exception cref="SocketException">The final attempt failed with this error (propagated with its original stack trace).</exception>
/// <exception cref="WebException">The final attempt failed with this error (propagated with its original stack trace).</exception>
/// <exception cref="HttpRequestException">The final attempt failed with this error (propagated with its original stack trace).</exception>
private async Task<HttpResponseMessage> ExecuteRequestWithRetriesAsync(string instanceId, Func<Uri, Task<HttpResponseMessage>> requestAsync, CancellationToken cancellationToken)
{
    instanceId.EnsureValidInstanceId();

    int retryAttempt = 0;
    while (true)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // True while at least one more attempt is allowed after the current one.
        bool canRetry = retryAttempt + 1 < PartitionResolutionRetryCount;
        HttpResponseMessage response = null;

        try
        {
            var endpointJson = await this.partitionProvider.GetPartitionEndPointAsync(instanceId, cancellationToken);
            var endpoint = this.GetDefaultEndPoint(endpointJson);
            response = await requestAsync(new Uri(endpoint));

            if (response.IsSuccessStatusCode || response.Headers.Contains(Constants.ActivityIdHeaderName))
            {
                return response;
            }

            // We will end up with an incorrect endpoint and a valid response without the ActivityId header when all of these conditions are true:
            // a) Service Fabric internal cache fails to receive notification for a replica move (and the subsequent endpoint change).
            // b) The http.sys URLACL either failed to clean up or was reused by another replica on the same machine.
            // c) The server side is yet to be upgraded to return ActivityId header with each request.
            // The HTTP status code 404 or 503 will depend upon the state of the new replica and the active URLACLs.
            // HTTP 404 - We'll end up with this error if the port was reused by another replica belonging to a different partition.
            //            This will make the partition id component of the URL different resulting in http.sys returning a 404.
            // HTTP 503 - We'll end up with this error if the URLACL fails to get cleaned up and we have no process listening
            //            on this port. This error code is returned by http.sys itself.
            if (response.StatusCode != HttpStatusCode.NotFound && response.StatusCode != HttpStatusCode.ServiceUnavailable)
            {
                return response;
            }

            if (!canRetry)
            {
                // Retry budget exhausted: hand the last (404/503) response back to the caller undisposed.
                return response;
            }
        }
        catch (Exception ex) when (canRetry && (ex is SocketException || ex is WebException || ex is HttpRequestException))
        {
            // Deliberately swallowed: connectivity failures are retried after an endpoint refresh.
            // On the final attempt the filter does not match, so the exception propagates unmodified
            // and keeps its original stack trace (a later "throw ex;" would have reset it).
        }

        retryAttempt++;

        // The response of a failed attempt is not returned to the caller, so release it before retrying.
        response?.Dispose();

        // Pass the token so a cancelled caller does not have to wait out the whole back-off delay.
        await Task.Delay(PartitionResolutionRetryDelay, cancellationToken);
        await this.partitionProvider.RefreshPartitionEndpointAsync(instanceId, cancellationToken);
    }
}