in src/Microsoft.ServiceFabric.Actors/Runtime/ActorStateProviderHelper.cs [274:412]
/// <summary>
/// Invokes <paramref name="func"/> in a loop, retrying with a back-off delay when it fails with
/// an exception that this helper treats as retriable, and returns its result once it succeeds.
/// </summary>
/// <typeparam name="TResult">Type of the value produced by <paramref name="func"/>.</typeparam>
/// <param name="func">The asynchronous operation to execute. It is re-invoked on every retry.</param>
/// <param name="functionNameTag">Label identifying the operation in the retry trace messages.</param>
/// <param name="userCancellationToken">
/// Caller's cancellation token. It is consulted when <paramref name="func"/> throws
/// <see cref="OperationCanceledException"/> and is passed to the back-off delay between retries.
/// </param>
/// <returns>The value returned by the first successful invocation of <paramref name="func"/>.</returns>
/// <remarks>
/// Retry policy, per exception type thrown by <paramref name="func"/>:
/// <list type="bullet">
/// <item><description>FabricTransientException: always retried; the operation timeout is not checked.
/// ReplicationQueueFull uses a linear back-off (retryCount * TransientErrorRetryDelay), every other
/// error code uses the fixed TransientErrorRetryDelay.</description></item>
/// <item><description>FabricNotPrimaryException: rethrown if the operation timeout has elapsed or
/// the replica is currently not primary; otherwise retried.</description></item>
/// <item><description>FabricObjectClosedException: never retried; converted to FabricNotPrimaryException
/// when the owner is a KvsActorStateProvider, otherwise rethrown.</description></item>
/// <item><description>Any other FabricException: never retried; TransactionAborted on a
/// KvsActorStateProvider is converted to FabricNotPrimaryException, otherwise rethrown.</description></item>
/// <item><description>OperationCanceledException: rethrown if the user token is cancelled or the
/// operation timeout has elapsed; converted to FabricNotPrimaryException if the replica is not
/// primary; otherwise retried.</description></item>
/// <item><description>TransactionFaultedException: rethrown if the operation timeout has elapsed;
/// otherwise retried.</description></item>
/// </list>
/// The back-off delay itself observes <paramref name="userCancellationToken"/>, so cancelling the
/// token while waiting for the next retry surfaces the cancellation from the delay.
/// </remarks>
internal async Task<TResult> ExecuteWithRetriesAsync<TResult>(
    Func<Task<TResult>> func,
    string functionNameTag,
    CancellationToken userCancellationToken)
{
    var retryCount = 0;
    var useLinearBackoff = false;
    var lastExceptionTag = string.Empty;

    // Captured once, before the first attempt, so every retry is validated against the same value.
    var roleChangeTracker = this.owner.RoleChangeTracker;

    // Correlates all trace lines emitted for this logical operation across its retries.
    var operationId = Guid.NewGuid();

    // Started once for the whole operation; it bounds the retries, not a single attempt.
    var timeoutHelper = new TimeoutHelper(this.owner.OperationTimeout);

    while (true)
    {
        try
        {
            // Actor operations only happen on a primary replica and are required not to span role
            // change boundaries. This is required to ensure that for a given ActorId on a primary
            // replica only one thread can make any state change. Any operation active for this ActorId
            // when current replica was primary previously should fail to make any state change.
            //
            // When primary replica becomes secondary, all in-flight operations fail as replica do not
            // have write status. However, in rare cases, it may happen that replica undergoes a P -> S -> P
            // role change very quickly while an in-flight operation was undergoing back-off before next retry.
            //
            // Fail the operation if primary replica of partition has changed.
            this.EnsureSamePrimary(roleChangeTracker);

            // Reset on every attempt; only a ReplicationQueueFull failure of this attempt re-enables it.
            useLinearBackoff = false;

            var res = await func.Invoke();

            // Only trace completion when at least one retry happened, to keep the success path quiet.
            if (retryCount > 0)
            {
                ActorTrace.Source.WriteInfoWithId(
                    this.owner.TraceType,
                    this.owner.TraceId,
                    "ExecuteWithRetriesAsync: FunctionNameTag={0}, OperationId={1} completed with RetryCount={2}.",
                    functionNameTag,
                    operationId,
                    retryCount);
            }

            return res;
        }
        catch (FabricTransientException ex)
        {
            useLinearBackoff = (ex.ErrorCode == FabricErrorCode.ReplicationQueueFull);
            lastExceptionTag = ex.ErrorCode.ToString();

            // fall-through and retry
        }
        catch (FabricNotPrimaryException)
        {
            if (timeoutHelper.HasTimedOut || this.CurrentReplicaRoleNotPrimary)
            {
                throw;
            }

            lastExceptionTag = "FabricNotPrimary";

            // fall-through and retry
        }
        catch (FabricObjectClosedException)
        {
            // During close of a primary replica, the user code may try to use the
            // KVS after it has been closed. This causes KVS to throw FabricObjectClosedException.
            // RC already converts it to FabricNotPrimaryException.
            if (this.owner is KvsActorStateProvider)
            {
                throw new FabricNotPrimaryException();
            }

            throw;
        }
        catch (FabricException ex)
        {
            // KVS aborts all active transaction(s) when changing role from primary to secondary
            // or if replica is primary and is closing.
            if (this.owner is KvsActorStateProvider &&
                ex.ErrorCode == FabricErrorCode.TransactionAborted)
            {
                throw new FabricNotPrimaryException();
            }

            throw;
        }
        catch (OperationCanceledException)
        {
            // The order of these checks matters: caller-requested cancellation wins, then a
            // role change is reported as not-primary, then the operation timeout.
            if (userCancellationToken.IsCancellationRequested)
            {
                throw;
            }

            if (this.CurrentReplicaRoleNotPrimary)
            {
                throw new FabricNotPrimaryException();
            }

            if (timeoutHelper.HasTimedOut)
            {
                throw;
            }

            lastExceptionTag = "OperationCanceled";

            // fall-through and retry
        }
        catch (TransactionFaultedException)
        {
            if (timeoutHelper.HasTimedOut)
            {
                throw;
            }

            lastExceptionTag = "TransactionFaulted";

            // fall-through and retry
        }

        // Reached only when one of the handlers above decided the failure is retriable.
        retryCount++;

        var effectiveRetryDelay = useLinearBackoff ?
            TimeSpan.FromTicks(retryCount * this.owner.TransientErrorRetryDelay.Ticks) :
            this.owner.TransientErrorRetryDelay;

        // TotalSeconds (not Seconds) so the trace shows the real delay: Seconds is only the
        // 0-59 seconds component and would report 0 for sub-second delays and wrap past a minute.
        ActorTrace.Source.WriteInfoWithId(
            this.owner.TraceType,
            this.owner.TraceId,
            "ExecuteWithRetriesAsync: FunctionNameTag={0}, OperationId={1}, RetryCount={2}, LastExceptionTag={3}, NextRetryDelay={4}s.",
            functionNameTag,
            operationId,
            retryCount,
            lastExceptionTag,
            effectiveRetryDelay.TotalSeconds);

        await Task.Delay(effectiveRetryDelay, userCancellationToken);
    }
}