in subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java [735:835]
/**
 * Decides whether a Framework whose Application has exited is retried with a
 * new Application or completed.
 *
 * <p>The decision is taken in this order:
 * <ol>
 *   <li>If no matching FrameworkRequest is found for the Framework's name and
 *       version, a warning is logged and nothing else happens.</li>
 *   <li>The counter matching the Application's ExitType is incremented on a
 *       deep copy of the current RetryPolicyState.</li>
 *   <li>FancyRetryPolicy (only when enabled in the RetryPolicyDescriptor):
 *       TRANSIENT_NORMAL retries immediately, TRANSIENT_CONFLICT retries after
 *       a random backoff delay, NON_TRANSIENT completes the Framework. Any
 *       other ExitType falls through to the NormalRetryPolicy.</li>
 *   <li>NormalRetryPolicy: retry when MaxRetryCount is the extended-unlimited
 *       value, or when the exit was not SUCCEEDED and MaxRetryCount is either
 *       the unlimited value or still greater than the RetriedCount read at
 *       entry. Otherwise the Framework is completed.</li>
 * </ol>
 *
 * <p>NOTE(review): the updated RetryPolicyState copy is only handed on through
 * {@code retryFramework}; the {@code completeFramework} paths receive just
 * {@code frameworkStatus}.
 *
 * @param frameworkStatus status of the Framework whose Application exited
 * @throws Exception declared by the method; presumably propagated from the
 *     helpers it calls -- confirm against their signatures
 */
private void attemptToRetry(FrameworkStatus frameworkStatus) throws Exception {
  String name = frameworkStatus.getFrameworkName();
  Integer appExitCode = frameworkStatus.getApplicationExitCode();
  ExitType appExitType = frameworkStatus.getApplicationExitType();

  // Values read here are the ones from BEFORE this attempt is counted.
  Integer priorRetriedCount = frameworkStatus.getFrameworkRetryPolicyState().getRetriedCount();
  RetryPolicyState updatedState = YamlUtils.deepCopy(
      frameworkStatus.getFrameworkRetryPolicyState(), RetryPolicyState.class);
  Integer priorConflictRetriedCount =
      frameworkStatus.getFrameworkRetryPolicyState().getTransientConflictRetriedCount();

  String prefix = String.format("[%s]: attemptToRetry: ", name);

  FrameworkRequest request =
      requestManager.tryGetFrameworkRequest(name, frameworkStatus.getFrameworkVersion());
  if (request == null) {
    LOGGER.logWarning(prefix + "Framework not found in Request. Ignore it.");
    return;
  }

  RetryPolicyDescriptor policy = request.getFrameworkDescriptor().getRetryPolicy();
  Boolean fancyEnabled = policy.getFancyRetryPolicy();
  Integer maxRetries = policy.getMaxRetryCount();

  // Logged before any counter is touched, so this shows the state as copied.
  LOGGER.logSplittedLines(Level.INFO,
      prefix + "ApplicationExitCode: [%s], ApplicationExitType: [%s], RetryPolicyState:\n[%s]",
      appExitCode, appExitType, WebCommon.toJson(updatedState));

  String completePrefix = prefix + "Will completeFramework. Reason: ";
  String retryPrefix = prefix + "Will retryFramework with new Application. Reason: ";

  // Bookkeeping: count this exit under its ExitType, whichever policy ends up deciding.
  if (appExitType == ExitType.TRANSIENT_NORMAL) {
    updatedState.setTransientNormalRetriedCount(updatedState.getTransientNormalRetriedCount() + 1);
  } else if (appExitType == ExitType.TRANSIENT_CONFLICT) {
    updatedState.setTransientConflictRetriedCount(updatedState.getTransientConflictRetriedCount() + 1);
  } else if (appExitType == ExitType.NON_TRANSIENT) {
    updatedState.setNonTransientRetriedCount(updatedState.getNonTransientRetriedCount() + 1);
  } else if (appExitType == ExitType.SUCCEEDED) {
    updatedState.setSucceededRetriedCount(updatedState.getSucceededRetriedCount() + 1);
  } else {
    updatedState.setUnKnownRetriedCount(updatedState.getUnKnownRetriedCount() + 1);
  }

  // 1. FancyRetryPolicy
  if (fancyEnabled) {
    String fancyReason = String.format("FancyRetryPolicy: Framework exited due to %s.", appExitType);

    if (appExitType == ExitType.TRANSIENT_NORMAL) {
      LOGGER.logWarning(retryPrefix + fancyReason);
      retryFramework(frameworkStatus, updatedState);
      return;
    }

    if (appExitType == ExitType.TRANSIENT_CONFLICT) {
      // Backoff is derived from the conflict count before this attempt was added.
      int backoffSec = RetryUtils.calcRandomBackoffDelay(
          priorConflictRetriedCount,
          conf.getApplicationTransientConflictMinDelaySec(),
          conf.getApplicationTransientConflictMaxDelaySec());
      LOGGER.logWarning(
          prefix + "Will retryFramework with new Application after %ss. Reason: " + fancyReason,
          backoffSec);

      // The deferred task works on a deep copy of the status taken now,
      // not on the live frameworkStatus object.
      FrameworkStatus statusSnapshot = YamlUtils.deepCopy(frameworkStatus, FrameworkStatus.class);
      // Seconds scaled by 1000 -- presumably the queue expects milliseconds; confirm.
      transitionFrameworkStateQueue.queueSystemTaskDelayed(() -> {
        retryFramework(statusSnapshot, updatedState);
      }, backoffSec * 1000);
      return;
    }

    if (appExitType == ExitType.NON_TRANSIENT) {
      LOGGER.logWarning(completePrefix + fancyReason);
      completeFramework(frameworkStatus);
      return;
    }

    // FancyRetryPolicy only treats transient and non-transient failures specially;
    // every other exit is left to the NormalRetryPolicy below.
    LOGGER.logInfo(prefix + "Transfer the RetryDecision to NormalRetryPolicy. Reason: " + fancyReason);
  }

  // 2. NormalRetryPolicy
  boolean exitedSucceeded = appExitType == ExitType.SUCCEEDED;
  boolean retryAllowed =
      maxRetries == GlobalConstants.USING_EXTENDED_UNLIMITED_VALUE
          || (!exitedSucceeded
              && (maxRetries == GlobalConstants.USING_UNLIMITED_VALUE
                  || priorRetriedCount < maxRetries));

  if (retryAllowed) {
    updatedState.setRetriedCount(updatedState.getRetriedCount() + 1);
    LOGGER.logWarning(
        retryPrefix + "RetriedCount %s has not reached MaxRetryCount %s.",
        priorRetriedCount, maxRetries);
    retryFramework(frameworkStatus, updatedState);
    return;
  }

  // No retry: only the log message differs between a successful exit and an exhausted budget.
  if (exitedSucceeded) {
    LOGGER.logInfo(completePrefix + "Framework exited due to %s.", appExitType);
  } else {
    LOGGER.logWarning(
        completePrefix + "RetriedCount %s has reached MaxRetryCount %s.",
        priorRetriedCount, maxRetries);
  }
  completeFramework(frameworkStatus);
}