private void attemptToRetry()

in subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java [741:825]


  private void attemptToRetry(TaskStatus taskStatus) throws Exception {
    String taskRoleName = taskStatus.getTaskRoleName();
    TaskStatusLocator taskLocator = new TaskStatusLocator(taskRoleName, taskStatus.getTaskIndex());
    Integer exitCode = taskStatus.getContainerExitCode();
    ExitType exitType = taskStatus.getContainerExitType();
    Integer retriedCount = taskStatus.getTaskRetryPolicyState().getRetriedCount();
    RetryPolicyState newRetryPolicyState = YamlUtils.deepCopy(taskStatus.getTaskRetryPolicyState(), RetryPolicyState.class);

    RetryPolicyDescriptor retryPolicy = requestManager.getTaskRetryPolicy(taskRoleName);
    Boolean fancyRetryPolicy = retryPolicy.getFancyRetryPolicy();
    Integer maxRetryCount = retryPolicy.getMaxRetryCount();

    String logPrefix = String.format("%s: attemptToRetry: ", taskLocator);

    LOGGER.logSplittedLines(Level.INFO,
        logPrefix + "ContainerExitCode: [%s], ContainerExitType: [%s], RetryPolicyState:\n[%s]",
        exitCode, exitType, WebCommon.toJson(newRetryPolicyState));

    String completeTaskLogPrefix = logPrefix + "Will completeTask. Reason: ";
    String retryTaskLogPrefix = logPrefix + "Will retryTask with new Container. Reason: ";

    // 1. FancyRetryPolicy
    String fancyRetryPolicyLogSuffix = String.format("FancyRetryPolicy: Task exited due to %s.", exitType);
    if (exitType == ExitType.TRANSIENT_NORMAL) {
      newRetryPolicyState.setTransientNormalRetriedCount(newRetryPolicyState.getTransientNormalRetriedCount() + 1);
      if (fancyRetryPolicy) {
        LOGGER.logWarning(retryTaskLogPrefix + fancyRetryPolicyLogSuffix);
        retryTask(taskStatus, newRetryPolicyState);
        return;
      }
    } else if (exitType == ExitType.TRANSIENT_CONFLICT) {
      newRetryPolicyState.setTransientConflictRetriedCount(newRetryPolicyState.getTransientConflictRetriedCount() + 1);
      if (fancyRetryPolicy) {
        LOGGER.logWarning(retryTaskLogPrefix + fancyRetryPolicyLogSuffix);
        retryTask(taskStatus, newRetryPolicyState);
        return;
      }
    } else if (exitType == ExitType.NON_TRANSIENT) {
      newRetryPolicyState.setNonTransientRetriedCount(newRetryPolicyState.getNonTransientRetriedCount() + 1);
      if (fancyRetryPolicy) {
        LOGGER.logWarning(completeTaskLogPrefix + fancyRetryPolicyLogSuffix);
        completeTask(taskStatus);
        return;
      }
    } else {
      if (exitType == ExitType.SUCCEEDED) {
        newRetryPolicyState.setSucceededRetriedCount(newRetryPolicyState.getSucceededRetriedCount() + 1);
      } else {
        newRetryPolicyState.setUnKnownRetriedCount(newRetryPolicyState.getUnKnownRetriedCount() + 1);
      }
      if (fancyRetryPolicy) {
        // FancyRetryPolicy only handle exit due to transient and non-transient failure specially,
        // Leave exit due to others to NormalRetryPolicy
        LOGGER.logInfo(logPrefix +
            "Transfer the RetryDecision to NormalRetryPolicy. Reason: " +
            fancyRetryPolicyLogSuffix);
      }
    }

    // 2. NormalRetryPolicy
    if (maxRetryCount == GlobalConstants.USING_EXTENDED_UNLIMITED_VALUE ||
        (exitType != ExitType.SUCCEEDED && maxRetryCount == GlobalConstants.USING_UNLIMITED_VALUE) ||
        (exitType != ExitType.SUCCEEDED && retriedCount < maxRetryCount)) {
      newRetryPolicyState.setRetriedCount(newRetryPolicyState.getRetriedCount() + 1);

      LOGGER.logWarning(retryTaskLogPrefix +
              "RetriedCount %s has not reached MaxRetryCount %s.",
          retriedCount, maxRetryCount);
      retryTask(taskStatus, newRetryPolicyState);
      return;
    } else {
      if (exitType == ExitType.SUCCEEDED) {
        LOGGER.logInfo(completeTaskLogPrefix +
            "Task exited due to %s.", exitType);
        completeTask(taskStatus);
        return;
      } else {
        LOGGER.logWarning(completeTaskLogPrefix +
                "RetriedCount %s has reached MaxRetryCount %s.",
            retriedCount, maxRetryCount);
        completeTask(taskStatus);
        return;
      }
    }
  }