RetriableRpcStatus CoordinatorRpc::AnalyzeResponse()

in src/kudu/transactions/coordinator_rpc.cc [129:244]


// Classifies the outcome of this RPC so the retry machinery knows what to do
// next.
//
// 'rpc_cb_status' is the status handed to the RPC callback. The returned
// RetriableRpcStatus carries both the most specific error status that could be
// extracted (from the callback status, the RPC controller, the op result, or
// the response error, examined in that order) and the category assigned to it.
RetriableRpcStatus CoordinatorRpc::AnalyzeResponse(const Status& rpc_cb_status) {
  RetriableRpcStatus outcome;

  // A non-OK callback status is taken as-is; otherwise start from the status
  // reported by the RPC controller.
  if (rpc_cb_status.ok()) {
    outcome.status = retrier().controller().status();
  } else {
    outcome.status = rpc_cb_status;
  }

  // With no transport-level problem so far, surface the application-level
  // error (AppStatusPB) carried by the op result, if there is one, as a Status
  // so that the checks below can act on it.
  if (outcome.status.ok()) {
    if (resp_.has_op_result() && resp_.op_result().has_op_error()) {
      outcome.status = StatusFromPB(resp_.op_result().op_error());
    }
  }

  // Remote errors: only "too busy" and "unavailable" get special treatment
  // here; any other code falls through to the checks below.
  if (outcome.status.IsRemoteError()) {
    const ErrorStatusPB* rpc_err =
        mutable_retrier()->controller().error_response();
    if (rpc_err != nullptr && rpc_err->has_code()) {
      const auto rpc_code = rpc_err->code();
      if (rpc_code == ErrorStatusPB::ERROR_SERVER_TOO_BUSY ||
          rpc_code == ErrorStatusPB::ERROR_UNAVAILABLE) {
        outcome.result = RetriableRpcStatus::SERVICE_UNAVAILABLE;
        return outcome;
      }
    }
  }

  // TODO(awong): it might be easier to understand if the resulting expected
  // action were encoded in these status enums, e.g. RETRY_SAME_SERVER.
  if (outcome.status.IsServiceUnavailable()) {
    outcome.result = RetriableRpcStatus::SERVICE_UNAVAILABLE;
    return outcome;
  }

  // A "not authorized" status accompanied by the invalid-token error code
  // means a new authentication token has to be acquired.
  if (outcome.status.IsNotAuthorized()) {
    const ErrorStatusPB* auth_err =
        mutable_retrier()->controller().error_response();
    const bool token_rejected =
        auth_err != nullptr && auth_err->has_code() &&
        auth_err->code() == ErrorStatusPB::FATAL_INVALID_AUTHENTICATION_TOKEN;
    if (token_rejected) {
      outcome.result = RetriableRpcStatus::INVALID_AUTHENTICATION_TOKEN;
      return outcome;
    }
  }

  // The server could not be reached (e.g. it was down): fail over to a
  // different replica.
  if (outcome.status.IsNetworkError()) {
    outcome.result = RetriableRpcStatus::SERVER_NOT_ACCESSIBLE;
    return outcome;
  }

  // The RPC controller errors have all been examined. From this point on the
  // tserver response error, when present, is what gets inspected.
  if (resp_.has_error()) {
    // Unwrap the response error into the result status unless an earlier,
    // non-OK status is already recorded.
    if (outcome.status.ok()) {
      outcome.status = StatusFromPB(resp_.error().status());
      DCHECK(!outcome.status.ok());
    }

    bool classified = true;
    switch (resp_.error().code()) {
      // TABLET_NOT_FOUND means the replica we thought was leader has been
      // deleted.
      case TabletServerErrorPB::TABLET_NOT_FOUND:
      case TabletServerErrorPB::TABLET_FAILED:
        outcome.result = RetriableRpcStatus::RESOURCE_NOT_FOUND;
        break;

      case TabletServerErrorPB::TABLET_NOT_RUNNING:
      case TabletServerErrorPB::THROTTLED:
        outcome.result = RetriableRpcStatus::SERVICE_UNAVAILABLE;
        break;

      case TabletServerErrorPB::NOT_THE_LEADER:
        outcome.result = RetriableRpcStatus::REPLICA_NOT_LEADER;
        break;

      case TabletServerErrorPB::TXN_ILLEGAL_STATE:
        outcome.result = RetriableRpcStatus::NON_RETRIABLE_ERROR;
        break;

      case TabletServerErrorPB::UNKNOWN_ERROR:
      default:
        // Every other code is left to the status-based checks below.
        classified = false;
        break;
    }
    if (classified) {
      return outcome;
    }
  }

  // Covers "Op aborted by new leader" Raft replication errors as well as
  // not-a-Raft-leader errors.
  if (outcome.status.IsAborted() || outcome.status.IsIllegalState()) {
    outcome.result = RetriableRpcStatus::REPLICA_NOT_LEADER;
    return outcome;
  }

  // Connection negotiation failure while the overall RPC timeout has not yet
  // expired: report the server as not accessible and let the RetriableRpc
  // logic switch to an alternative tablet replica.
  //
  // NOTE: security-related negotiation errors are dealt with earlier, in the
  //       IsNotAuthorized() and IsRemoteError() handlers.
  if (!rpc_cb_status.IsTimedOut() && !outcome.status.ok()) {
    if (mutable_retrier()->controller().negotiation_failed()) {
      outcome.result = RetriableRpcStatus::SERVER_NOT_ACCESSIBLE;
      return outcome;
    }
  }

  // Nothing above matched: success if the status is OK, otherwise an error
  // that must not be retried.
  outcome.result = outcome.status.ok() ? RetriableRpcStatus::OK
                                       : RetriableRpcStatus::NON_RETRIABLE_ERROR;
  return outcome;
}