Status RaftConsensus::StartElection()

in src/kudu/consensus/raft_consensus.cc [586:743]


// Starts a leader election (or pre-election) on this replica.
//
// While holding lock_, this:
//   1. verifies the replica is running and is allowed to start elections;
//   2. records the current leader / election source in 'context';
//   3. bails out if this replica is already leader, is not a voter, or (with
//      FLAGS_enable_flexi_raft) its region has no voter-distribution entry;
//   4. snoozes the failure detector;
//   5. unless 'mode' is PRE_ELECTION, advances the term by one and records a
//      vote for itself;
//   6. builds a vote counter with the self-vote already registered, a template
//      VoteRequestPB, and a LeaderElection object bound to ElectionCallback.
// The election is then run after the lock has been released.
//
// Parameters:
//   mode    - kind of election to run; PRE_ELECTION and
//             ELECT_EVEN_IF_LEADER_IS_ALIVE receive special handling below.
//   context - taken by value; its current_leader_uuid_, source_uuid_ and
//             is_chained_election_ fields are filled in here before it is
//             moved into the election callback.
//
// Returns:
//   - OK if the election was handed to LeaderElection::Run(), or if nothing
//     was started because is_start_election_allowed() is false or this
//     replica is already the leader.
//   - IllegalState if the active role is not a voter role, or if flexi-raft is
//     enabled and peer_region() is absent from the active config's voter
//     distribution.
//   - Whatever error CheckRunningUnlocked(), HandleTermAdvanceUnlocked(),
//     SetVotedForCurrentTermUnlocked() or VoteCounter::RegisterVote() returns.
// The return value does not reflect the outcome of the election itself; that
// is presumably delivered through ElectionCallback -- confirm in LeaderElection.
Status RaftConsensus::StartElection(ElectionMode mode, ElectionContext context) {
  const char* const mode_str = ModeString(mode);

  TRACE_EVENT2("consensus", "RaftConsensus::StartElection",
               "peer", LogPrefixThreadSafe(),
               "mode", mode_str);
  // Built while holding the lock, run only after the lock scope below ends.
  scoped_refptr<LeaderElection> election;
  {
    // NOTE(review): presumably asserted because the locked section below can
    // write the term/vote to disk -- confirm.
    ThreadRestrictions::AssertWaitAllowed();
    LockGuard l(lock_);
    RETURN_NOT_OK(CheckRunningUnlocked());

    // Starting elections has been disabled via the persistent vars: log a
    // warning and report success without doing anything.
    if (!persistent_vars_->is_start_election_allowed()) {
      KLOG_EVERY_N_SECS(WARNING, 300) << LogPrefixUnlocked() <<
          Substitute("allow_start_election is set to false, not starting $0 [EVERY 300 seconds]", mode_str);
      return Status::OK();
    }

    // Remember who the leader is at the moment the election starts. If the
    // caller did not name a source for this election, the current leader is
    // used as the source.
    context.current_leader_uuid_ = GetLeaderUuidUnlocked();
    if (context.source_uuid_.empty()) {
      context.source_uuid_ = context.current_leader_uuid_;
    } else if (context.source_uuid_ != context.current_leader_uuid_) {
      // If the origin of the election isn't the same as the leader we're
      // promoting away from, it must mean that this election is part of a
      // chain.
      context.is_chained_election_ = true;
    }

    RaftPeerPB::Role active_role = cmeta_->active_role();
    // Already the leader: there is nothing to elect, and this is not an error.
    if (active_role == RaftPeerPB::LEADER) {
      LOG_WITH_PREFIX_UNLOCKED(INFO) << Substitute(
          "Not starting $0 -- already a leader", mode_str);
      return Status::OK();
    }
    if (PREDICT_FALSE(!consensus::IsVoterRole(active_role))) {
      // A non-voter should not start leader elections. The leader failure
      // detector should be re-enabled once the non-voter replica is promoted
      // to voter replica.
      return Status::IllegalState("only voting members can start elections",
          SecureShortDebugString(cmeta_->ActiveConfig()));
    }

    // In flexi raft mode, we want to start elections only in Candidate
    // regions which have voter_distribution Information.
    if (FLAGS_enable_flexi_raft) {
      const auto& vd_map = cmeta_->ActiveConfig().voter_distribution();
      if (PREDICT_FALSE(vd_map.find(peer_region()) == vd_map.end())) {
        return Status::IllegalState(strings::Substitute(
            "in flexi-raft only regions with valid voter distribution can start election: $0",
            peer_region()));
      }
    }

    // NOTE(review): GetLeaderUuidUnlocked() is called a second time here;
    // context.current_leader_uuid_ was assigned from the same call above.
    LOG_WITH_PREFIX_UNLOCKED(INFO)
        << "Starting " << mode_str
        << " (" << ReasonString(context.reason_, GetLeaderUuidUnlocked()) << ")";

    // Snooze to avoid the election timer firing again as much as possible.
    // We do not disable the election timer while running an election, so that
    // if the election times out, we will try again.
    // The same 'timeout' is also handed to LeaderElection further down.
    MonoDelta timeout = LeaderElectionExpBackoffDeltaUnlocked();
    SnoozeFailureDetector(string("starting election"), timeout);

    // Increment the term and vote for ourselves, unless it's a pre-election.
    if (mode != PRE_ELECTION) {
      // TODO(mpercy): Consider using a separate Mutex for voting, which must sync to disk.

      // We skip flushing the term to disk because setting the vote just below also
      // flushes to disk, and the double fsync doesn't buy us anything.
      RETURN_NOT_OK(HandleTermAdvanceUnlocked(CurrentTermUnlocked() + 1,
                                              SKIP_FLUSH_TO_DISK));
      RETURN_NOT_OK(SetVotedForCurrentTermUnlocked(peer_uuid()));
    }

    // Take a local copy of the active config; this copy is what the vote
    // counter and the LeaderElection below operate on.
    RaftConfigPB active_config = cmeta_->ActiveConfig();
    LOG_WITH_PREFIX_UNLOCKED(INFO) << "Starting " << mode_str << " with config: "
                                   << SecureShortDebugString(active_config);

    // The term we are asking votes for. For a real election the term was
    // already bumped above, so the current term is the candidate term.
    int64_t candidate_term = CurrentTermUnlocked();
    if (mode == PRE_ELECTION) {
      // In a pre-election, we haven't bumped our own term yet, so we need to be
      // asking for votes for the next term.
      candidate_term += 1;
    }

    // Initialize the VoteCounter.
    gscoped_ptr<VoteCounter> counter;

    // Our own (granted) vote, registered with the counter below.
    VoteInfo vote_info;
    vote_info.vote = VOTE_GRANTED;
    if (!FLAGS_enable_flexi_raft) {
      // Without flexi-raft, the counter is sized from the number of voters in
      // the config snapshot and the majority size derived from that number.
      int num_voters = CountVoters(active_config);
      int majority_size = MajoritySize(num_voters);
      counter.reset(new VoteCounter(num_voters, majority_size));
    } else {
      // With flexi-raft, the counter is additionally given our UUID, the
      // candidate term, the last known leader and the config snapshot.
      counter.reset(new FlexibleVoteCounter(
          peer_uuid(),
          candidate_term,
          cmeta_->last_known_leader(),
          active_config,
          adjust_voter_distribution_));

      // Populate vote history for self. Although not really needed, this makes
      // the code simpler.
      // Entries are appended in the map's iteration order, i.e. ascending by
      // the int64_t key.
      const std::map<int64_t, PreviousVotePB>& pvh =
          cmeta_->previous_vote_history();
      std::map<int64_t, PreviousVotePB>::const_iterator it = pvh.begin();
      while(it != pvh.end()) {
        vote_info.previous_vote_history.push_back(it->second);
        it++;
      }
    }

    // Vote for ourselves.
    // 'duplicate' is an out-parameter; presumably always written by
    // RegisterVote() when it returns OK -- it is not initialized here.
    bool duplicate;
    RETURN_NOT_OK(counter->RegisterVote(peer_uuid(), vote_info, &duplicate));
    LOG_WITH_PREFIX_UNLOCKED(INFO) << "Self-Voted " << mode_str;
    // The counter was created just above, so a duplicate self-vote is treated
    // as a fatal invariant violation.
    // NOTE(review): the message prints CurrentTermUnlocked(), which in a
    // pre-election is candidate_term - 1 (see the adjustment above).
    CHECK(!duplicate) << LogPrefixUnlocked()
                      << "Inexplicable duplicate self-vote for term "
                      << CurrentTermUnlocked();

    // The shell VoteRequestPB is used to create the VoteRequestPB
    // for each of the specific peers.
    // NB: below dest_uuid is left unpopulated.
    VoteRequestPB request;
    request.set_ignore_live_leader(mode == ELECT_EVEN_IF_LEADER_IS_ALIVE);
    request.set_candidate_uuid(peer_uuid());
    request.set_candidate_term(candidate_term);
    *request.mutable_candidate_context()->mutable_candidate_peer_pb() =
      local_peer_pb_;

    if (mode == PRE_ELECTION) {
      request.set_is_pre_election(true);
    }
    request.set_tablet_id(options_.tablet_id);
    // Advertise the last op id in our log as the candidate's last-received op.
    *request.mutable_candidate_status()->mutable_last_received() =
        queue_->GetLastOpIdInLog();

    // active_config is cached into the LeaderElection, i.e.
    // if it changes during the LeaderElection process that is not
    // reacted to. Since LeaderElection operates on a snapshot of config,
    // it makes LeaderElection simpler, easier to reason with.
    // Note that active_config, request, counter and context are moved from
    // here and must not be used after this statement.
    election.reset(new LeaderElection(
        std::move(active_config),
        // The RaftConsensus ref passed below ensures that this raw pointer
        // remains safe to use for the entirety of LeaderElection's life.
        peer_proxy_factory_.get(),
        std::move(request), std::move(counter), timeout,
        std::bind(&RaftConsensus::ElectionCallback,
                  shared_from_this(),
                  std::move(context),
                  std::placeholders::_1)));
  }

  // Start the election outside the lock.
  election->Run();

  return Status::OK();
}