KsckCheckResult Ksck::VerifyTablet()

in src/kudu/tools/ksck.cc [592:730]


// Evaluates the health of a single tablet and appends a KsckTabletSummary for
// it to 'results_.tablet_summaries'.
//
// 'tablet' supplies the master's view of the tablet (its id, its table and its
// replicas); 'table_num_replicas' is the replica count the tablet's voter
// count is compared against when 'check_replica_count_' is set.
//
// The returned result is the first of the following conditions that holds:
//   1. RECOVERING         - at least one replica reports TABLET_DATA_COPYING.
//   2. UNAVAILABLE        - fewer voters are RUNNING than
//                           consensus::MajoritySize(num_voters).
//   3. UNDER_REPLICATED   - some, but not too many, voters are not RUNNING.
//   4. UNDER_REPLICATED   - 'check_replica_count_' is set and there are fewer
//                           voters than 'table_num_replicas'.
//   5. UNAVAILABLE        - the master does not report exactly one leader.
//   6. CONSENSUS_MISMATCH - at least one replica's consensus state does not
//                           match the config built from the master's view.
//   7. HEALTHY            - none of the above.
KsckCheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet,
                                   int table_num_replicas) {
  const string tablet_str = Substitute("Tablet $0 of table '$1'",
                                 tablet->id(), tablet->table()->name());

  // Build the consensus config as the master sees it: the (optional) leader
  // plus the voter and non-voter replica UUIDs.
  auto leader_it = std::find_if(tablet->replicas().cbegin(), tablet->replicas().cend(),
      [](const shared_ptr<KsckTabletReplica>& r) -> bool { return r->is_leader(); });
  boost::optional<string> leader_uuid;
  if (leader_it != tablet->replicas().cend()) {
    leader_uuid = (*leader_it)->ts_uuid();
  }
  vector<string> voter_uuids_from_master;
  vector<string> non_voter_uuids_from_master;
  for (const auto& replica : tablet->replicas()) {
    if (replica->is_voter()) {
      voter_uuids_from_master.push_back(replica->ts_uuid());
    } else {
      non_voter_uuids_from_master.push_back(replica->ts_uuid());
    }
  }
  KsckConsensusState master_config(KsckConsensusConfigType::MASTER,
                                   boost::none,
                                   boost::none,
                                   leader_uuid,
                                   voter_uuids_from_master,
                                   non_voter_uuids_from_master);

  int leaders_count = 0;
  int running_voters_count = 0;
  int copying_replicas_count = 0;
  int num_voters = 0;
  vector<KsckReplicaSummary> replicas;
  for (const shared_ptr<KsckTabletReplica>& replica : tablet->replicas()) {
    replicas.emplace_back();
    // Only used within this iteration, so later growth of 'replicas' cannot
    // leave it dangling.
    auto* repl_info = &replicas.back();
    repl_info->ts_uuid = replica->ts_uuid();
    VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1",
                          tablet->id(), replica->ts_uuid());

    // Check for agreement on tablet assignment and state between the master
    // and the tablet server.
    auto ts = FindPointeeOrNull(cluster_->tablet_servers(), replica->ts_uuid());
    if (ts) {
      repl_info->ts_address = ts->address();
    }
    // State, status and consensus info are only filled in from a tablet server
    // that is both known to the cluster and healthy; otherwise the summary
    // keeps its default-constructed values for those fields.
    if (ts && ts->is_healthy()) {
      repl_info->ts_healthy = true;
      repl_info->state = ts->ReplicaState(tablet->id());
      if (ContainsKey(ts->tablet_status_map(), tablet->id())) {
        repl_info->status_pb = ts->tablet_status_map().at(tablet->id());
      }

      // Organize consensus info for each replica.
      std::pair<string, string> tablet_key = std::make_pair(ts->uuid(), tablet->id());
      if (ContainsKey(ts->tablet_consensus_state_map(), tablet_key)) {
        const auto& cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map(), tablet_key);
        KsckConsensusState ksck_cstate;
        BuildKsckConsensusStateForConfigMember(cstate, &ksck_cstate);
        repl_info->consensus_state = std::move(ksck_cstate);
      }
    }

    repl_info->is_leader = replica->is_leader();
    repl_info->is_voter = replica->is_voter();
    num_voters += replica->is_voter() ? 1 : 0;
    if (replica->is_leader()) {
      leaders_count++;
    }
    if (repl_info->state == tablet::RUNNING && replica->is_voter()) {
      running_voters_count++;
    } else if (repl_info->status_pb &&
               repl_info->status_pb->tablet_data_state() == tablet::TABLET_DATA_COPYING) {
      copying_replicas_count++;
    }
  }

  // Compare the master's and peers' consensus configs. This runs once, after
  // every replica summary has been built, so that each replica whose reported
  // consensus state disagrees with the master's is counted exactly once.
  int conflicting_states = 0;
  for (const auto& r : replicas) {
    if (r.consensus_state && !r.consensus_state->Matches(master_config)) {
      conflicting_states++;
    }
  }

  // Determine the overall health state of the tablet. The branches are ordered
  // by precedence: the first condition that holds decides the result.
  KsckCheckResult result = KsckCheckResult::HEALTHY;
  string status;
  int majority_size = consensus::MajoritySize(num_voters);
  if (copying_replicas_count > 0) {
    result = KsckCheckResult::RECOVERING;
    status = Substitute("$0 is $1: $2 on-going tablet copies",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "recovering"),
                        copying_replicas_count);
  } else if (running_voters_count < majority_size) {
    result = KsckCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::RED, "unavailable"),
                        num_voters - running_voters_count);
  } else if (running_voters_count < num_voters) {
    result = KsckCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters - running_voters_count);
  } else if (check_replica_count_ && num_voters < table_num_replicas) {
    result = KsckCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters,
                        table_num_replicas);
  } else if (leaders_count != 1) {
    result = KsckCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: expected one LEADER replica",
                        tablet_str, Color(AnsiCode::RED, "unavailable"));
  } else if (conflicting_states > 0) {
    result = KsckCheckResult::CONSENSUS_MISMATCH;
    // '$2' is the number of conflicting replicas.
    status = Substitute("$0 is $1: $2 replicas' active configs disagree with the master's",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "conflicted"),
                        conflicting_states);
  } else {
    status = Substitute("$0 is $1.",
                        tablet_str,
                        Color(AnsiCode::GREEN, "healthy"));
  }

  // Record the per-tablet summary; 'master_config' and 'replicas' are handed
  // over to the summary and must not be used past this point.
  KsckTabletSummary tablet_summary;
  tablet_summary.id = tablet->id();
  tablet_summary.table_id = tablet->table()->id();
  tablet_summary.table_name = tablet->table()->name();
  tablet_summary.result = result;
  tablet_summary.status = status;
  tablet_summary.master_cstate = std::move(master_config);
  tablet_summary.replicas.swap(replicas);
  results_.tablet_summaries.push_back(std::move(tablet_summary));
  return result;
}