in src/kudu/tools/ksck.cc [592:730]
// Checks the health of a single tablet and records a summary of the outcome.
//
// The master's view of the tablet (leader, voters, non-voters) is taken from
// 'tablet' and compared against what each hosting tablet server in 'cluster_'
// reports: replica state, tablet status and consensus state.
//
// 'table_num_replicas' is the desired number of replicas for the table; it is
// only compared against the number of voters when 'check_replica_count_' is set.
//
// The first matching condition below determines the result:
//   1. any replica is being copied                  -> RECOVERING
//   2. fewer RUNNING voters than a majority         -> UNAVAILABLE
//   3. fewer RUNNING voters than voters             -> UNDER_REPLICATED
//   4. fewer voters than desired (if checked)       -> UNDER_REPLICATED
//   5. the master does not report exactly 1 leader  -> UNAVAILABLE
//   6. a replica's consensus config differs from
//      the master's                                 -> CONSENSUS_MISMATCH
//   7. otherwise                                    -> HEALTHY
//
// A KsckTabletSummary describing the tablet is appended to
// 'results_.tablet_summaries', and the result is also returned.
KsckCheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet,
                                   int table_num_replicas) {
  const string tablet_str = Substitute("Tablet $0 of table '$1'",
                                       tablet->id(), tablet->table()->name());

  // Build the consensus config as the master sees it.
  auto leader_it = std::find_if(tablet->replicas().cbegin(), tablet->replicas().cend(),
      [](const shared_ptr<KsckTabletReplica>& r) -> bool { return r->is_leader(); });
  boost::optional<string> leader_uuid;
  if (leader_it != tablet->replicas().cend()) {
    leader_uuid = (*leader_it)->ts_uuid();
  }
  vector<string> voter_uuids_from_master;
  vector<string> non_voter_uuids_from_master;
  for (const auto& replica : tablet->replicas()) {
    if (replica->is_voter()) {
      voter_uuids_from_master.push_back(replica->ts_uuid());
    } else {
      non_voter_uuids_from_master.push_back(replica->ts_uuid());
    }
  }
  KsckConsensusState master_config(KsckConsensusConfigType::MASTER,
                                   boost::none,
                                   boost::none,
                                   leader_uuid,
                                   voter_uuids_from_master,
                                   non_voter_uuids_from_master);

  int leaders_count = 0;
  int running_voters_count = 0;
  int copying_replicas_count = 0;
  int num_voters = 0;
  vector<KsckReplicaSummary> replicas;
  for (const shared_ptr<KsckTabletReplica>& replica : tablet->replicas()) {
    replicas.emplace_back();
    auto* repl_info = &replicas.back();
    repl_info->ts_uuid = replica->ts_uuid();
    VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1",
                          tablet->id(), replica->ts_uuid());

    // Check for agreement on tablet assignment and state between the master
    // and the tablet server.
    auto ts = FindPointeeOrNull(cluster_->tablet_servers(), replica->ts_uuid());
    if (ts) {
      repl_info->ts_address = ts->address();
    }
    // Server-side details are only filled in when the tablet server is known
    // and healthy; otherwise the summary keeps its default values for them.
    if (ts && ts->is_healthy()) {
      repl_info->ts_healthy = true;
      repl_info->state = ts->ReplicaState(tablet->id());
      if (ContainsKey(ts->tablet_status_map(), tablet->id())) {
        repl_info->status_pb = ts->tablet_status_map().at(tablet->id());
      }

      // Organize consensus info for each replica.
      std::pair<string, string> tablet_key = std::make_pair(ts->uuid(), tablet->id());
      if (ContainsKey(ts->tablet_consensus_state_map(), tablet_key)) {
        const auto& cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map(), tablet_key);
        KsckConsensusState ksck_cstate;
        BuildKsckConsensusStateForConfigMember(cstate, &ksck_cstate);
        repl_info->consensus_state = std::move(ksck_cstate);
      }
    }

    repl_info->is_leader = replica->is_leader();
    repl_info->is_voter = replica->is_voter();
    num_voters += replica->is_voter() ? 1 : 0;
    if (replica->is_leader()) {
      leaders_count++;
    }
    if (repl_info->state == tablet::RUNNING && replica->is_voter()) {
      running_voters_count++;
    } else if (repl_info->status_pb &&
               repl_info->status_pb->tablet_data_state() == tablet::TABLET_DATA_COPYING) {
      copying_replicas_count++;
    }
  }

  // Compare the master's and peers' consensus configs. This is done in a
  // single pass once every replica has been collected, so that each
  // disagreeing replica is counted exactly once.
  int conflicting_states = 0;
  for (const auto& r : replicas) {
    if (r.consensus_state && !r.consensus_state->Matches(master_config)) {
      conflicting_states++;
    }
  }

  // Determine the overall health state of the tablet.
  KsckCheckResult result = KsckCheckResult::HEALTHY;
  string status;
  int majority_size = consensus::MajoritySize(num_voters);
  if (copying_replicas_count > 0) {
    result = KsckCheckResult::RECOVERING;
    status = Substitute("$0 is $1: $2 on-going tablet copies",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "recovering"),
                        copying_replicas_count);
  } else if (running_voters_count < majority_size) {
    result = KsckCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::RED, "unavailable"),
                        num_voters - running_voters_count);
  } else if (running_voters_count < num_voters) {
    result = KsckCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters - running_voters_count);
  } else if (check_replica_count_ && num_voters < table_num_replicas) {
    result = KsckCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters,
                        table_num_replicas);
  } else if (leaders_count != 1) {
    result = KsckCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: expected one LEADER replica",
                        tablet_str, Color(AnsiCode::RED, "unavailable"));
  } else if (conflicting_states > 0) {
    result = KsckCheckResult::CONSENSUS_MISMATCH;
    // '$2' is the number of disagreeing replicas.
    status = Substitute("$0 is $1: $2 replicas' active configs disagree with the master's",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "conflicted"),
                        conflicting_states);
  } else {
    status = Substitute("$0 is $1.",
                        tablet_str,
                        Color(AnsiCode::GREEN, "healthy"));
  }

  // Record the summary. 'master_config' and 'replicas' are handed over to the
  // summary here and must not be used afterwards.
  KsckTabletSummary tablet_summary;
  tablet_summary.id = tablet->id();
  tablet_summary.table_id = tablet->table()->id();
  tablet_summary.table_name = tablet->table()->name();
  tablet_summary.result = result;
  tablet_summary.status = status;
  tablet_summary.master_cstate = std::move(master_config);
  tablet_summary.replicas.swap(replicas);
  results_.tablet_summaries.push_back(std::move(tablet_summary));
  return result;
}