in src/kudu/tools/ksck.cc [879:1029]
// Verifies the health of a single tablet and records a summary of it.
//
// The check proceeds in these steps:
//   1. Build the master's view of the tablet's consensus config (leader,
//      voters, non-voters) from the replica list attached to 'tablet'.
//   2. For every replica, gather what the hosting tablet server reports
//      (replica state, tablet status, consensus state) if that server is
//      known to the cluster and healthy, and tally leaders, RUNNING voters
//      and replicas whose data state is TABLET_DATA_COPYING.
//   3. Count the replicas whose reported consensus state does not match the
//      master's view.
//   4. Classify the tablet. The first matching condition wins, in this order:
//      recovering, unavailable (no majority of RUNNING voters),
//      under-replicated (some voter not RUNNING), under-replicated (fewer
//      voters than desired), unavailable (leader count != 1), consensus
//      mismatch, healthy.
//   5. Append a TabletSummary to results_.cluster_status.tablet_summaries.
//
// 'tablet' is the tablet to verify. 'table_num_replicas' is the desired
// replica count; it is only compared against the number of voters when
// check_replica_count_ is set.
//
// Returns the health classification computed in step 4.
HealthCheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet,
                                     int table_num_replicas) {
  const string tablet_str = Substitute("Tablet $0 of table '$1'",
                                       tablet->id(), tablet->table()->name());

  // Build the master's view of the consensus config: the leader (if any
  // replica is marked as such) plus the voter and non-voter UUIDs.
  auto leader_it = std::find_if(tablet->replicas().cbegin(), tablet->replicas().cend(),
      [](const shared_ptr<KsckTabletReplica>& r) { return r->is_leader(); });
  optional<string> leader_uuid;
  if (leader_it != tablet->replicas().cend()) {
    leader_uuid = (*leader_it)->ts_uuid();
  }
  vector<string> voter_uuids_from_master;
  vector<string> non_voter_uuids_from_master;
  for (const auto& replica : tablet->replicas()) {
    if (replica->is_voter()) {
      voter_uuids_from_master.push_back(replica->ts_uuid());
    } else {
      non_voter_uuids_from_master.push_back(replica->ts_uuid());
    }
  }
  ConsensusState master_config(ConsensusConfigType::MASTER,
                               nullopt,
                               nullopt,
                               leader_uuid,
                               voter_uuids_from_master,
                               non_voter_uuids_from_master);

  int leaders_count = 0;
  int running_voters_count = 0;
  int copying_replicas_count = 0;
  int conflicting_states = 0;
  int num_voters = 0;
  vector<ReplicaSummary> replicas;
  for (const shared_ptr<KsckTabletReplica>& replica : tablet->replicas()) {
    replicas.emplace_back();
    auto* repl_info = &replicas.back();
    repl_info->ts_uuid = replica->ts_uuid();
    // NOTE(review): the message says "live", but it is emitted before the
    // tablet server is looked up and its health is checked below.
    VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1",
                          tablet->id(), replica->ts_uuid());

    // Check for agreement on tablet assignment and state between the master
    // and the tablet server.
    auto ts = FindPointeeOrNull(cluster_->tablet_servers(), replica->ts_uuid());
    if (ts) {
      repl_info->ts_address = ts->address();
    }
    // Server-side information is only collected from healthy servers; for
    // any other replica the corresponding summary fields keep their defaults.
    if (ts && ts->is_healthy()) {
      repl_info->ts_healthy = true;
      repl_info->state = ts->ReplicaState(tablet->id());
      if (ContainsKey(ts->tablet_status_map(), tablet->id())) {
        repl_info->status_pb = ts->tablet_status_map().at(tablet->id());
      }

      // Organize consensus info for each replica.
      pair<string, string> tablet_key = std::make_pair(ts->uuid(), tablet->id());
      if (ContainsKey(ts->tablet_consensus_state_map(), tablet_key)) {
        const auto& cstate = FindOrDieNoPrint(ts->tablet_consensus_state_map(), tablet_key);
        ConsensusState ksck_cstate;
        BuildConsensusStateForConfigMember(cstate, &ksck_cstate);
        repl_info->consensus_state = std::move(ksck_cstate);
      }
    }

    repl_info->is_leader = replica->is_leader();
    repl_info->is_voter = replica->is_voter();
    num_voters += replica->is_voter() ? 1 : 0;
    if (replica->is_leader()) {
      leaders_count++;
    }
    // A RUNNING voter is never counted as copying; anything else is counted
    // as copying only when its reported data state says so.
    if (repl_info->state == tablet::RUNNING && replica->is_voter()) {
      running_voters_count++;
    } else if (repl_info->status_pb &&
               repl_info->status_pb->tablet_data_state() == tablet::TABLET_DATA_COPYING) {
      copying_replicas_count++;
    }
  }

  // Compare the master's and peers' consensus configs. Replicas for which no
  // consensus state was collected are not counted as conflicting.
  for (const auto& r : replicas) {
    if (r.consensus_state && !r.consensus_state->Matches(master_config)) {
      conflicting_states++;
    }
  }

  // Determine the overall health state of the tablet.
  HealthCheckResult result = HealthCheckResult::HEALTHY;
  string status;
  int majority_size = consensus::MajoritySize(num_voters);
  if (copying_replicas_count > 0) {
    result = HealthCheckResult::RECOVERING;
    status = Substitute("$0 is $1: $2 on-going tablet copies",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "recovering"),
                        copying_replicas_count);
  } else if (running_voters_count < majority_size) {
    result = HealthCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::RED, "unavailable"),
                        num_voters - running_voters_count);
  } else if (running_voters_count < num_voters) {
    result = HealthCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: $2 replica(s) not RUNNING",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters - running_voters_count);
  } else if (check_replica_count_ && num_voters < table_num_replicas) {
    result = HealthCheckResult::UNDER_REPLICATED;
    status = Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "under-replicated"),
                        num_voters,
                        table_num_replicas);
  } else if (leaders_count != 1) {
    result = HealthCheckResult::UNAVAILABLE;
    status = Substitute("$0 is $1: expected one LEADER replica",
                        tablet_str, Color(AnsiCode::RED, "unavailable"));
  } else if (conflicting_states > 0) {
    result = HealthCheckResult::CONSENSUS_MISMATCH;
    status = Substitute("$0 is $1: $2 replicas' active configs disagree with the "
                        "leader master's",
                        tablet_str,
                        Color(AnsiCode::YELLOW, "conflicted"),
                        conflicting_states);
  } else {
    status = Substitute("$0 is $1.",
                        tablet_str,
                        Color(AnsiCode::GREEN, "healthy"));
  }

  TabletSummary tablet_summary;
  tablet_summary.id = tablet->id();
  tablet_summary.table_id = tablet->table()->id();
  tablet_summary.table_name = tablet->table()->name();
  tablet_summary.result = result;
  tablet_summary.status = std::move(status);
  tablet_summary.master_cstate = std::move(master_config);
  tablet_summary.replicas.swap(replicas);

  // Add printable representation of the key for the start of the range:
  // two lowercase hex digits per byte of the key.
  const auto& range_key_begin = tablet->partition().begin().range_key();
  ostringstream ss_range_key_begin;
  // std::hex and std::setfill persist on the stream, so they are set once;
  // std::setw applies to the next insertion only and is repeated per byte.
  ss_range_key_begin << std::hex << std::setfill('0');
  for (size_t i = 0; i < range_key_begin.size(); ++i) {
    // Plain char may be signed. Go through unsigned char before widening so
    // that bytes >= 0x80 are not sign-extended and printed as four digits
    // (e.g. "ff80" instead of "80").
    ss_range_key_begin << std::setw(2)
                       << static_cast<unsigned int>(
                              static_cast<unsigned char>(range_key_begin[i]));
  }
  tablet_summary.range_key_begin = ss_range_key_begin.str();
  VLOG(1) << Substitute("range start key for tablet $0: '$1'",
                        tablet_summary.id, tablet_summary.range_key_begin);

  results_.cluster_status.tablet_summaries.push_back(std::move(tablet_summary));
  return result;
}