in src/shell/commands/recovery.cpp [215:401]
// Shell command handler: fetches the list of DDD partitions through
// sc->ddl_client->ddd_diagnose(), prints a report for each of them and, when diagnosing
// is enabled, sends an ASSIGN_PRIMARY proposal for the chosen primary node.
//
// Options (parsed from `args` with getopt_long):
//   -g|--gpid <id>       either a value accepted by dsn::gpid::parse_from()
//                        (app_id.partition_index) or a plain integer app_id; the default
//                        is gpid(-1, -1) -- presumably "all partitions", confirm against
//                        ddl_client::ddd_diagnose().
//   -d|--diagnose        for each partition, compute a recommended primary with
//                        diagnose_recommend() and possibly send a proposal.
//   -a|--auto_diagnose   do not ask for confirmation of the recommended primary.
//   -s|--skip_prompt     never prompt; a partition without a recommended primary gets no
//                        proposal.
//   -o|--output <file>   write the report to <file> instead of stdout (prompts always go
//                        to stdout/stdin).
//
// Returns false when the arguments are invalid, true otherwise (including the cases where
// the query fails or the output file cannot be opened; those are reported on stderr).
//
// `e` is not used by this function.
bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args)
{
    static struct option long_options[] = {{"gpid", required_argument, 0, 'g'},
                                           {"diagnose", no_argument, 0, 'd'},
                                           {"auto_diagnose", no_argument, 0, 'a'},
                                           {"skip_prompt", no_argument, 0, 's'},
                                           {"output", required_argument, 0, 'o'},
                                           {0, 0, 0, 0}};

    std::string out_file;
    dsn::gpid id(-1, -1);
    bool diagnose = false;
    bool auto_diagnose = false;
    bool skip_prompt = false;

    // Reset getopt's global state so the command can be parsed repeatedly in one process.
    optind = 0;
    while (true) {
        int option_index = 0;
        const int c = getopt_long(args.argc, args.argv, "g:daso:", long_options, &option_index);
        if (c == -1) {
            break;
        }
        switch (c) {
        case 'g': {
            int pid = 0;
            if (id.parse_from(optarg)) {
                // app_id.partition_index
            } else if (sscanf(optarg, "%d", &pid) == 1) {
                // app_id
                id.set_app_id(pid);
            } else {
                fprintf(stderr, "ERROR: invalid gpid %s\n", optarg);
                return false;
            }
            break;
        }
        case 'd':
            diagnose = true;
            break;
        case 'a':
            auto_diagnose = true;
            break;
        case 's':
            skip_prompt = true;
            break;
        case 'o':
            out_file = optarg;
            break;
        default:
            return false;
        }
    }

    std::vector<ddd_partition_info> ddd_partitions;
    ::dsn::error_code ret = sc->ddl_client->ddd_diagnose(id, ddd_partitions);
    if (ret != dsn::ERR_OK) {
        fprintf(stderr, "ERROR: DDD diagnose failed with err = %s\n", ret.to_string());
        return true;
    }

    // Select the report destination: the requested file, or stdout by default.
    std::streambuf *buf = nullptr;
    std::ofstream of;
    if (!out_file.empty()) {
        of.open(out_file);
        if (!of.is_open()) {
            // Bail out before any proposal is sent: otherwise the whole report (including
            // the record of what was proposed) would be silently lost.
            fprintf(stderr, "ERROR: open output file %s failed\n", out_file.c_str());
            return true;
        }
        buf = of.rdbuf();
    } else {
        buf = std::cout.rdbuf();
    }
    std::ostream out(buf);

    out << "Total " << ddd_partitions.size() << " ddd partitions:" << std::endl;
    out << std::endl;

    int proposed_count = 0;
    int partition_no = 0;
    for (const ddd_partition_info &pinfo : ddd_partitions) {
        ++partition_no;
        out << "(" << partition_no << ") " << pinfo.config.pid << std::endl;
        out << " config: ballot(" << pinfo.config.ballot << "), "
            << "last_committed(" << pinfo.config.last_committed_decree << ")" << std::endl;
        out << " ----" << std::endl;

        // The last element of hp_last_drops is treated as the latest dropped node, the one
        // before it as the secondary latest. Both stay default-constructed when absent.
        const auto &last_drops = pinfo.config.hp_last_drops;
        const int last_drops_count = static_cast<int>(last_drops.size());
        dsn::host_port latest_dropped, secondary_latest_dropped;
        if (last_drops_count > 0) {
            latest_dropped = last_drops[last_drops_count - 1];
        }
        if (last_drops_count > 1) {
            secondary_latest_dropped = last_drops[last_drops_count - 2];
        }

        int dropped_idx = 0;
        for (const ddd_node_info &n : pinfo.dropped) {
            dsn::host_port hp_node;
            GET_HOST_PORT(n, node, hp_node);
            char time_buf[30] = {0};
            ::dsn::utils::time_ms_to_string(n.drop_time_ms, time_buf);
            out << " dropped[" << dropped_idx << "]: "
                << "node(" << hp_node << "), "
                << "drop_time(" << time_buf << "), "
                << "alive(" << (n.is_alive ? "true" : "false") << "), "
                << "collected(" << (n.is_collected ? "true" : "false") << "), "
                << "ballot(" << n.ballot << "), "
                << "last_committed(" << n.last_committed_decree << "), "
                << "last_prepared(" << n.last_prepared_decree << ")";
            if (hp_node == latest_dropped) {
                out << " <== the latest";
            } else if (hp_node == secondary_latest_dropped) {
                out << " <== the secondary latest";
            }
            out << std::endl;
            ++dropped_idx;
        }
        out << " ----" << std::endl;

        int drop_idx = 0;
        for (const auto &r : last_drops) {
            out << " last_drops[" << drop_idx << "]: "
                << "node(" << r.to_string() << ")";
            if (drop_idx == last_drops_count - 2) {
                out << " <== the secondary latest";
            } else if (drop_idx == last_drops_count - 1) {
                out << " <== the latest";
            }
            out << std::endl;
            ++drop_idx;
        }
        out << " ----" << std::endl;
        out << " ddd_reason: " << pinfo.reason << std::endl;

        if (diagnose) {
            out << " ----" << std::endl;

            auto primary = diagnose_recommend(pinfo);
            out << " recommend_primary: " << (!primary ? "none" : primary.to_string());
            // Only annotate a real recommendation: an invalid primary would otherwise
            // compare equal to a default-constructed latest/secondary-latest node and be
            // reported as "none <== the latest".
            if (primary) {
                if (primary == latest_dropped) {
                    out << " <== the latest";
                } else if (primary == secondary_latest_dropped) {
                    out << " <== the secondary latest";
                }
            }
            out << std::endl;

            bool skip_this = false;
            if (primary && !auto_diagnose && !skip_prompt) {
                // Ask until we get one of y / n / s.
                while (true) {
                    std::cout << " > Are you sure to use the recommend primary? [y/n/s(skip)]: ";
                    char answer = 0;
                    if (!(std::cin >> answer)) {
                        // EOF or read error: no confirmation can ever arrive, so do not
                        // spin forever and do not propose anything unconfirmed.
                        fprintf(stderr,
                                "ERROR: failed to read answer from stdin, skip this partition\n");
                        skip_this = true;
                        break;
                    }
                    if (answer == 'y') {
                        break;
                    }
                    if (answer == 'n') {
                        // Drop the recommendation; the user is asked for a node below.
                        primary.reset();
                        break;
                    }
                    if (answer == 's') {
                        skip_this = true;
                        std::cout << " > You have choosed to skip diagnosing this partition."
                                  << std::endl;
                        break;
                    }
                }
            }

            if (!primary && !skip_prompt && !skip_this) {
                // Ask until a parsable node address is entered.
                while (true) {
                    std::cout << " > Please input the primary node: ";
                    std::string node;
                    if (!(std::cin >> node)) {
                        // EOF or read error: leave `primary` invalid so that nothing is
                        // proposed for this partition.
                        fprintf(stderr,
                                "ERROR: failed to read node from stdin, skip this partition\n");
                        skip_this = true;
                        break;
                    }
                    primary = dsn::host_port::from_string(node);
                    if (primary) {
                        break;
                    }
                    std::cout << " > Sorry, you have input an invalid node address."
                              << std::endl;
                }
            }

            if (primary && !skip_this) {
                dsn::replication::configuration_balancer_request request;
                request.gpid = pinfo.config.pid;
                request.action_list = {
                    new_proposal_action(primary, primary, config_type::CT_ASSIGN_PRIMARY)};
                request.force = false;
                dsn::error_code err = sc->ddl_client->send_balancer_proposal(request);
                out << " propose_request: propose -g " << request.gpid
                    << " -p ASSIGN_PRIMARY -t " << primary << " -n " << primary << std::endl;
                out << " propose_response: " << err << std::endl;
                // Counts proposals that were sent, whatever the response was.
                proposed_count++;
            } else {
                out << " propose_request: none" << std::endl;
            }
        }

        // Running tally, printed after every partition.
        out << std::endl;
        out << "Proposed count: " << proposed_count << "/" << ddd_partitions.size() << std::endl;
        out << std::endl;
    }

    std::cout << "Diagnose ddd done." << std::endl;
    return true;
}