bool ddd_diagnose()

in src/shell/commands/recovery.cpp [215:401]


// Shell command that prints the partitions returned by `ddl_client->ddd_diagnose()` and,
// when `--diagnose` is given, sends an ASSIGN_PRIMARY proposal for each of them.
//
// Options:
//   -g|--gpid <app_id[.partition_index]>  restrict the query to one app or one partition
//   -d|--diagnose                         propose a primary for every listed partition
//   -a|--auto_diagnose                    accept the recommended primary without asking
//   -s|--skip_prompt                      never prompt; partitions without a recommended
//                                         primary get no proposal
//   -o|--output <file>                    write the report to <file> instead of stdout
//                                         (the interactive prompts always use stdout/stdin)
//
// `e` is not used by this command.
//
// Returns false only when the command line cannot be parsed; every other outcome,
// including a failed query or an output file that cannot be opened, returns true after
// reporting the error on stderr.
bool ddd_diagnose(command_executor *e, shell_context *sc, arguments args)
{
    static struct option long_options[] = {{"gpid", required_argument, 0, 'g'},
                                           {"diagnose", no_argument, 0, 'd'},
                                           {"auto_diagnose", no_argument, 0, 'a'},
                                           {"skip_prompt", no_argument, 0, 's'},
                                           {"output", required_argument, 0, 'o'},
                                           {0, 0, 0, 0}};

    std::string out_file;
    dsn::gpid id(-1, -1);
    bool diagnose = false;
    bool auto_diagnose = false;
    bool skip_prompt = false;
    // Reset getopt's scan position, this function may run many times in one process.
    optind = 0;
    while (true) {
        int option_index = 0;
        const int c = getopt_long(args.argc, args.argv, "g:daso:", long_options, &option_index);
        if (c == -1)
            break;
        switch (c) {
        case 'g': {
            int pid = 0;
            if (id.parse_from(optarg)) {
                // app_id.partition_index
            } else if (sscanf(optarg, "%d", &pid) == 1) {
                // app_id
                id.set_app_id(pid);
            } else {
                fprintf(stderr, "ERROR: invalid gpid %s\n", optarg);
                return false;
            }
            break;
        }
        case 'd':
            diagnose = true;
            break;
        case 'a':
            auto_diagnose = true;
            break;
        case 's':
            skip_prompt = true;
            break;
        case 'o':
            out_file = optarg;
            break;
        default:
            return false;
        }
    }

    std::vector<ddd_partition_info> ddd_partitions;
    ::dsn::error_code ret = sc->ddl_client->ddd_diagnose(id, ddd_partitions);
    if (ret != dsn::ERR_OK) {
        fprintf(stderr, "ERROR: DDD diagnose failed with err = %s\n", ret.to_string());
        return true;
    }

    std::streambuf *buf;
    std::ofstream of;

    if (!out_file.empty()) {
        of.open(out_file);
        if (!of.is_open()) {
            // Bail out before any proposal is sent: otherwise the report of what has been
            // proposed would be silently discarded.
            fprintf(stderr, "ERROR: open output file %s failed\n", out_file.c_str());
            return true;
        }
        buf = of.rdbuf();
    } else {
        buf = std::cout.rdbuf();
    }
    std::ostream out(buf);

    out << "Total " << ddd_partitions.size() << " ddd partitions:" << std::endl;
    out << std::endl;
    int proposed_count = 0;
    int i = 0;
    for (const ddd_partition_info &pinfo : ddd_partitions) {
        out << "(" << ++i << ") " << pinfo.config.pid << std::endl;
        out << "    config: ballot(" << pinfo.config.ballot << "), "
            << "last_committed(" << pinfo.config.last_committed_decree << ")" << std::endl;
        out << "    ----" << std::endl;

        // The last entry of hp_last_drops is treated as the latest dropped node and the
        // one before it as the secondary latest; both stay default-constructed if absent.
        const size_t last_drops_count = pinfo.config.hp_last_drops.size();
        dsn::host_port latest_dropped, secondary_latest_dropped;
        if (last_drops_count > 0) {
            latest_dropped = pinfo.config.hp_last_drops[last_drops_count - 1];
        }
        if (last_drops_count > 1) {
            secondary_latest_dropped = pinfo.config.hp_last_drops[last_drops_count - 2];
        }

        int j = 0;
        for (const ddd_node_info &n : pinfo.dropped) {
            dsn::host_port hp_node;
            GET_HOST_PORT(n, node, hp_node);
            char time_buf[30] = {0};
            ::dsn::utils::time_ms_to_string(n.drop_time_ms, time_buf);
            out << "    dropped[" << j++ << "]: "
                << "node(" << hp_node << "), "
                << "drop_time(" << time_buf << "), "
                << "alive(" << (n.is_alive ? "true" : "false") << "), "
                << "collected(" << (n.is_collected ? "true" : "false") << "), "
                << "ballot(" << n.ballot << "), "
                << "last_committed(" << n.last_committed_decree << "), "
                << "last_prepared(" << n.last_prepared_decree << ")";
            if (hp_node == latest_dropped)
                out << "  <== the latest";
            else if (hp_node == secondary_latest_dropped)
                out << "  <== the secondary latest";
            out << std::endl;
        }
        out << "    ----" << std::endl;

        for (size_t k = 0; k < last_drops_count; ++k) {
            out << "    last_drops[" << k << "]: "
                << "node(" << pinfo.config.hp_last_drops[k].to_string() << ")";
            if (k + 2 == last_drops_count)
                out << "  <== the secondary latest";
            else if (k + 1 == last_drops_count)
                out << "  <== the latest";
            out << std::endl;
        }
        out << "    ----" << std::endl;
        out << "    ddd_reason: " << pinfo.reason << std::endl;
        if (diagnose) {
            out << "    ----" << std::endl;

            auto primary = diagnose_recommend(pinfo);
            out << "    recommend_primary: " << (!primary ? "none" : primary.to_string());
            // Only annotate a real recommendation: an invalid primary must not be matched
            // against a default-constructed latest_dropped/secondary_latest_dropped.
            if (primary) {
                if (primary == latest_dropped)
                    out << "  <== the latest";
                else if (primary == secondary_latest_dropped)
                    out << "  <== the secondary latest";
            }
            out << std::endl;

            bool skip_this = false;
            if (primary && !auto_diagnose && !skip_prompt) {
                do {
                    std::cout << "    > Are you sure to use the recommend primary? [y/n/s(skip)]: ";
                    char c = '\0';
                    if (!(std::cin >> c)) {
                        // EOF or a broken stdin can never produce an answer; skip the
                        // partition instead of spinning on this prompt forever.
                        skip_this = true;
                        std::cout << std::endl
                                  << "    > Failed to read from stdin, skip diagnosing this "
                                     "partition."
                                  << std::endl;
                        break;
                    }
                    if (c == 'y') {
                        break;
                    } else if (c == 'n') {
                        // Drop the recommendation so that the node is asked for below.
                        primary.reset();
                        break;
                    } else if (c == 's') {
                        skip_this = true;
                        std::cout << "    > You have choosed to skip diagnosing this partition."
                                  << std::endl;
                        break;
                    }
                } while (true);
            }

            if (!primary && !skip_prompt && !skip_this) {
                do {
                    std::cout << "    > Please input the primary node: ";
                    std::string node;
                    if (!(std::cin >> node)) {
                        // Same as above: no more input will ever arrive.
                        skip_this = true;
                        std::cout << std::endl
                                  << "    > Failed to read from stdin, skip diagnosing this "
                                     "partition."
                                  << std::endl;
                        break;
                    }
                    primary = dsn::host_port::from_string(node);
                    if (primary) {
                        break;
                    }
                    std::cout << "    > Sorry, you have input an invalid node address."
                              << std::endl;
                } while (true);
            }

            if (primary && !skip_this) {
                dsn::replication::configuration_balancer_request request;
                request.gpid = pinfo.config.pid;
                request.action_list = {
                    new_proposal_action(primary, primary, config_type::CT_ASSIGN_PRIMARY)};
                request.force = false;
                dsn::error_code err = sc->ddl_client->send_balancer_proposal(request);
                out << "    propose_request: propose -g " << request.gpid
                    << " -p ASSIGN_PRIMARY -t " << primary << " -n " << primary << std::endl;
                out << "    propose_response: " << err << std::endl;
                // Counts every proposal that has been sent, whatever the response is.
                proposed_count++;
            } else {
                out << "    propose_request: none" << std::endl;
            }
        }
        out << std::endl;
        // Printed after each partition, so it is a running total.
        out << "Proposed count: " << proposed_count << "/" << ddd_partitions.size() << std::endl;
        out << std::endl;
    }

    std::cout << "Diagnose ddd done." << std::endl;
    return true;
}