void rpc_stats_az::dump_stats()

in turbonfs/src/rpc_stats.cpp [49:495]


void rpc_stats_az::dump_stats()
{
    const struct nfs_client& client = nfs_client::get_instance();
    const struct rpc_transport& transport = client.get_transport();
    const std::vector<struct nfs_connection*> connections =
        transport.get_all_connections();
    const struct mount_options& mo = client.mnt_options;
    const struct sockaddr_storage *saddr = nullptr;
    struct rpc_stats cum_stats = {};
    std::string str;
    uint64_t total_inodes, num_files, num_dirs, num_symlinks,
             open_files, open_dirs, num_files_cache_empty, num_dirs_cache_empty,
             num_forgotten, expecting_forget, num_dircached, num_silly_renamed;

    // Gather files/inode related stats.
    client.get_inode_stats(total_inodes,
                           num_files,
                           num_dirs,
                           num_symlinks,
                           open_files,
                           open_dirs,
                           num_files_cache_empty,
                           num_dirs_cache_empty,
                           num_forgotten,
                           expecting_forget,
                           num_dircached,
                           num_silly_renamed);

    /*
     * Take exclusive lock to avoid mixing dump from simultaneous dump
     * requests.
     */
    std::unique_lock<std::mutex> _lock(stats_lock_42);

    /*
     * Go over all connections, query libnfs for stats for each and accumulate
     * them.
     */
    for (struct nfs_connection *conn : connections) {
        struct rpc_stats stats;
        struct rpc_context *rpc = nfs_get_rpc_context(conn->get_nfs_context());

        /*
         * All nconnect connections will terminate at the same IPv4 address,
         * so use the one corresponding to the first connection.
         */
        if (!saddr) {
            saddr = nfs_get_server_address(conn->get_nfs_context());
            // Currently Blob NFS only supports IPv4 address.
            assert(((struct sockaddr_in *)saddr)->sin_family == AF_INET);
        }

        rpc_get_stats(rpc, &stats);

#define _CUM(s) cum_stats.s += stats.s
        _CUM(num_req_sent);
        _CUM(num_resp_rcvd);
        _CUM(num_timedout);
        _CUM(num_timedout_in_outqueue);
        _CUM(num_major_timedout);
        _CUM(num_retransmitted);
        _CUM(num_reconnects);
        _CUM(outqueue_len);
        _CUM(waitpdu_len);
        _CUM(num_write_eagain);
        _CUM(tot_write_bytes_before_eagain);
#undef _CUM
    }

    str += "---[RPC stats]----------\n";
    str += "Stats for " + mo.server + ":" + mo.export_path +
           " mounted on " + mo.mountpoint + ":\n";
    str += "  NFS mount options:" +
           std::string(mo.readonly ? "ro" : "rw") +
           std::string(",vers=3") +
           ",rsize=" + std::to_string(mo.rsize_adj) +
           ",wsize=" + std::to_string(mo.wsize_adj) +
           ",acregmin=" + std::to_string(mo.acregmin) +
           ",acregmax=" + std::to_string(mo.acregmax) +
           ",acdirmin=" + std::to_string(mo.acdirmin) +
           ",acdirmax=" + std::to_string(mo.acdirmax) +
           std::string(",hard,proto=tcp") +
           ",nconnect=" + std::to_string(mo.num_connections) +
           ",port=" + std::to_string(mo.nfs_port) +
           ",timeo=" + std::to_string(mo.timeo) +
           ",retrans=" + std::to_string(mo.retrans) +
           std::string(",sec=sys") +
           std::string(",xprtsec=") + mo.xprtsec +
           std::string(",mountaddr=") +
           ::inet_ntoa(((struct sockaddr_in *)saddr)->sin_addr) +
           ",mountport=" + std::to_string(mo.mount_port) +
           std::string(",mountproto=tcp\n");

    str += "RPC statistics:\n";
    str += "  " + std::to_string(cum_stats.num_req_sent) +
                  " RPC requests sent\n";
    str += "  " + std::to_string(cum_stats.num_resp_rcvd) +
                  " RPC replies received\n";
    str += "  " + std::to_string(cum_stats.outqueue_len) +
                  " RPC requests in libnfs outqueue\n";
    str += "  " + std::to_string(cum_stats.waitpdu_len) +
                  " RPC requests in libnfs waitpdu queue\n";
    str += "  " + std::to_string(cum_stats.num_timedout_in_outqueue) +
                  " RPC requests timed out in outqueue\n";
    str += "  " + std::to_string(cum_stats.num_timedout) +
                  " RPC requests timed out waiting for response\n";
    str += "  " + std::to_string(cum_stats.num_major_timedout) +
                  " RPC requests major timed out\n";
    str += "  " + std::to_string(cum_stats.num_retransmitted) +
                  " RPC requests retransmitted\n";
    str += "  " + std::to_string(cum_stats.num_reconnects) +
                  " Reconnect attempts\n";
    if (cum_stats.num_write_eagain) {
        str += "  " + std::to_string(cum_stats.num_write_eagain) +
                      " Socket writes returned EAGAIN\n";
        str += "  " + std::to_string(cum_stats.tot_write_bytes_before_eagain /
                                     cum_stats.num_write_eagain) +
                      " Avg bytes before socket write returned EAGAIN\n";
    }
    str += "  " + std::to_string(client.get_transport().get_avg_qlen_r()) +
                  " Avg rpc qlen seen by reads\n";
    str += "  " + std::to_string(client.get_transport().get_max_qlen_r()) +
                  " Max rpc qlen seen by reads\n";
    str += "  " + std::to_string(client.get_transport().get_avg_qlen_w()) +
                  " Avg rpc qlen seen by writes\n";
    str += "  " + std::to_string(client.get_transport().get_max_qlen_w()) +
                  " Max rpc qlen seen by writes\n";

    str += "File/Inode statistics:\n";
    str += "  " + std::to_string(total_inodes) +
                  " total inodes\n";
    str += "  " + std::to_string(num_files) +
                  " regular files\n";
    str += "  " + std::to_string(num_dirs) +
                  " directories\n";
    str += "  " + std::to_string(num_symlinks) +
                  " symlinks\n";
    str += "  " + std::to_string(open_files) +
                  " files currently open\n";
    str += "  " + std::to_string(open_dirs) +
                  " directories currently open\n";
    str += "  " + std::to_string(num_files_cache_empty) +
                  " files have empty cache\n";
    str += "  " + std::to_string(num_dirs_cache_empty) +
                  " directories have empty cache\n";
    str += "  " + std::to_string(num_forgotten) +
                  " inodes forgotten by fuse, still in inode cache\n";
    str += "  " + std::to_string(expecting_forget) +
                  " inodes not yet forgotten by fuse\n";
    str += "  " + std::to_string(num_dircached) +
                  " inodes cached by one or more readdir cache\n";
    str += "  " + std::to_string(num_silly_renamed) +
                  " inodes silly-renamed (waiting for last close)\n";

    // Maximum cache size allowed in bytes.
    uint64_t max_cache =
        (aznfsc_cfg.cache.data.user.max_size_mb * 1024 * 1024ULL);
    assert(max_cache != 0);

    str += "File Cache statistics:\n";
    if (aznfsc_cfg.cache.data.user.enable) {
        str += "  " + std::to_string(aznfsc_cfg.cache.data.user.max_size_mb) +
                      " MB user cache size configured\n";
    } else {
        str += "  user cache disabled\n";
    }
    if (aznfsc_cfg.cache.data.kernel.enable) {
        str += "  kernel cache enabled\n";
    } else {
        str += "  kernel cache disabled\n";
    }
    str += "  " + std::to_string(bytes_chunk_cache::get_num_caches()) +
                  " file caches\n";
    str += "  " + std::to_string(bytes_chunk_cache::num_chunks_g) +
                  " chunks in chunkmap\n";

    double allocate_pct =
        ((bytes_chunk_cache::bytes_allocated_g * 100.0) / max_cache);
    str += "  " + std::to_string(bytes_chunk_cache::bytes_allocated_g) +
                  " bytes allocated (" +
                  std::to_string(allocate_pct) + "%)\n";

    str += "  " + std::to_string(bytes_chunk_cache::bytes_cached_g) +
                  " bytes cached\n";

    const double dirty_pct =
        ((bytes_chunk_cache::bytes_dirty_g * 100.0) / max_cache);
    str += "  " + std::to_string(bytes_chunk_cache::bytes_dirty_g) +
                  " bytes dirty (" +
                  std::to_string(dirty_pct) + "%)\n";

    str += "  " + std::to_string(bytes_chunk_cache::bytes_flushing_g) +
                  " bytes currently flushing\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_commit_pending_g) +
                  " bytes pending commit\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_uptodate_g) +
                  " bytes uptodate\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_inuse_g) +
                  " bytes inuse\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_locked_g) +
                  " bytes locked\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_get_g) +
                  " bytes mapped via " +
                  std::to_string(bytes_chunk_cache::num_get_g) +
                  " get calls\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_release_g) +
                  " bytes released via " +
                  std::to_string(bytes_chunk_cache::num_release_g) +
                  " release calls\n";
    str += "  " + std::to_string(bytes_chunk_cache::bytes_truncate_g) +
                  " bytes truncated via " +
                  std::to_string(bytes_chunk_cache::num_truncate_g) +
                  " truncate calls\n";

    if (bytes_chunk_cache::num_lockwait_g) {
        const double lockwait_pct =
            ((bytes_chunk_cache::num_lockwait_g * 100.0) / bytes_chunk_cache::num_locked_g);
        str += "  " + std::to_string(bytes_chunk_cache::lock_wait_usecs_g /
                                     (bytes_chunk_cache::num_lockwait_g * 1000.0)) +
                      " msec avg lock wait (" +
                      std::to_string(lockwait_pct) + "% had to wait)\n";
    }

    // Maximum readdir cache size allowed in bytes.
    max_cache =
        (aznfsc_cfg.cache.readdir.user.max_size_mb * 1024 * 1024ULL);
    assert(max_cache != 0);

    str += "Readdir Cache statistics:\n";
    if (aznfsc_cfg.cache.readdir.user.enable) {
        str += "  " + std::to_string(aznfsc_cfg.cache.readdir.user.max_size_mb) +
                      " MB user cache size configured\n";
    } else {
        str += "  user cache disabled\n";
    }
    if (aznfsc_cfg.cache.readdir.kernel.enable) {
        str += "  kernel cache enabled\n";
    } else {
        str += "  kernel cache disabled\n";
    }
    str += "  " + std::to_string(readdirectory_cache::num_caches) +
                  " directory caches\n";
    str += "  " + std::to_string(readdirectory_cache::num_dirents_g) +
                  " total directory entries cached\n";
    allocate_pct =
        ((readdirectory_cache::bytes_allocated_g * 100.0) / max_cache);
    str += "  " + std::to_string(readdirectory_cache::bytes_allocated_g) +
                  " bytes allocated (" +
                  std::to_string(allocate_pct) + "%)\n";
    str += "  " + std::to_string(readdirectory_cache::num_dirents_returned_g) +
                  " directory entries returned to fuse over " +
                  std::to_string(readdirectory_cache::num_readdir_calls_g) +
                  " readdir and " +
                  std::to_string(readdirectory_cache::num_readdirplus_calls_g) +
                  " readdirplus calls\n";



    str += "Application statistics:\n";
    const uint64_t avg_app_read_size =
        app_read_reqs ? (app_bytes_read / app_read_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(app_bytes_read)) +
                  " bytes read by application(s) in " +
                  std::to_string(app_read_reqs) + " calls with avg size " +
                  std::to_string(avg_app_read_size) + " bytes\n";
    if (failed_read_reqs) {
        str += "  " + std::to_string(GET_GBL_STATS(failed_read_reqs)) +
                      " application reads failed\n";
    }

    if (zero_reads) {
        str += "  " + std::to_string(GET_GBL_STATS(zero_reads)) +
                      " reads completed with 0 bytes\n";
    }

    const double read_cache_pct =
        app_bytes_read ?
        ((bytes_read_from_cache * 100.0) / app_bytes_read) : 0;
    assert(read_cache_pct <= 100);
    str += "  " + std::to_string(GET_GBL_STATS(bytes_read_from_cache)) +
                  " bytes served from read cache (" +
                  std::to_string(read_cache_pct) + "%)\n";

    const double hole_cache_pct =
        app_bytes_read ?
        ((bytes_zeroed_from_cache * 100.0) / app_bytes_read) : 0;
    assert(hole_cache_pct <= 100);
    str += "  " + std::to_string(GET_GBL_STATS(bytes_zeroed_from_cache)) +
                  " bytes holes read from cache (" +
                  std::to_string(hole_cache_pct) + "%)\n";

    const uint64_t avg_server_read_size =
        server_read_reqs ? (server_bytes_read / server_read_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(server_bytes_read)) +
                  " bytes read from server in " +
                  std::to_string(server_read_reqs) + " calls with avg size " +
                  std::to_string(avg_server_read_size) + " bytes\n";

    const uint64_t avg_ra_size =
        num_readhead ? (bytes_read_ahead / num_readhead) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(bytes_read_ahead)) +
                  " bytes read by readahead with avg size " +
                  std::to_string(avg_ra_size) + " bytes and ra scale factor " +
                  std::to_string(nfs_client::get_ra_scale_factor()) + "\n";

    const uint64_t avg_app_write_size =
        app_write_reqs ? (app_bytes_written / app_write_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(app_bytes_written)) +
                  " bytes written by application(s) in " +
                  std::to_string(app_write_reqs) + " calls with avg size " +
                  std::to_string(avg_app_write_size) + " bytes\n";
    if (failed_write_reqs) {
        str += "  " + std::to_string(GET_GBL_STATS(failed_write_reqs)) +
                      " application writes failed\n";
    }
    const uint64_t avg_server_write_size =
        server_write_reqs ? (server_bytes_written / server_write_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(server_bytes_written)) +
                  " bytes written to server in " +
                  std::to_string(server_write_reqs) + " calls with avg size " +
                  std::to_string(avg_server_write_size) + " bytes\n";

    str += "  " + std::to_string(GET_GBL_STATS(writes_np)) +
                  " writes did not hit any memory pressure\n";
    str += "  " + std::to_string(GET_GBL_STATS(inline_writes)) +
                  " writes had to wait inline\n";
    str += "  " + std::to_string(GET_GBL_STATS(inline_writes_lp)) +
                  " writes were inlined due to per-file cache limit\n";
    str += "  " + std::to_string(GET_GBL_STATS(inline_writes_gp)) +
                  " writes were inlined due to global cache limit\n";
    str += "  " + std::to_string(GET_GBL_STATS(flush_seq)) +
                  " flushes triggered as sequential limit was reached\n";
    str += "  " + std::to_string(GET_GBL_STATS(flush_lp)) +
                  " flushes triggered as per-file cache limit was reached\n";
    str += "  " + std::to_string(GET_GBL_STATS(flush_gp)) +
                  " flushes triggered as global cache limit was reached\n";
    str += "  " + std::to_string(GET_GBL_STATS(commit_lp)) +
                  " commits triggered as per-file cache limit was reached\n";
    str += "  " + std::to_string(GET_GBL_STATS(commit_gp)) +
                  " commits triggered as global cache limit was reached\n";

    const uint64_t avg_sync_membufs_size =
        num_sync_membufs ? (tot_bytes_sync_membufs / num_sync_membufs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(num_sync_membufs)) +
                  " sync_membufs calls with avg size " +
                  std::to_string(avg_sync_membufs_size) + " bytes and fc scale factor " +
                  std::to_string(nfs_client::get_fc_scale_factor()) + "\n";

    const double getattr_cache_pct =
        tot_getattr_reqs ?
        ((getattr_served_from_cache * 100.0) / tot_getattr_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(tot_getattr_reqs)) +
                  " getattr requests received\n";
    str += "  " + std::to_string(GET_GBL_STATS(getattr_served_from_cache)) +
                  " getattr served from cache (" +
                  std::to_string(getattr_cache_pct) + "%)\n";
    const double lookup_cache_pct =
        tot_lookup_reqs ?
        ((lookup_served_from_cache * 100.0) / tot_lookup_reqs) : 0;
    str += "  " + std::to_string(GET_GBL_STATS(tot_lookup_reqs)) +
                  " lookup requests received\n";
    str += "  " + std::to_string(GET_GBL_STATS(lookup_served_from_cache)) +
                  " lookup served from cache (" +
                  std::to_string(lookup_cache_pct) + "%)\n";

    str += "Misc statistics:\n";
    str += "  " + std::to_string(GET_GBL_STATS(rpc_tasks_allocated)) +
                  " rpc tasks currently running\n";
    str += "  " + std::to_string(GET_GBL_STATS(fuse_responses_awaited)) +
                  " responses awaited by fuse\n";
    str += "  " + std::to_string(GET_GBL_STATS(fuse_reply_failed)) +
                  " fuse replies failed to send\n";

#define DUMP_OP(opcode) \
do { \
    const auto& ops = opstats[opcode]; \
    if (ops.count != 0) { \
        const std::string opstr = rpc_task::fuse_opcode_to_string(opcode); \
        const int pcent_ops = (ops.count * 100.0) / cum_stats.num_req_sent; \
        str += opstr + ":\n"; \
        str += "        " + std::to_string(ops.count) + \
                        " ops (" + std::to_string(pcent_ops) + "%)\n"; \
        if (ops.pending > 0) { \
            str += "        " + std::to_string((int64_t) ops.pending) + \
                            " pending\n"; \
        } \
        str += "        Avg bytes sent per op: " + \
                        std::to_string(ops.bytes_sent / ops.count) + "\n"; \
        str += "        Avg bytes received per op: " + \
                        std::to_string(ops.bytes_rcvd / ops.count) + "\n"; \
        str += "        Avg RTT: " + \
                        std::to_string(ops.rtt_usec / (ops.count * 1000.0)) + \
                        " msec\n"; \
        str += "        Avg dispatch wait: " + \
                        std::to_string(ops.dispatch_usec / (ops.count * 1000.0)) + \
                        " msec\n"; \
        str += "        Avg fuse issue time: " + \
                        std::to_string(ops.fuse_handler_usec / (ops.count * 1000.0)) + \
                        " msec\n"; \
        str += "        Avg Total execute time: " + \
                        std::to_string(ops.total_usec / (ops.count * 1000.0)) + \
                        " msec\n"; \
        if (opcode == FUSE_READ) { \
            str += "        Last 5 sec throughput: " + \
                            std::to_string(client.get_read_MBps()) + " MBps\n"; \
        } else if (opcode == FUSE_WRITE) { \
            str += "        Last 5 sec throughput: " + \
                            std::to_string(client.get_write_MBps()) + " MBps\n"; \
        } \
        if (!ops.error_map.empty()) { \
            str += "        Errors encountered: \n"; \
            for (const auto& entry : ops.error_map) { \
                str += "            " + \
                        std::string((entry.first == NFS3ERR_RPC_ERROR) ? \
                                    "RPC Errors" : nfsstat3_to_str(entry.first)) +  ": " + \
                        std::to_string(entry.second) + "\n"; \
            } \
        } \
    } \
} while (0)

    DUMP_OP(FUSE_STATFS);
    DUMP_OP(FUSE_LOOKUP);
    DUMP_OP(FUSE_ACCESS);
    DUMP_OP(FUSE_GETATTR);
    DUMP_OP(FUSE_SETATTR);
    DUMP_OP(FUSE_CREATE);
    DUMP_OP(FUSE_MKNOD);
    DUMP_OP(FUSE_MKDIR);
    DUMP_OP(FUSE_SYMLINK);
    DUMP_OP(FUSE_READLINK);
    DUMP_OP(FUSE_RMDIR);
    DUMP_OP(FUSE_UNLINK);
    DUMP_OP(FUSE_RENAME);
    DUMP_OP(FUSE_READDIR);
    DUMP_OP(FUSE_READDIRPLUS);
    DUMP_OP(FUSE_READ);
    DUMP_OP(FUSE_WRITE);
    // FUSE_FLUSH corresponds to the COMMIT RPC
    DUMP_OP(FUSE_FLUSH);

    /*
     * TODO: Add more ops.
     */

    AZLogWarn("\n{}\n", str.c_str());
}