void nfs_inode::lookup_dircache()

in turbonfs/src/nfs_inode.cpp [2066:2231]


/**
 * Serve a directory enumeration request from this directory's readdir cache.
 *
 * Starting at 'cookie', consecutive cookies are looked up in dircache_handle
 * and the entries found are appended to 'results'. The walk stops when the
 * next cookie is not usable from the cache, or when the next entry would not
 * fit in the remaining 'max_size' bytes as reported by
 * directory_entry::get_fuse_buf_size().
 *
 * @param cookie      Cookie of the first directory entry to return.
 * @param max_size    Maximum number of bytes the returned entries may occupy,
 *                    must be in the range (0, 64MiB].
 * @param results     [out] Entries found, must be empty on entry.
 * @param eof         [out] Set to true only if the cached eof has been
 *                    reached, i.e., either the entry carrying the eof cookie
 *                    was added to 'results', or 'cookie' is at/after the eof
 *                    cookie and is not present in the cache at all.
 * @param readdirplus True if the results are needed for a READDIRPLUS
 *                    request, in which case entries w/o an nfs_inode are
 *                    treated as not cached.
 *
 * For every returned entry that has a valid nfs_inode, one lookupcnt ref is
 * taken (incref()) and forget_expected is incremented, while the dircachecnt
 * ref taken by lookup() is dropped. Caller must drop the lookupcnt ref and
 * forget_expected correctly.
 */
void nfs_inode::lookup_dircache(
    cookie3 cookie,
    size_t max_size,
    std::vector<std::shared_ptr<const directory_entry>>& results,
    bool& eof,
    bool readdirplus)
{
    // Sanity check.
    assert(max_size > 0 && max_size <= (64*1024*1024));
    assert(results.empty());
    // Must be called only for a directory inode.
    assert(is_dir());
    // Must have been allocated in open()/opendir().
    assert(has_dircache());

#ifndef ENABLE_NON_AZURE_NFS
    // Blob NFS uses cookie as a counter, so 4B is a practical check.
    assert(cookie < UINT32_MAX);
#endif

    /*
     * Before looking up the cache check if we need to purge it.
     * We need to purge the cache in two cases:
     * 1. readdirectory_cache is marked lookuponly.
     * 2. readdirectory_cache has invalidate_pending set.
     *
     * Note that lookuponly readdir caches cannot be used to serve directory
     * enumeration requests as they are not in sync with the actual directory
     * content (one or more file/dir has been created/deleted since we last
     * enumerated and cached the enumeration results).
     */
    dircache_handle->clear_if_needed();

    // Only used for logging.
    int num_cache_entries = 0;
    // Signed, as it is allowed to go negative when an entry does not fit.
    ssize_t rem_size = static_cast<ssize_t>(max_size);
    // Have we seen eof from the server?
    const bool dir_eof_seen = dircache_handle->get_eof();

    eof = false;

    while (rem_size > 0) {
        /*
         * lookup() will hold a dircachecnt ref on the inode if entry has a
         * valid nfs_inode. Also, there will be one dircachecnt because of the
         * directory_entry being present in dir_entries map.
         */
        std::shared_ptr<struct directory_entry> entry =
            dircache_handle->lookup(cookie);

        /*
         * Cached entries stored by a prior READDIR call are not usable
         * for READDIRPLUS as they won't have the attributes saved, treat
         * them as not present.
         * Remember that we dropped a real directory entry, since such an
         * entry still has to be returned to the caller (after fetching it
         * afresh) and hence we must not claim eof on its behalf.
         */
        bool unusable_for_readdirplus = false;
        if (entry && readdirplus && !entry->nfs_inode) {
            entry = nullptr;
            unusable_for_readdirplus = true;
        }

        if (entry) {
            /*
             * Get the size this entry will take when copied to fuse buffer.
             * The size is more for readdirplus, which copies the attributes
             * too. This way we make sure we don't return more than what fuse
             * readdir/readdirplus call requested.
             */
            rem_size -= entry->get_fuse_buf_size(readdirplus);

            if (rem_size >= 0) {
                /*
                 * This entry can fit in the fuse buffer. If entry->nfs_inode
                 * is valid then increase the inode lookupcnt ref and also the
                 * forget_expected. Note that we do it regardless of whether
                 * the caller wants it for READDIR or READDIRPLUS. Caller must
                 * drop the lookupcnt ref and forget_expected correctly.
                 */
                if (entry->nfs_inode) {
                    /*
                     * lookup() would have held a dircachecnt ref and one
                     * original dircachecnt ref held for each directory_entry
                     * added to dir_entries.
                     * Now that we hold a lookupcnt ref, drop the dircachecnt
                     * ref taken by lookup().
                     *
                     * Note: forget_expected MUST always be incremented after
                     *       lookupcnt.
                     */
                    entry->nfs_inode->incref();
                    entry->nfs_inode->forget_expected++;
                    assert(entry->nfs_inode->lookupcnt >=
                            (uint64_t) entry->nfs_inode->forget_expected);
                    assert(entry->nfs_inode->dircachecnt >= 2);
                    entry->nfs_inode->dircachecnt--;
                }

                num_cache_entries++;
                results.push_back(entry);

                /*
                 * We must convey eof to caller only after we successfully copy
                 * the directory entry with eof_cookie.
                 */
                if (dir_eof_seen &&
                    (entry->cookie == dircache_handle->get_eof_cookie())) {
                    eof = true;
                }
            } else {
                /*
                 * Drop the ref taken inside readdirectory_cache::lookup().
                 * Note that we should have 2 or more dircachecnt references,
                 * one taken by lookup() for the directory_entry copy returned
                 * to us and one already taken as the directory_entry is added
                 * to readdirectory_cache::dir_entries.
                 * Also note that this readdirectory_cache won't be purged,
                 * after lookup() releases readdircache_lock_2 since this dir
                 * is being enumerated by the current thread and hence it must
                 * have the directory open which should prevent fuse vfs from
                 * calling forget on the directory inode.
                 *
                 * Note: entry->nfs_inode may be null for entries populated using
                 *       only readdir however, it is guaranteed to be present for
                 *       readdirplus.
                 */
                if (entry->nfs_inode) {
                    /*
                     * Hold a lookupcnt ref across the dircachecnt drop and
                     * the release of our directory_entry reference, and only
                     * then let go of the inode through decref().
                     */
                    struct nfs_inode *inode = entry->nfs_inode;
                    inode->incref();
                    assert(inode->dircachecnt >= 2);
                    inode->dircachecnt--;
                    entry.reset();
                    inode->decref();
                }

                // No space left to add more entries.
                AZLogDebug("[{}] lookup_dircache: Returning {} entries, as {} bytes "
                           "of output buffer exhausted (eof={})",
                           get_fuse_ino(), num_cache_entries, max_size, eof);
                break;
            }

            /*
             * TODO: ENABLE_NON_AZURE_NFS alert!!
             *       Note that we assume sequentially increasing cookies.
             *       This is only true for Azure NFS. Linux NFS server
             *       also has sequentially increasing cookies but it
             *       sometimes have gaps in between which causes us to
             *       believe that we don't have the cookie and re-fetch
             *       it from the server.
             */
            cookie++;
        } else {
            /*
             * Call after we return the last cookie, comes here.
             *
             * If we are here because we discarded a READDIR-only entry for a
             * READDIRPLUS request, then a directory entry with this cookie
             * does exist and has not been returned. Do not convey eof in that
             * case else caller will end the enumeration w/o that entry.
             */
            if (dir_eof_seen && !unusable_for_readdirplus &&
                (cookie >= dircache_handle->get_eof_cookie())) {
                eof = true;
            }

            AZLogDebug("[{}] lookup_dircache: Returning {} entries, as next "
                       "cookie {} not found in cache (eof={})",
                       get_fuse_ino(), num_cache_entries, cookie, eof);

            /*
             * If we don't find the current cookie, then we will not find the
             * next ones as well since they are stored sequentially.
             */
            break;
        }
    }
}