void nfs_inode::decref()

in turbonfs/src/nfs_inode.cpp [163:303]


/**
 * Drop 'cnt' references from this inode's lookupcnt.
 *
 * If dropping 'cnt' refs would take lookupcnt to 0, the inode's cache is
 * purged through invalidate_cache() and the inode is handed over to
 * client->put_nfs_inode(this, cnt), with the 'cnt' refs still held on the
 * inode at the time of that call. Otherwise lookupcnt is simply reduced by
 * 'cnt'.
 *
 * @param cnt         Number of refs to drop. Asserted to be > 0 and not more
 *                    than the current lookupcnt. Asserted to be exactly 1
 *                    when 'from_forget' is false.
 * @param from_forget When true, 'cnt' is additionally subtracted from
 *                    forget_expected (the count of forgets still expected
 *                    from fuse, going by the checks and comments below).
 *
 * Must not be called on an inode for which is_forgotten() is true.
 */
void nfs_inode::decref(size_t cnt, bool from_forget)
{
    AZLogDebug("[{}:{}] decref(cnt={}, from_forget={}) called "
               "(lookupcnt={}, dircachecnt={}, forget_expected={}, opencnt={})",
               get_filetype_coding(), ino, cnt, from_forget,
               lookupcnt.load(), dircachecnt.load(),
               forget_expected.load(), opencnt.load());

    /*
     * We only decrement lookupcnt in forget and once lookupcnt drops to
     * 0 we mark the inode as forgotten, so decref() should not be called
     * for forgotten inode.
     */
    assert(!is_forgotten());
    assert(cnt > 0);
    // When not from forget, there's never a case to pass cnt > 1.
    assert(from_forget || (cnt == 1));
    // Caller cannot drop more refs than the inode currently holds.
    assert(lookupcnt >= cnt);

    if (from_forget) {
#ifdef ENABLE_PARANOID
        /*
         * Fuse should not call more forgets than how many times we returned
         * the inode to fuse.
         */
        if ((int64_t) cnt > forget_expected) {
            AZLogError("[{}:{}] Extra forget from fuse @ {}, got {}, "
                       "expected {}, last forget seen @ {}, lookupcnt={}, "
                       "dircachecnt={}",
                       get_filetype_coding(), ino,
                       get_current_usecs(), cnt, forget_expected.load(),
                       last_forget_seen_usecs, lookupcnt.load(),
                       dircachecnt.load());
            assert(0);
        }
        // Remember when this forget was seen, it's logged above on failure.
        last_forget_seen_usecs = get_current_usecs();
#endif

        /*
         * This call will drop 'cnt' refs from both 'lookupcnt' and
         * 'forget_expected'. After that also 'lookupcnt' must have
         * 'forget_expected' or more refs.
         */
        assert((lookupcnt - cnt) >= (uint64_t) (forget_expected - cnt));

        /*
         * forget_expected is decremented here, before lookupcnt is touched
         * below. Note that this is done before the try_again label, so a
         * retry does not decrement forget_expected again.
         */
        forget_expected -= cnt;
        assert(forget_expected >= 0);
    } else {
        /*
         * lookupcnt should be forget_expected + local refs on the inode, so
         * should never be less than forget_expected. See how we increment
         * forget_expected after lookupcnt and decrement before lookupcnt,
         * so it's safe to compare.
         */
        assert((lookupcnt - cnt) >= (uint64_t) forget_expected);
    }

try_again:
    /*
     * Grab an extra ref so that the lookupcnt-=cnt does not cause the refcnt
     * to drop to 0, else some other thread can delete the inode before we get
     * to call put_nfs_inode().
     *
     * With the extra ref held, the value left after subtracting 'cnt' is 1
     * only if the 'cnt' refs being dropped were the last refs on the inode,
     * i.e., this decref() is the one that would take lookupcnt to 0.
     */
    ++lookupcnt;
    const bool forget_now = ((lookupcnt -= cnt) == 1);

    if (forget_now) {
        /*
         * For directory inodes it's a good time to purge the dircache, since
         * fuse VFS has lost all references on the directory. Note that we
         * can purge the directory cache at a later point also, but doing it
         * here causes the fuse client to behave like the Linux kernel NFS
         * client where we can purge the directory cache by writing to
         * /proc/sys/vm/drop_caches.
         * Also for files since the inode last ref is dropped, further accesses
         * are unlikely, hence we can drop file caches too.
         *
         * Note that invalidate_cache with purge_now=true, will take exclusive
         * lock on chunkmap_lock_43 for files and readdircache_lock_2 for
         * directories.
         */
        invalidate_cache(true /* purge_now */, true /* shutdown*/);

        /*
         * Reduce the extra refcnt and revert the cnt.
         * After this the inode will have 'cnt' references that need to be
         * dropped by put_nfs_inode() call below, with inode_map_lock_0 held.
         *
         * Adding (cnt - 1) gives back the 'cnt' refs subtracted above and
         * drops the extra ref in one step, so lookupcnt does not pass through
         * 0 in between.
         */
        lookupcnt += (cnt - 1);
        assert(lookupcnt >= cnt);

        /*
         * It's possible that while we were purging the dir cache above,
         * some other thread got a new ref on this inode (maybe it enumerated
         * its parent dir). In that case put_nfs_inode() will not free the
         * inode.
         */
        if (lookupcnt == cnt) {
            AZLogDebug("[{}:{}] lookupcnt dropping by {}, to 0, forgetting inode",
                       get_filetype_coding(), ino, cnt);
        } else {
            AZLogWarn("[{}:{}] lookupcnt dropping by {}, to {} "
                      "(some other thread got a fresh ref)",
                      get_filetype_coding(), ino, cnt, lookupcnt - cnt);
        }

        /*
         * This FORGET would drop the lookupcnt to 0, fuse vfs should not send
         * any more forgets, delete the inode. Note that before we grab the
         * inode_map_lock_0 in put_nfs_inode() some other thread can reuse the
         * forgotten inode, in which case put_nfs_inode() will just skip it.
         *
         * TODO: In order to avoid taking inode_map_lock_0 for every forget,
         *       see if we should batch them in a threadlocal vector and call
         *       put_nfs_inodes() for a batch.
         */
        client->put_nfs_inode(this, cnt);
    } else {
        /*
         * After the --lookupcnt below some other thread calling decref()
         * can delete this inode, so don't access it after that, hence we
         * log before that but with updated lookupcnt.
         *
         * At this point 'cnt' has already been subtracted and only the extra
         * ref taken at try_again is still held, hence the "- 1" in the value
         * logged below.
         */
        AZLogDebug("[{}:{}] lookupcnt decremented by {}, to {}, "
                   "dircachecnt: {}, forget_expected: {}",
                   get_filetype_coding(), ino, cnt,
                   lookupcnt.load() - 1, dircachecnt.load(),
                   forget_expected.load());

        // Drop the extra ref taken at try_again.
        if (--lookupcnt == 0) {
            /*
             * This means that there was some thread holding a lookupcnt
             * ref on the inode but it just now released it (after we checked
             * above and before the --lookupcnt here) and now this forget
             * makes this inode's lookupcnt 0.
             *
             * Give back the 'cnt' refs so that lookupcnt is the same as it
             * was when we first reached try_again (minus the ref released by
             * the other thread) and retry. Barring a new ref being taken in
             * the meantime, the retry will find forget_now true and go
             * through the purge + put_nfs_inode() path above.
             */
            lookupcnt += cnt;
            goto try_again;
        }
    }
}