static int lookup_handle_found_vnode()

in bsd/vfs/vfs_lookup.c [119:607]


/*
 * Forward declarations for static helpers whose definitions live elsewhere
 * in this translation unit.
 */

/*
 * Post-processes a vnode that a lookup has found.  namei() calls this
 * directly to resume a lookup flagged NAMEI_CONTLOOKUP: a non-zero return
 * is treated as an error, and a non-zero *keep_going tells namei() that
 * more work (a symlink to follow) remains.
 */
static int lookup_handle_found_vnode(
	struct nameidata *ndp,
	struct componentname *cnp,
	int rdonly,
	int vbusyflags,
	int *keep_going,
	int nc_generation,
	int wantparent,
	int atroot,
	vfs_context_t ctx);

/* Handles a lookup whose remaining name is empty (not called in this section). */
static int lookup_handle_emptyname(
	struct nameidata *ndp,
	struct componentname *cnp,
	int wantparent);

#if NAMEDRSRCFORK
/* Resource-fork lookup helper; only built when NAMEDRSRCFORK is enabled. */
static int lookup_handle_rsrc_fork(
	vnode_t dp,
	struct nameidata *ndp,
	struct componentname *cnp,
	int wantparent,
	vfs_context_t ctx);
#endif

/*
 * Reader/writer lock that namei() takes shared while it reads rootvnode
 * and adjusts the root vnode's usecount.
 */
extern lck_rw_t rootvnode_rw_lock;

/*
 * Convert a pathname into a pointer to a locked inode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 *
 * Parameters:	ndp			Lookup state.  Inputs read here include
 *					ni_dirp/ni_segflg (the path and where it
 *					lives), ni_cnd (flags, operation and vfs
 *					context), ni_flag and, when USEDVP is
 *					set, ni_dvp as the starting directory
 *					for relative paths.
 *
 * Notes:	If NAMEI_CONTLOOKUP is set in ni_flag, no path is copied in;
 *		the previously found vnode is handed straight to
 *		lookup_handle_found_vnode() and, if needed, symlink
 *		processing continues from there.
 *
 *		On every failure path the path buffer is released (if HASBUF
 *		is set), cn_pnbuf is cleared and ni_vp / ni_dvp are set to
 *		NULLVP.  The success paths return without freeing the buffer.
 *
 *		ERECYCLE is never returned to the caller: the saved
 *		cn_flags and ni_dvp are restored and the whole translation
 *		is re-driven from the top.
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		ELOOP			Too many levels of symbolic links
 *		ENAMETOOLONG		Filename too long
 *		copyinstr:EFAULT	Bad address
 *		copyinstr:ENAMETOOLONG	Filename too long
 *		lookup:EBADF		Bad file descriptor
 *		lookup:EROFS
 *		lookup:EACCES
 *		lookup:EPERM
 *		lookup:ERECYCLE	 vnode was recycled from underneath us in lookup.
 *						 This means we should re-drive lookup from this point.
 *		lookup: ???
 *		VNOP_READLINK:???
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;   /* pointer to file descriptor state */
	struct vnode *dp;       /* the directory we are searching */
	struct vnode *usedvp = ndp->ni_dvp;  /* caller's ni_dvp, saved so it can be put back
	                                      * if the lookup must be re-driven (ERECYCLE) */
	uint32_t cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
	int error;
	struct componentname *cnp = &ndp->ni_cnd;
	vfs_context_t ctx = cnp->cn_context;
	proc_t p = vfs_context_proc(ctx);
#if CONFIG_AUDIT
/* XXX ut should be from context */
	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
#endif

#if CONFIG_VOLFS
	int volfs_restarts = 0;         /* bounded by MAX_VOLFS_RESTARTS */
#endif
	size_t bytes_copied = 0;
	vnode_t rootdir_with_usecount = NULLVP;  /* root dir we hold an extra usecount on */
	vnode_t startdir_with_usecount = NULLVP; /* start dir we hold an extra usecount on */
	vnode_t usedvp_dp = NULLVP;              /* caller-supplied start dir (USEDVP); never ref'd here */
	int32_t old_count = 0;
	bool dp_has_iocount = false;

#if DIAGNOSTIC
	if (!vfs_context_ucred(ctx) || !p) {
		panic("namei: bad cred/proc");
	}
	if (cnp->cn_nameiop & (~OPMASK)) {
		panic("namei: nameiop contaminated with flags");
	}
	if (cnp->cn_flags & OPMASK) {
		panic("namei: flags contaminated with nameiops");
	}
#endif

	/*
	 * Only dereference p once the DIAGNOSTIC sanity check above has had
	 * a chance to reject a NULL proc; doing it earlier would fault
	 * before the "bad cred/proc" panic could ever be reported.
	 */
	fdp = p->p_fd;

	/*
	 * A compound VNOP found something that needs further processing:
	 * either a trigger vnode, a covered directory, or a symlink.
	 */
	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
		int rdonly, vbusyflags, keep_going, wantparent;

		rdonly = cnp->cn_flags & RDONLY;
		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
		keep_going = 0;
		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);

		/* The continuation request is one-shot; clear it before acting on it. */
		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);

		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
		    &keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
		if (error) {
			goto out_drop;
		}
		if (keep_going) {
			/* The only reason to keep going on a continued lookup is a symlink. */
			if ((cnp->cn_flags & ISSYMLINK) == 0) {
				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
			}
			/*
			 * Jumps into the body of the lookup loop below.  No
			 * usecounts have been taken on this path, so the
			 * *_with_usecount variables are still NULLVP.
			 */
			goto continue_symlink;
		}

		return 0;
	}

vnode_recycled:

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 *
	 * Unless the caller already supplied one (HASBUF), start with the
	 * PATHBUFLEN-byte buffer carried in ndp.
	 */
	if ((cnp->cn_flags & HASBUF) == 0) {
		cnp->cn_pnbuf = ndp->ni_pathbuf;
		cnp->cn_pnlen = PATHBUFLEN;
	}
#if LP64_DEBUG
	if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0)
	    && (ndp->ni_segflg != UIO_SYSSPACE)
	    && (ndp->ni_segflg != UIO_SYSSPACE32)) {
		panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__);
	}
#endif /* LP64_DEBUG */

retry_copy:
	if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
		    cnp->cn_pnlen, &bytes_copied);
	} else {
		error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
		    cnp->cn_pnlen, &bytes_copied);
	}
	/*
	 * The path did not fit in the small buffer: switch to a MAXPATHLEN
	 * buffer from the ZV_NAMEI zone and copy again.  HASBUF records that
	 * the buffer must later be zfree'd, and also limits this to a
	 * single retry.
	 */
	if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
		cnp->cn_pnbuf = zalloc(ZV_NAMEI);
		cnp->cn_flags |= HASBUF;
		cnp->cn_pnlen = MAXPATHLEN;
		bytes_copied = 0;

		goto retry_copy;
	}
	if (error) {
		goto error_out;
	}
	assert(bytes_copied <= MAXPATHLEN);
	ndp->ni_pathlen = (u_int)bytes_copied;
	bytes_copied = 0;

	/*
	 * Since the name cache may contain positive entries of
	 * the incorrect case, force lookup() to bypass the cache
	 * and call directly into the filesystem for each path
	 * component. Note: the FS may still consult the cache,
	 * but can apply rules to validate the results.
	 */
	if (proc_is_forcing_hfs_case_sensitivity(p)) {
		cnp->cn_flags |= CN_SKIPNAMECACHE;
	}

#if CONFIG_VOLFS
	/*
	 * Check for legacy volfs style pathnames.
	 *
	 * For compatibility reasons we currently allow these paths,
	 * but future versions of the OS may not support them.
	 */
	if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
	    cnp->cn_pnbuf[0] == '/' &&
	    cnp->cn_pnbuf[1] == '.' &&
	    cnp->cn_pnbuf[2] == 'v' &&
	    cnp->cn_pnbuf[3] == 'o' &&
	    cnp->cn_pnbuf[4] == 'l' &&
	    cnp->cn_pnbuf[5] == '/') {
		char * realpath;
		int realpath_err;
		/* Attempt to resolve a legacy volfs style pathname. */
		realpath = zalloc(ZV_NAMEI);
		/*
		 * We only error out on the ENAMETOOLONG cases where we know that
		 * vfs_getrealpath translation succeeded but the path could not fit into
		 * MAXPATHLEN characters.  In other failure cases, we may be dealing with a path
		 * that legitimately looks like /.vol/1234/567 and is not meant to be translated
		 */
		if ((realpath_err = vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) {
			zfree(ZV_NAMEI, realpath);
			if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG) {
				error = ENAMETOOLONG;
				goto error_out;
			}
		} else {
			size_t tmp_len;
			/*
			 * Translation worked: the resolved path replaces the
			 * copied-in name, so release the old zone buffer (if
			 * any) and adopt realpath as the path buffer.
			 */
			if (cnp->cn_flags & HASBUF) {
				zfree(ZV_NAMEI, cnp->cn_pnbuf);
			}
			cnp->cn_pnbuf = realpath;
			cnp->cn_pnlen = MAXPATHLEN;
			tmp_len = strlen(realpath) + 1; /* ni_pathlen counts the NUL */
			assert(tmp_len <= UINT_MAX);
			ndp->ni_pathlen = (u_int)tmp_len;
			/* CN_VOLFSPATH enables the ENOENT restart logic after error_out. */
			cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
		}
	}
#endif /* CONFIG_VOLFS */

#if CONFIG_AUDIT
	/* If we are auditing the kernel pathname, save the user pathname */
	if (cnp->cn_flags & AUDITVNPATH1) {
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
	}
	if (cnp->cn_flags & AUDITVNPATH2) {
		AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
	}
#endif /* CONFIG_AUDIT */

	/*
	 * Do not allow empty pathnames
	 */
	if (*cnp->cn_pnbuf == '\0') {
		error = ENOENT;
		goto error_out;
	}
	/*
	 * With NAMEI_NOFOLLOW_ANY the symlink counter starts at its limit
	 * rather than at zero.
	 */
	if (ndp->ni_flag & NAMEI_NOFOLLOW_ANY) {
		ndp->ni_loopcnt = MAXSYMLINKS;
	} else {
		ndp->ni_loopcnt = 0;
	}

	/*
	 * determine the starting point for the translation.
	 *
	 * Both locks are held shared from here until our own usecounts have
	 * been taken (or until we bail out); every exit in between must
	 * drop them.
	 */
	proc_dirs_lock_shared(p);
	lck_rw_lock_shared(&rootvnode_rw_lock);

	/* A chroot'ed process resolves "/" against its own root directory. */
	if (!(fdp->fd_flags & FD_CHROOT)) {
		ndp->ni_rootdir = rootvnode;
	} else {
		ndp->ni_rootdir = fdp->fd_rdir;
	}

	if (!ndp->ni_rootdir) {
		if (!(fdp->fd_flags & FD_CHROOT)) {
			printf("rootvnode is not set\n");
		} else {
			/* This should be a panic */
			printf("fdp->fd_rdir is not set\n");
		}
		lck_rw_unlock_shared(&rootvnode_rw_lock);
		proc_dirs_unlock_shared(p);
		error = ENOENT;
		goto error_out;
	}

	cnp->cn_nameptr = cnp->cn_pnbuf;

	ndp->ni_usedvp = NULLVP;

	/*
	 * Absolute paths start at the root directory (leading slashes are
	 * skipped and deducted from the remaining length); relative paths
	 * start at the caller-supplied directory when USEDVP is set, and at
	 * the context's current working directory otherwise.
	 */
	if (*(cnp->cn_nameptr) == '/') {
		while (*(cnp->cn_nameptr) == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		dp = ndp->ni_rootdir;
	} else if (cnp->cn_flags & USEDVP) {
		dp = ndp->ni_dvp;
		ndp->ni_usedvp = dp;
		usedvp_dp = dp;
	} else {
		dp = vfs_context_cwd(ctx);
	}

	if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
		dp = NULLVP;
		lck_rw_unlock_shared(&rootvnode_rw_lock);
		proc_dirs_unlock_shared(p);
		error = ENOENT;
		goto error_out;
	}

	/*
	 * We need our own usecount on the root vnode and the starting dir across
	 * the lookup. There are two things that can be done here. We can hold the
	 * locks (which protect the existing usecounts on the directories) across
	 * the lookup or take our own usecount. Holding the locks across the lookup
	 * can cause deadlock issues if we re-enter namei on the same thread so the
	 * correct thing to do is to acquire our own usecount.
	 *
	 * Ideally, the usecount should be obtained by vnode_get->vnode_ref->vnode_put.
	 * However when this vnode is the rootvnode, that sequence will produce a
	 * lot of vnode mutex locks and unlocks on a single vnode (the rootvnode)
	 * and will be highly contended and degrade performance. Since we have
	 * an existing usecount protected by the locks we hold, we'll just use
	 * an atomic op to increment the usecount on a vnode which already has one
	 * and can't be released because we have the locks which protect against that
	 * happening.
	 */
	rootdir_with_usecount = ndp->ni_rootdir;
	old_count = os_atomic_inc_orig(&rootdir_with_usecount->v_usecount, relaxed);
	if (old_count < 1) {
		panic("(1) invalid pre-increment usecount (%d) for rootdir vnode %p",
		    old_count, rootdir_with_usecount);
	} else if (old_count == INT32_MAX) {
		panic("(1) usecount overflow for vnode %p", rootdir_with_usecount);
	}

	/*
	 * The starting dir needs its own usecount only when it is neither the
	 * root dir (just counted above) nor the caller-supplied USEDVP vnode,
	 * on which this function never takes a usecount.
	 */
	if ((dp != rootdir_with_usecount) && (dp != usedvp_dp)) {
		old_count = os_atomic_inc_orig(&dp->v_usecount, relaxed);
		if (old_count < 1) {
			panic("(2) invalid pre-increment usecount (%d) for vnode %p", old_count, dp);
		} else if (old_count == INT32_MAX) {
			panic("(2) usecount overflow for vnode %p", dp);
		}
		startdir_with_usecount = dp;
	}

	/* Now that we have our usecount, release the locks */
	lck_rw_unlock_shared(&rootvnode_rw_lock);
	proc_dirs_unlock_shared(p);

	ndp->ni_dvp = NULLVP;
	ndp->ni_vp  = NULLVP;

	/*
	 * One iteration per lookup() call: the first for the original path,
	 * then one more for each symlink that has to be followed.
	 */
	for (;;) {
#if CONFIG_MACF
		/*
		 * Give MACF policies a chance to reject the lookup
		 * before performing any filesystem operations.
		 * This hook is called before resolving the path and
		 * again each time a symlink is encountered.
		 * NB: policies receive path information as supplied
		 *     by the caller and thus cannot be trusted.
		 */
		error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
		if (error) {
			goto error_out;
		}
#endif
		ndp->ni_startdir = dp;
		dp = NULLVP;

		if ((error = lookup(ndp))) {
			goto error_out;
		}

		/*
		 * Check for symbolic link
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			/* Done: give back the usecounts taken above and return. */
			if (startdir_with_usecount) {
				vnode_rele(startdir_with_usecount);
				startdir_with_usecount = NULLVP;
			}
			if (rootdir_with_usecount) {
				/*
				 * If this is still the system rootvnode, undo the
				 * atomic increment with an atomic decrement under
				 * the lock; otherwise fall back to vnode_rele()
				 * once the lock has been dropped.
				 */
				lck_rw_lock_shared(&rootvnode_rw_lock);
				if (rootdir_with_usecount == rootvnode) {
					old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
					if (old_count < 2) {
						/*
						 * There needs to have been at least 1 usecount left on the rootvnode
						 */
						panic("(3) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
						    old_count, rootdir_with_usecount);
					}
					rootdir_with_usecount = NULLVP;
				}
				lck_rw_unlock_shared(&rootvnode_rw_lock);
				if (rootdir_with_usecount) {
					vnode_rele(rootdir_with_usecount);
					rootdir_with_usecount = NULLVP;
				}
			}

			return 0;
		}

continue_symlink:
		/* Gives us a new path to process, and a starting dir */
		error = lookup_handle_symlink(ndp, &dp, &dp_has_iocount, ctx);
		if (error != 0) {
			break;
		}
		if (dp_has_iocount) {
			/*
			 * The new starting dir came back with an iocount.  If
			 * we do not already hold a usecount that covers it,
			 * swap our start-dir usecount over to it, then drop
			 * the iocount.
			 */
			if ((dp != rootdir_with_usecount) && (dp != startdir_with_usecount) &&
			    (dp != usedvp_dp)) {
				if (startdir_with_usecount) {
					vnode_rele(startdir_with_usecount);
				}
				vnode_ref_ext(dp, 0, VNODE_REF_FORCE);
				startdir_with_usecount = dp;
			}
			vnode_put(dp);
			dp_has_iocount = false;
		}
	}
	/*
	 * only come here if we fail to handle a SYMLINK...
	 * if either ni_dvp or ni_vp is non-NULL, then
	 * we need to drop the iocount that was picked
	 * up in the lookup routine
	 */
out_drop:
	if (ndp->ni_dvp) {
		vnode_put(ndp->ni_dvp);
	}
	if (ndp->ni_vp) {
		vnode_put(ndp->ni_vp);
	}
error_out:
	/* Common failure cleanup: usecounts first, then the path buffer. */
	if (startdir_with_usecount) {
		vnode_rele(startdir_with_usecount);
		startdir_with_usecount = NULLVP;
	}
	if (rootdir_with_usecount) {
		/* Same release protocol as on the success path above. */
		lck_rw_lock_shared(&rootvnode_rw_lock);
		if (rootdir_with_usecount == rootvnode) {
			old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
			if (old_count < 2) {
				/*
				 * There needs to have been at least 1 usecount left on the rootvnode
				 */
				panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
				    old_count, rootdir_with_usecount);
			}
			lck_rw_unlock_shared(&rootvnode_rw_lock);
		} else {
			lck_rw_unlock_shared(&rootvnode_rw_lock);
			vnode_rele(rootdir_with_usecount);
		}
		rootdir_with_usecount = NULLVP;
	}

	/* Only zone-allocated buffers (HASBUF) are freed. */
	if ((cnp->cn_flags & HASBUF)) {
		cnp->cn_flags &= ~HASBUF;
		zfree(ZV_NAMEI, cnp->cn_pnbuf);
	}
	cnp->cn_pnbuf = NULL;
	ndp->ni_vp = NULLVP;
	ndp->ni_dvp = NULLVP;

#if CONFIG_VOLFS
	/*
	 * Deal with volfs fallout.
	 *
	 * At this point, if we were originally given a volfs path that
	 * looks like /.vol/123/456, then we would have had to convert it into
	 * a full path.  Assuming that part worked properly, we will now attempt
	 * to conduct a lookup of the item in the namespace.  Under normal
	 * circumstances, if a user looked up /tmp/foo and it was not there, it
	 * would be permissible to return ENOENT.
	 *
	 * However, we may not want to do that here.  Specifically, the volfs path
	 * uniquely identifies a certain item in the namespace regardless of where it
	 * lives.  If the item has moved in between the time we constructed the
	 * path and now, when we're trying to do a lookup/authorization on the full
	 * path, we may have gotten an ENOENT.
	 *
	 * At this point we can no longer tell if the path no longer exists
	 * or if the item in question no longer exists. It could have been renamed
	 * away, in which case the /.vol identifier is still valid.
	 *
	 * Do this dance a maximum of MAX_VOLFS_RESTARTS times.
	 */
	if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) {
		if (volfs_restarts < MAX_VOLFS_RESTARTS) {
			volfs_restarts++;
			goto vnode_recycled;
		}
	}
#endif

	if (error == ERECYCLE) {
		/* vnode was recycled underneath us. re-drive lookup to start at
		 *  the beginning again, since recycling invalidated last lookup*/
		ndp->ni_cnd.cn_flags = cnpflags;
		ndp->ni_dvp = usedvp;
		goto vnode_recycled;
	}


	return error;
}