in bsd/vfs/vfs_lookup.c [119:607]
/*
 * Forward declarations for file-local lookup helpers. Their definitions are
 * not part of this excerpt; namei() below calls lookup_handle_found_vnode()
 * when resuming a continued lookup (NAMEI_CONTLOOKUP).
 */
static int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly,
int vbusyflags, int *keep_going, int nc_generation,
int wantparent, int atroot, vfs_context_t ctx);
static int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
#if NAMEDRSRCFORK
/* Only declared when named resource fork support is configured. */
static int lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
#endif
/*
 * Reader/writer lock defined elsewhere; namei() takes it shared around every
 * read of the global rootvnode.
 */
extern lck_rw_t rootvnode_rw_lock;
/*
 * Convert a pathname into a pointer to a locked inode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 * copy in name
 * get starting directory
 * while (!done && !error) {
 * call lookup to search path.
 * if symbolic link, massage name in buffer and continue
 * }
 *
 * Parameters: ndp Lookup state. Read on entry: ni_dirp, ni_segflg,
 * ni_flag, ni_dvp (used as the starting directory
 * when USEDVP is set) and ni_cnd (cn_flags,
 * cn_nameiop, cn_context). The actual per-component
 * work is delegated to lookup().
 *
 * On every failure that reaches the error_out label, ni_vp and ni_dvp
 * are reset to NULLVP and a HASBUF path buffer is freed. On the success
 * returns the path buffer is NOT freed here; presumably the caller is
 * responsible for releasing it -- TODO confirm against callers.
 *
 * Returns: 0 Success
 * ENOENT No such file or directory
 * ELOOP Too many levels of symbolic links
 * ENAMETOOLONG Filename too long
 * copyinstr:EFAULT Bad address
 * copyinstr:ENAMETOOLONG Filename too long
 * lookup:EBADF Bad file descriptor
 * lookup:EROFS
 * lookup:EACCES
 * lookup:EPERM
 * lookup:ERECYCLE vnode was recycled from underneath us in lookup.
 * This means we should re-drive lookup from this point.
 * lookup: ???
 * VNOP_READLINK:???
 */
int
namei(struct nameidata *ndp)
{
struct filedesc *fdp; /* pointer to file descriptor state */
struct vnode *dp; /* the directory we are searching */
struct vnode *usedvp = ndp->ni_dvp; /* store pointer to vp in case we must loop due to
* heavy vnode pressure */
uint32_t cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
int error;
struct componentname *cnp = &ndp->ni_cnd;
vfs_context_t ctx = cnp->cn_context;
proc_t p = vfs_context_proc(ctx);
#if CONFIG_AUDIT
/* XXX ut should be from context */
uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
#endif
#if CONFIG_VOLFS
/* Number of times the lookup has been re-driven for a /.vol path that hit ENOENT. */
int volfs_restarts = 0;
#endif
/* Length reported by copyinstr()/copystr() for the incoming path. */
size_t bytes_copied = 0;
/* Root directory on which this call holds its own usecount (NULLVP if none). */
vnode_t rootdir_with_usecount = NULLVP;
/* Starting directory on which this call holds its own usecount (NULLVP if none). */
vnode_t startdir_with_usecount = NULLVP;
/* Caller-supplied starting directory (USEDVP case); never given an extra usecount here. */
vnode_t usedvp_dp = NULLVP;
/* Pre-operation value returned by the atomic usecount increments/decrements. */
int32_t old_count = 0;
/* Set by lookup_handle_symlink() when the dp it hands back carries an iocount. */
bool dp_has_iocount = false;
/*
 * NOTE(review): p is dereferenced here, before the DIAGNOSTIC-only
 * "!p" sanity check below has a chance to run.
 */
fdp = p->p_fd;
#if DIAGNOSTIC
if (!vfs_context_ucred(ctx) || !p) {
panic("namei: bad cred/proc");
}
if (cnp->cn_nameiop & (~OPMASK)) {
panic("namei: nameiop contaminated with flags");
}
if (cnp->cn_flags & OPMASK) {
panic("namei: flags contaminated with nameiops");
}
#endif
/*
* A compound VNOP found something that needs further processing:
* either a trigger vnode, a covered directory, or a symlink.
*
* This path skips the copy-in and starting-directory setup entirely, so
* rootdir_with_usecount, startdir_with_usecount and usedvp_dp are all
* still NULLVP when it returns or jumps to continue_symlink.
*/
if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
int rdonly, vbusyflags, keep_going, wantparent;
rdonly = cnp->cn_flags & RDONLY;
vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
keep_going = 0;
wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
/* The continuation request is one-shot: clear it before resuming. */
ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags,
&keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
if (error) {
goto out_drop;
}
if (keep_going) {
/* The only continuation handled from here is a symlink; anything else is fatal. */
if ((cnp->cn_flags & ISSYMLINK) == 0) {
panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
}
/*
 * Jumps into the body of the for (;;) loop below. dp has not been
 * assigned on this path; lookup_handle_symlink() receives &dp and is
 * presumably responsible for setting it -- TODO confirm.
 */
goto continue_symlink;
}
return 0;
}
/*
 * Restart point: reached on first entry and again from the error path
 * below when the lookup must be re-driven (ERECYCLE, or ENOENT on a
 * translated volfs path).
 */
vnode_recycled:
/*
* Get a buffer for the name to be translated, and copy the
* name into the buffer.
*/
if ((cnp->cn_flags & HASBUF) == 0) {
/* No allocated buffer yet: start with the buffer embedded in the nameidata. */
cnp->cn_pnbuf = ndp->ni_pathbuf;
cnp->cn_pnlen = PATHBUFLEN;
}
#if LP64_DEBUG
if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0)
&& (ndp->ni_segflg != UIO_SYSSPACE)
&& (ndp->ni_segflg != UIO_SYSSPACE32)) {
panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__);
}
#endif /* LP64_DEBUG */
retry_copy:
/* Copy the path from user or kernel space, as selected by ni_segflg. */
if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
cnp->cn_pnlen, &bytes_copied);
} else {
error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf,
cnp->cn_pnlen, &bytes_copied);
}
/*
 * The path did not fit in the embedded buffer: switch to a MAXPATHLEN
 * buffer from ZV_NAMEI and copy again. Setting HASBUF ensures this retry
 * happens at most once; a second ENAMETOOLONG falls through to the
 * generic error check below.
 */
if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) {
cnp->cn_pnbuf = zalloc(ZV_NAMEI);
cnp->cn_flags |= HASBUF;
cnp->cn_pnlen = MAXPATHLEN;
bytes_copied = 0;
goto retry_copy;
}
if (error) {
goto error_out;
}
assert(bytes_copied <= MAXPATHLEN);
/*
 * Presumably this count includes the terminating NUL (the volfs branch
 * below sets ni_pathlen to strlen() + 1) -- TODO confirm.
 */
ndp->ni_pathlen = (u_int)bytes_copied;
bytes_copied = 0;
/*
* Since the name cache may contain positive entries of
* the incorrect case, force lookup() to bypass the cache
* and call directly into the filesystem for each path
* component. Note: the FS may still consult the cache,
* but can apply rules to validate the results.
*/
if (proc_is_forcing_hfs_case_sensitivity(p)) {
cnp->cn_flags |= CN_SKIPNAMECACHE;
}
#if CONFIG_VOLFS
/*
* Check for legacy volfs style pathnames.
*
* For compatibility reasons we currently allow these paths,
* but future versions of the OS may not support them.
*/
if (ndp->ni_pathlen >= VOLFS_MIN_PATH_LEN &&
cnp->cn_pnbuf[0] == '/' &&
cnp->cn_pnbuf[1] == '.' &&
cnp->cn_pnbuf[2] == 'v' &&
cnp->cn_pnbuf[3] == 'o' &&
cnp->cn_pnbuf[4] == 'l' &&
cnp->cn_pnbuf[5] == '/') {
char * realpath;
int realpath_err;
/* Attempt to resolve a legacy volfs style pathname. */
realpath = zalloc(ZV_NAMEI);
/*
* We only error out on the ENAMETOOLONG cases where we know that
* vfs_getrealpath translation succeeded but the path could not fit into
* MAXPATHLEN characters. In other failure cases, we may be dealing with a path
* that legitimately looks like /.vol/1234/567 and is not meant to be translated
*/
/* Index 6 is the first character after the "/.vol/" prefix matched above. */
if ((realpath_err = vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) {
zfree(ZV_NAMEI, realpath);
if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG) {
error = ENAMETOOLONG;
goto error_out;
}
/* Any other failure: keep going with the path exactly as supplied. */
} else {
size_t tmp_len;
/* Translation succeeded: the translated buffer replaces the original path buffer. */
if (cnp->cn_flags & HASBUF) {
zfree(ZV_NAMEI, cnp->cn_pnbuf);
}
cnp->cn_pnbuf = realpath;
cnp->cn_pnlen = MAXPATHLEN;
tmp_len = strlen(realpath) + 1;
assert(tmp_len <= UINT_MAX);
ndp->ni_pathlen = (u_int)tmp_len;
/* CN_VOLFSPATH is what enables the ENOENT restart logic on the error path. */
cnp->cn_flags |= HASBUF | CN_VOLFSPATH;
}
}
#endif /* CONFIG_VOLFS */
#if CONFIG_AUDIT
/* If we are auditing the kernel pathname, save the user pathname */
if (cnp->cn_flags & AUDITVNPATH1) {
AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1);
}
if (cnp->cn_flags & AUDITVNPATH2) {
AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2);
}
#endif /* CONFIG_AUDIT */
/*
* Do not allow empty pathnames
*/
if (*cnp->cn_pnbuf == '\0') {
error = ENOENT;
goto error_out;
}
/*
 * With NAMEI_NOFOLLOW_ANY the symlink counter starts at MAXSYMLINKS,
 * presumably so that the first symlink encountered already exceeds the
 * limit -- TODO confirm where ni_loopcnt is checked.
 */
if (ndp->ni_flag & NAMEI_NOFOLLOW_ANY) {
ndp->ni_loopcnt = MAXSYMLINKS;
} else {
ndp->ni_loopcnt = 0;
}
/*
* determine the starting point for the translation.
*/
proc_dirs_lock_shared(p);
lck_rw_lock_shared(&rootvnode_rw_lock);
/* The effective root is the global rootvnode unless the process is chrooted. */
if (!(fdp->fd_flags & FD_CHROOT)) {
ndp->ni_rootdir = rootvnode;
} else {
ndp->ni_rootdir = fdp->fd_rdir;
}
if (!ndp->ni_rootdir) {
if (!(fdp->fd_flags & FD_CHROOT)) {
printf("rootvnode is not set\n");
} else {
/* This should be a panic */
printf("fdp->fd_rdir is not set\n");
}
lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
error = ENOENT;
goto error_out;
}
cnp->cn_nameptr = cnp->cn_pnbuf;
ndp->ni_usedvp = NULLVP;
/*
 * Pick the starting directory:
 * - absolute path: skip all leading slashes and start at the root dir;
 * - USEDVP set: start at the caller-supplied ni_dvp;
 * - otherwise: start at the context's current working directory.
 */
if (*(cnp->cn_nameptr) == '/') {
while (*(cnp->cn_nameptr) == '/') {
cnp->cn_nameptr++;
ndp->ni_pathlen--;
}
dp = ndp->ni_rootdir;
} else if (cnp->cn_flags & USEDVP) {
dp = ndp->ni_dvp;
ndp->ni_usedvp = dp;
usedvp_dp = dp;
} else {
dp = vfs_context_cwd(ctx);
}
/* A missing or dead (VL_DEAD) starting directory fails the lookup with ENOENT. */
if (dp == NULLVP || (dp->v_lflag & VL_DEAD)) {
dp = NULLVP;
lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
error = ENOENT;
goto error_out;
}
/*
* We need our own usecount on the root vnode and the starting dir across
* the lookup. There are two things that can be done here. We can hold the locks
* (which protect the existing usecounts on the directories) across the
* lookup or take our own usecount. Holding the locks across the lookup can
* cause deadlock issues if we re-enter namei on the same thread so the
* correct thing to do is to acquire our own usecount.
*
* Ideally, the usecount should be obtained by vnode_get->vnode_ref->vnode_put.
* However when this vnode is the rootvnode, that sequence will produce a
* lot of vnode mutex locks and unlocks on a single vnode (the rootvnode)
* and will be highly contended and degrade performance. Since we have
* an existing usecount protected by the locks we hold, we'll just use
* an atomic op to increment the usecount on a vnode which already has one
* and can't be released because we have the locks which protect against that
* happening.
*/
rootdir_with_usecount = ndp->ni_rootdir;
old_count = os_atomic_inc_orig(&rootdir_with_usecount->v_usecount, relaxed);
if (old_count < 1) {
panic("(1) invalid pre-increment usecount (%d) for rootdir vnode %p",
old_count, rootdir_with_usecount);
} else if (old_count == INT32_MAX) {
panic("(1) usecount overflow for vnode %p", rootdir_with_usecount);
}
/*
 * Take a second usecount only when the starting directory is a distinct
 * vnode: the root dir is already covered above, and a caller-supplied
 * USEDVP directory gets no extra usecount here (presumably the caller
 * keeps it alive -- TODO confirm).
 */
if ((dp != rootdir_with_usecount) && (dp != usedvp_dp)) {
old_count = os_atomic_inc_orig(&dp->v_usecount, relaxed);
if (old_count < 1) {
panic("(2) invalid pre-increment usecount (%d) for vnode %p", old_count, dp);
} else if (old_count == INT32_MAX) {
panic("(2) usecount overflow for vnode %p", dp);
}
startdir_with_usecount = dp;
}
/* Now that we have our usecount, release the locks */
lck_rw_unlock_shared(&rootvnode_rw_lock);
proc_dirs_unlock_shared(p);
ndp->ni_dvp = NULLVP;
ndp->ni_vp = NULLVP;
/*
 * Each iteration performs one lookup() over the current path buffer. The
 * loop ends by returning 0 when the result is not a symlink, by jumping to
 * error_out on failure, or by breaking out when symlink handling fails.
 */
for (;;) {
#if CONFIG_MACF
/*
* Give MACF policies a chance to reject the lookup
* before performing any filesystem operations.
* This hook is called before resolving the path and
* again each time a symlink is encountered.
* NB: policies receive path information as supplied
* by the caller and thus cannot be trusted.
*/
error = mac_vnode_check_lookup_preflight(ctx, dp, cnp->cn_nameptr, cnp->cn_namelen);
if (error) {
goto error_out;
}
#endif
/* Hand the starting directory to lookup() through ni_startdir and clear the local copy. */
ndp->ni_startdir = dp;
dp = NULLVP;
if ((error = lookup(ndp))) {
goto error_out;
}
/*
* Check for symbolic link
*/
if ((cnp->cn_flags & ISSYMLINK) == 0) {
/* Not a symlink: the translation is done. Drop our usecounts and return success. */
if (startdir_with_usecount) {
vnode_rele(startdir_with_usecount);
startdir_with_usecount = NULLVP;
}
if (rootdir_with_usecount) {
/*
 * If our root dir is still the global rootvnode (checked under the
 * lock), undo the atomic increment with an atomic decrement;
 * otherwise release it through vnode_rele() after dropping the lock.
 */
lck_rw_lock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount == rootvnode) {
old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
if (old_count < 2) {
/*
* There needs to have been at least 1 usecount left on the rootvnode
*/
panic("(3) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
old_count, rootdir_with_usecount);
}
rootdir_with_usecount = NULLVP;
}
lck_rw_unlock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount) {
vnode_rele(rootdir_with_usecount);
rootdir_with_usecount = NULLVP;
}
}
return 0;
}
/* Also the entry point for a continued lookup (NAMEI_CONTLOOKUP) that landed on a symlink. */
continue_symlink:
/* Gives us a new path to process, and a starting dir */
error = lookup_handle_symlink(ndp, &dp, &dp_has_iocount, ctx);
if (error != 0) {
break;
}
if (dp_has_iocount) {
/*
 * Convert the iocount on the new starting directory into a usecount
 * (unless dp is already covered by one of the directories we track),
 * replacing any usecount held on the previous starting directory, and
 * then drop the iocount.
 */
if ((dp != rootdir_with_usecount) && (dp != startdir_with_usecount) &&
(dp != usedvp_dp)) {
if (startdir_with_usecount) {
vnode_rele(startdir_with_usecount);
}
vnode_ref_ext(dp, 0, VNODE_REF_FORCE);
startdir_with_usecount = dp;
}
vnode_put(dp);
dp_has_iocount = false;
}
}
/*
* only come here if we fail to handle a SYMLINK...
* if either ni_dvp or ni_vp is non-NULL, then
* we need to drop the iocount that was picked
* up in the lookup routine
*/
out_drop:
if (ndp->ni_dvp) {
vnode_put(ndp->ni_dvp);
}
if (ndp->ni_vp) {
vnode_put(ndp->ni_vp);
}
/*
 * Common failure exit: release any usecounts taken above, free an
 * allocated path buffer, clear the result vnodes, then either re-drive
 * the lookup or return the error.
 */
error_out:
if (startdir_with_usecount) {
vnode_rele(startdir_with_usecount);
startdir_with_usecount = NULLVP;
}
if (rootdir_with_usecount) {
/* Same release protocol as on the success path: atomic decrement for rootvnode, vnode_rele() otherwise. */
lck_rw_lock_shared(&rootvnode_rw_lock);
if (rootdir_with_usecount == rootvnode) {
old_count = os_atomic_dec_orig(&rootdir_with_usecount->v_usecount, relaxed);
if (old_count < 2) {
/*
* There needs to have been at least 1 usecount left on the rootvnode
*/
panic("(4) Unexpected pre-decrement value (%d) of usecount for rootvnode %p",
old_count, rootdir_with_usecount);
}
lck_rw_unlock_shared(&rootvnode_rw_lock);
} else {
lck_rw_unlock_shared(&rootvnode_rw_lock);
vnode_rele(rootdir_with_usecount);
}
rootdir_with_usecount = NULLVP;
}
/*
 * Free the path buffer only when HASBUF is set; when it is clear,
 * cn_pnbuf is either unset or points at the embedded ni_pathbuf.
 * Clearing HASBUF makes a restart fall back to the embedded buffer.
 */
if ((cnp->cn_flags & HASBUF)) {
cnp->cn_flags &= ~HASBUF;
zfree(ZV_NAMEI, cnp->cn_pnbuf);
}
cnp->cn_pnbuf = NULL;
ndp->ni_vp = NULLVP;
ndp->ni_dvp = NULLVP;
#if CONFIG_VOLFS
/*
* Deal with volfs fallout.
*
* At this point, if we were originally given a volfs path that
* looks like /.vol/123/456, then we would have had to convert it into
* a full path. Assuming that part worked properly, we will now attempt
* to conduct a lookup of the item in the namespace. Under normal
* circumstances, if a user looked up /tmp/foo and it was not there, it
* would be permissible to return ENOENT.
*
* However, we may not want to do that here. Specifically, the volfs path
* uniquely identifies a certain item in the namespace regardless of where it
* lives. If the item has moved in between the time we constructed the
* path and now, when we're trying to do a lookup/authorization on the full
* path, we may have gotten an ENOENT.
*
* At this point we can no longer tell if the path no longer exists
* or if the item in question no longer exists. It could have been renamed
* away, in which case the /.vol identifier is still valid.
*
* Do this dance a maximum of MAX_VOLFS_RESTARTS times.
*
* NOTE(review): unlike the ERECYCLE restart below, this restart does not
* restore cn_flags from cnpflags or ni_dvp from usedvp -- confirm that
* this is intentional.
*/
if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) {
if (volfs_restarts < MAX_VOLFS_RESTARTS) {
volfs_restarts++;
goto vnode_recycled;
}
}
#endif
if (error == ERECYCLE) {
/* vnode was recycled underneath us. re-drive lookup to start at
* the beginning again, since recycling invalidated last lookup*/
/* Restore the caller's original flags and starting vnode saved at function entry. */
ndp->ni_cnd.cn_flags = cnpflags;
ndp->ni_dvp = usedvp;
goto vnode_recycled;
}
return error;
}