| /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved. | 
 |  * Portions Copyright © 1997-1999 Vita Nuova Limited | 
 |  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited | 
 |  *                                (www.vitanuova.com) | 
 |  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others | 
 |  * | 
 |  * Modified for the Akaros operating system: | 
 |  * Copyright (c) 2013-2014 The Regents of the University of California | 
 |  * Copyright (c) 2013-2015 Google Inc. | 
 |  * | 
 |  * Permission is hereby granted, free of charge, to any person obtaining a copy | 
 |  * of this software and associated documentation files (the "Software"), to deal | 
 |  * in the Software without restriction, including without limitation the rights | 
 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
 |  * copies of the Software, and to permit persons to whom the Software is | 
 |  * furnished to do so, subject to the following conditions: | 
 |  * | 
 |  * The above copyright notice and this permission notice shall be included in | 
 |  * all copies or substantial portions of the Software. | 
 |  * | 
 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE | 
 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
 |  * SOFTWARE. */ | 
 |  | 
 | #include <slab.h> | 
 | #include <kmalloc.h> | 
 | #include <kref.h> | 
 | #include <string.h> | 
 | #include <stdio.h> | 
 | #include <assert.h> | 
 | #include <error.h> | 
 | #include <cpio.h> | 
 | #include <pmap.h> | 
 | #include <smp.h> | 
 | #include <net/ip.h> | 
 | #include <rcu.h> | 
 |  | 
 | /* TODO: these sizes are hokey.  DIRSIZE is used in chandirstat, and it looks | 
 |  * like it's the size of a common-case stat. */ | 
 | enum { | 
 | 	DIRSIZE = STAT_FIX_LEN_AK + 32 * STAT_NR_STRINGS_AK, | 
 |  | 
 | 	/* should handle the largest reasonable directory entry */ | 
 | 	DIRREADLIM = 2048, | 
 |  | 
 | 	/* Just read a lot. Memory is cheap, lots of bandwidth, and RPCs are | 
 | 	 * very expensive. At the same time, let's not yet exceed a common | 
 | 	 * MSIZE. */ | 
 | 	DIRREADSIZE = 8192, | 
 | }; | 
 |  | 
 | int newfd(struct chan *c, int low_fd, int oflags, bool must_use_low) | 
 | { | 
 | 	int ret = insert_obj_fdt(¤t->open_files, c, low_fd, | 
 | 	                         oflags & O_CLOEXEC ? FD_CLOEXEC : 0, | 
 | 	                         must_use_low); | 
 | 	if (ret >= 0) | 
 | 		cclose(c); | 
 | 	return ret; | 
 | } | 
 |  | 
 | struct chan *fdtochan(struct fd_table *fdt, int fd, int mode, int chkmnt, | 
 |                       int iref) | 
 | { | 
 | 	struct chan *c; | 
 |  | 
 | 	c = lookup_fd(fdt, fd, iref); | 
 | 	if (!c) { | 
 | 		/* We lost the info about why there was a problem (we used to | 
 | 		 * track file group closed too, can add that in later). */ | 
 | 		error(EBADF, ERROR_FIXME); | 
 | 	} | 
 | 	if (chkmnt && (c->flag & CMSG)) { | 
 | 		if (iref) | 
 | 			cclose(c); | 
 | 		error(EBADF, ERROR_FIXME); | 
 | 	} | 
 | 	if (mode < 0) | 
 | 		return c; | 
 | 	if ((mode & c->mode) != mode) { | 
 | 		if (iref) | 
 | 			cclose(c); | 
 | 		error(EBADF, | 
 | 		      "FD access mode failure: chan mode 0x%x, wanted 0x%x (opened with 0 instead of O_READ?)", | 
 | 		      c->mode, mode); | 
 | 	} | 
 | 	return c; | 
 | } | 
 |  | 
 | long kchanio(void *vc, void *buf, int n, int mode) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int r; | 
 | 	struct chan *c; | 
 |  | 
 | 	c = vc; | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	if (mode == O_READ) | 
 | 		r = devtab[c->type].read(c, buf, n, c->offset); | 
 | 	else if (mode == O_WRITE) | 
 | 		r = devtab[c->type].write(c, buf, n, c->offset); | 
 | 	else | 
 | 		error(ENOSYS, "kchanio: use only O_READ xor O_WRITE"); | 
 |  | 
 | 	spin_lock(&c->lock); | 
 | 	c->offset += r; | 
 | 	spin_unlock(&c->lock); | 
 | 	poperror(); | 
 | 	return r; | 
 | } | 
 |  | 
 | int openmode(uint32_t omode) | 
 | { | 
 | /* GIANT WARNING: if this ever throws, ipopen (and probably many others) will | 
 |  * screw up refcnts of Qctl, err, data, etc */ | 
 | #if 0 | 
 | 	/* this is the old plan9 style.  i think they want to turn exec into | 
 | 	 * read, and strip off anything higher, and just return the RD/WR style | 
 | 	 * bits.  not stuff like ORCLOSE.  the lack of OEXCL might be a bug on | 
 | 	 * their part (it's the only one of their non-RW-related flags that | 
 | 	 * isn't masked out). | 
 | 	 * | 
 | 	 * Note that we no longer convert OEXEC/O_EXEC to O_READ, and instead | 
 | 	 * return just the O_ACCMODE bits. */ | 
 | 	if (o >= (OTRUNC | OCEXEC | ORCLOSE | OEXEC)) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 | 	o &= ~(OTRUNC | OCEXEC | ORCLOSE); | 
 | 	if (o > OEXEC) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 | 	if (o == OEXEC) | 
 | 		return OREAD; | 
 | 	return o; | 
 | #endif | 
 | 	/* no error checking (we have a shitload of flags anyway), and we return | 
 | 	 * the basic access modes (RD/WR/ETC) */ | 
 | 	return omode & O_ACCMODE; | 
 | } | 
 |  | 
 | void fdclose(struct fd_table *fdt, int fd) | 
 | { | 
 | 	close_fd(fdt, fd); | 
 | } | 
 |  | 
 | static void set_dot(struct proc *p, struct chan *c) | 
 | { | 
 | 	c = atomic_swap_ptr((void**)&p->dot, c); | 
 | 	synchronize_rcu(); | 
 | 	cclose(c); | 
 | } | 
 |  | 
 | /* Note namec() happens in the namespace of the caller. */ | 
 | int syschdir(struct proc *target, char *path) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = namec(path, Atodir, 0, 0, NULL); | 
 | 	poperror(); | 
 | 	set_dot(target, c); | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* Note fdtochan() happens with the FDs of the caller. */ | 
 | int sysfchdir(struct proc *target, int fd) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 | 	poperror(); | 
 |  | 
 | 	/* This is a little hokey.  Ideally, we'd only allow O_PATH fds to be | 
 | 	 * fchdir'd.  Linux/POSIX lets you do arbitrary FDs.  Luckily, we stored | 
 | 	 * the name when we walked (__namec_from), so we should be able to | 
 | 	 * recreate the chan.  Using namec() with channame() is a more | 
 | 	 * heavy-weight cclone(), but also might have issues if the chan has | 
 | 	 * since been removed or the namespace is otherwise different from when | 
 | 	 * the original fd/chan was first created. */ | 
 | 	if (c->flag & O_PATH) { | 
 | 		set_dot(target, c); | 
 | 		return 0; | 
 | 	} | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	syschdir(target, channame(c)); | 
 | 	cclose(c); | 
 | 	poperror(); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | int sysclose(int fd) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct fd_table *fdt = ¤t->open_files; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	/* | 
 | 	 * Take no reference on the chan because we don't really need the | 
 | 	 * data structure, and are calling fdtochan only for error checks. | 
 | 	 * fdclose takes care of processes racing through here. | 
 | 	 */ | 
 | 	fdtochan(fdt, fd, -1, 0, 0); | 
 | 	fdclose(fdt, fd); | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | int syscreate(char *path, int mode, uint32_t perm) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	int fd; | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	openmode(mode & ~O_EXCL);	/* error check only; OEXCL okay here */ | 
 | 	c = namec(path, Acreate, mode, perm, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	/* 9ns mode is the O_FLAGS and perm is glibc mode */ | 
 | 	fd = newfd(c, 0, mode, FALSE); | 
 | 	if (fd < 0) | 
 | 		error(-fd, ERROR_FIXME); | 
 | 	poperror(); | 
 |  | 
 | 	poperror(); | 
 | 	return fd; | 
 | } | 
 |  | 
 | int sysdup(int old, int low_fd, bool must_use_low) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int fd; | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(¤t->open_files, old, -1, 0, 1); | 
 | 	if (c->qid.type & QTAUTH) { | 
 | 		cclose(c); | 
 | 		error(EPERM, ERROR_FIXME); | 
 | 	} | 
 | 	fd = newfd(c, low_fd, 0, must_use_low); | 
 | 	if (fd < 0) { | 
 | 		cclose(c); | 
 | 		error(-fd, ERROR_FIXME); | 
 | 	} | 
 | 	poperror(); | 
 | 	return fd; | 
 | } | 
 |  | 
 | /* Could pass in the fdt instead of the proc, but we used to need the to_proc | 
 |  * for now so we can claim a VFS FD.  Careful, we don't close the old chan. */ | 
 | int sys_dup_to(struct proc *from_proc, unsigned int from_fd, | 
 |                struct proc *to_proc, unsigned int to_fd) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int ret; | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(&from_proc->open_files, from_fd, -1, 0, 1); | 
 | 	if (c->qid.type & QTAUTH) { | 
 | 		cclose(c); | 
 | 		error(EPERM, ERROR_FIXME); | 
 | 	} | 
 | 	ret = insert_obj_fdt(&to_proc->open_files, c, to_fd, 0, TRUE); | 
 | 	/* drop the ref from fdtochan.  if insert succeeded, there is one other | 
 | 	 * ref stored in the FDT */ | 
 | 	cclose(c); | 
 | 	if (ret < 0) | 
 | 		error(EFAIL, "Can't insert FD %d into FDG", to_fd); | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | char *sysgetcwd(void) | 
 | { | 
 | 	char *s = NULL; | 
 | 	struct chan *dot; | 
 |  | 
 | 	rcu_read_lock(); | 
 | 	dot = rcu_dereference(current->dot); | 
 | 	kref_get(&dot->ref, 1); | 
 | 	rcu_read_unlock(); | 
 | 	if (dot->name) | 
 | 		kstrdup(&s, dot->name->s); | 
 | 	cclose(dot); | 
 | 	return s; | 
 | } | 
 |  | 
 | int sysfauth(int fd, char *aname) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c, *ac; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	validname(aname, 0); | 
 | 	c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 |  | 
 | 	ac = mntauth(c, aname); | 
 |  | 
 | 	/* at this point ac is responsible for keeping c alive */ | 
 | 	poperror();	/* c */ | 
 | 	cclose(c); | 
 |  | 
 | 	if (waserror()) { | 
 | 		cclose(ac); | 
 | 		nexterror(); | 
 | 	} | 
 |  | 
 | 	fd = newfd(ac, 0, 0, FALSE); | 
 | 	if (fd < 0) | 
 | 		error(-fd, ERROR_FIXME); | 
 | 	poperror();	/* ac */ | 
 |  | 
 | 	poperror(); | 
 |  | 
 | 	return fd; | 
 | } | 
 |  | 
 | int sysfversion(int fd, unsigned int msize, char *vers, unsigned int arglen) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	int m; | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	/* check there's a NUL in the version string */ | 
 | 	if (arglen == 0 || memchr(vers, 0, arglen) == 0) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 |  | 
 | 	m = mntversion(c, vers, msize, arglen); | 
 |  | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return m; | 
 | } | 
 |  | 
 | int sysfwstat(int fd, uint8_t * buf, int n) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	validstat(buf, n, 0); | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 1, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	n = devtab[c->type].wstat(c, buf, n); | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return n; | 
 | } | 
 |  | 
 | long bindmount(struct chan *c, char *old, int flag, char *spec) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int ret; | 
 | 	struct chan *c1; | 
 |  | 
 | 	if (flag > MMASK || (flag & MORDER) == (MBEFORE | MAFTER)) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 |  | 
 | 	c1 = namec(old, Amount, 0, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c1); | 
 | 		nexterror(); | 
 | 	} | 
 | 	ret = cmount(c, c1, flag, spec); | 
 |  | 
 | 	poperror(); | 
 | 	cclose(c1); | 
 | 	return ret; | 
 | } | 
 |  | 
 | int sysbind(char *new, char *old, int flags) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	long r; | 
 | 	struct chan *c0; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c0 = namec(new, Abind, 0, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c0); | 
 | 		nexterror(); | 
 | 	} | 
 | 	r = bindmount(c0, old, flags, ""); | 
 | 	poperror(); | 
 | 	cclose(c0); | 
 |  | 
 | 	poperror(); | 
 | 	return r; | 
 | } | 
 |  | 
 | int syssymlink(char *new_path, char *old_path) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	validname(old_path, true); | 
 | 	c = namec(new_path, Acreate, O_EXCL, | 
 | 	          DMSYMLINK | S_IRWXU | S_IRWXG | S_IRWXO, old_path); | 
 | 	cclose(c); | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | int sysmount(int fd, int afd, char *old, int flags, char *spec) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	long r; | 
 | 	volatile struct { | 
 | 		struct chan *c; | 
 | 	} c0; | 
 | 	volatile struct { | 
 | 		struct chan *c; | 
 | 	} bc; | 
 | 	volatile struct { | 
 | 		struct chan *c; | 
 | 	} ac; | 
 | 	struct mntparam mntparam; | 
 |  | 
 | 	ac.c = NULL; | 
 | 	bc.c = NULL; | 
 | 	c0.c = NULL; | 
 | 	if (waserror()) { | 
 | 		cclose(ac.c); | 
 | 		cclose(bc.c); | 
 | 		cclose(c0.c); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	bc.c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
 | 	if (afd >= 0) | 
 | 		ac.c = fdtochan(¤t->open_files, afd, O_RDWR, 0, 1); | 
 | 	mntparam.chan = bc.c; | 
 | 	mntparam.authchan = ac.c; | 
 | 	mntparam.spec = spec; | 
 | 	c0.c = devtab[devno("mnt", 0)].attach((char *)&mntparam); | 
 | 	if (flags & MCACHE) | 
 | 		c0.c = devtab[devno("gtfs", 0)].attach((char*)c0.c); | 
 | 	r = bindmount(c0.c, old, flags, spec); | 
 | 	poperror(); | 
 | 	cclose(ac.c); | 
 | 	cclose(bc.c); | 
 | 	cclose(c0.c); | 
 |  | 
 | 	return r; | 
 | } | 
 |  | 
 | int sysunmount(char *src_path, char *onto_path) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	volatile struct { | 
 | 		struct chan *c; | 
 | 	} cmount; | 
 | 	volatile struct { | 
 | 		struct chan *c; | 
 | 	} cmounted; | 
 |  | 
 | 	cmount.c = NULL; | 
 | 	cmounted.c = NULL; | 
 | 	if (waserror()) { | 
 | 		cclose(cmount.c); | 
 | 		cclose(cmounted.c); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	cmount.c = namec(onto_path, Amount, 0, 0, NULL); | 
 | 	if (src_path != NULL && src_path[0] != '\0') { | 
 | 		/* | 
 | 		 * This has to be namec(..., Aopen, ...) because | 
 | 		 * if arg[0] is something like /srv/cs or /fd/0, | 
 | 		 * opening it is the only way to get at the real | 
 | 		 * Chan underneath. | 
 | 		 */ | 
 | 		cmounted.c = namec(src_path, Aopen, O_READ, 0, NULL); | 
 | 	} | 
 |  | 
 | 	cunmount(cmount.c, cmounted.c); | 
 | 	poperror(); | 
 | 	cclose(cmount.c); | 
 | 	cclose(cmounted.c); | 
 | 	return 0; | 
 | } | 
 |  | 
 | int sysopenat(int fromfd, char *path, int vfs_flags, int perm) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int fd; | 
 | 	struct chan *c = NULL, *from; | 
 | 	int open_or_create = Aopen; | 
 |  | 
 | 	/* O_EXCL must be O_CREATE (checked in syscall.c); we can skip the Aopen | 
 | 	 * call.  Note namec(Acreate) checks O_EXCL internally. */ | 
 | 	if (vfs_flags & O_EXCL) | 
 | 		open_or_create = Acreate; | 
 | 	if (waserror()) { | 
 | 		if (open_or_create == Aopen && vfs_flags & O_CREATE | 
 | 		    && get_errno() == ENOENT) { | 
 | 			open_or_create = Acreate; | 
 | 			/* Don't poperror - we're keeping ourselves at the | 
 | 			 * current waserror() depth.  Returns thrice! */ | 
 | 			goto retry; | 
 | 		} | 
 | 		cclose(c); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | retry: | 
 | 	openmode(vfs_flags & ~O_EXCL);	/* error check only; O_EXCL okay here */ | 
 | 	if ((path[0] == '/') || (fromfd == AT_FDCWD)) { | 
 | 		c = namec(path, open_or_create, vfs_flags, perm, NULL); | 
 | 	} else { | 
 | 		/* We don't cclose from.  namec_from will convert it to the new | 
 | 		 * chan during the walk process (c).  It'll probably close from | 
 | 		 * internally, and give us something new for c.  On error, | 
 | 		 * namec_from will cclose from. */ | 
 | 		from = fdtochan(¤t->open_files, fromfd, -1, FALSE, TRUE); | 
 | 		if (!(from->flag & O_PATH)) { | 
 | 			/* This is the only error path where we need to close | 
 | 			 * from.  namec_from will close from for us, regardless | 
 | 			 * of whether or not it fails. */ | 
 | 			cclose(from); | 
 | 			error(EINVAL, "Cannot openat from a non-O_PATH FD"); | 
 | 		} | 
 | 		c = namec_from(from, path, open_or_create, vfs_flags, perm, | 
 | 			       NULL); | 
 | 	} | 
 | 	/* Devices should catch this, but just in case, we'll catch it. */ | 
 | 	if ((c->qid.type & QTSYMLINK) && (vfs_flags & O_NOFOLLOW)) | 
 | 		error(ELOOP, "no-follow open of a symlink"); | 
 | 	fd = newfd(c, 0, vfs_flags, FALSE); | 
 | 	if (fd < 0) | 
 | 		error(-fd, ERROR_FIXME); | 
 | 	poperror(); | 
 | 	return fd; | 
 | } | 
 |  | 
 | int sysopen(char *path, int vfs_flags) | 
 | { | 
 | 	return sysopenat(AT_FDCWD, path, vfs_flags, 0); | 
 | } | 
 |  | 
 | long unionread(struct chan *c, void *va, long n) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	int i; | 
 | 	long nr; | 
 | 	struct mhead *m; | 
 | 	struct mount *mount; | 
 |  | 
 | 	qlock(&c->umqlock); | 
 | 	m = c->umh; | 
 | 	rlock(&m->lock); | 
 | 	mount = m->mount; | 
 | 	/* bring mount in sync with c->uri and c->umc */ | 
 | 	for (i = 0; mount != NULL && i < c->uri; i++) | 
 | 		mount = mount->next; | 
 |  | 
 | 	nr = 0; | 
 | 	while (mount != NULL) { | 
 | 		/* Error causes component of union to be skipped */ | 
 | 		if (mount->to) { | 
 | 			/* normally we want to discard the error, but for our | 
 | 			 * ghetto kdirent hack, we need to repeat unionread if | 
 | 			 * we saw a ENODATA */ | 
 | 			if (waserror()) { | 
 | 				if (get_errno() == ENODATA) { | 
 | 					runlock(&m->lock); | 
 | 					qunlock(&c->umqlock); | 
 | 					nexterror(); | 
 | 				} | 
 | 				/* poperror done below for either branch */ | 
 | 			} else { | 
 | 				if (c->umc == NULL) { | 
 | 					c->umc = cclone(mount->to); | 
 | 					c->umc = | 
 | 					    devtab[c->umc->type].open(c->umc, | 
 | 								      O_READ); | 
 | 				} | 
 |  | 
 | 				nr = devtab[c->umc->type].read(c->umc, va, n, | 
 | 							       c->umc->offset); | 
 | 				if (nr < 0) | 
 | 					nr = 0;	/* dev.c can return -1 */ | 
 | 				c->umc->offset += nr; | 
 | 			} | 
 | 			poperror();	/* pop regardless */ | 
 | 		} | 
 | 		if (nr > 0) | 
 | 			break; | 
 |  | 
 | 		/* Advance to next element */ | 
 | 		c->uri++; | 
 | 		if (c->umc) { | 
 | 			cclose(c->umc); | 
 | 			c->umc = NULL; | 
 | 		} | 
 | 		mount = mount->next; | 
 | 	} | 
 | 	runlock(&m->lock); | 
 | 	qunlock(&c->umqlock); | 
 | 	return nr; | 
 | } | 
 |  | 
 | static void unionrewind(struct chan *c) | 
 | { | 
 | 	qlock(&c->umqlock); | 
 | 	c->uri = 0; | 
 | 	if (c->umc) { | 
 | 		cclose(c->umc); | 
 | 		c->umc = NULL; | 
 | 	} | 
 | 	qunlock(&c->umqlock); | 
 | } | 
 |  | 
 | static long rread(int fd, void *va, long n, int64_t * offp) | 
 | { | 
 | 	ERRSTACK(3); | 
 | 	int dir; | 
 | 	struct chan *c; | 
 | 	int64_t off; | 
 |  | 
 | 	/* dirty dirent hack */ | 
 | 	void *real_va = va; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, O_READ, 1, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 |  | 
 | 	if (n < 0) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 |  | 
 | 	dir = c->qid.type & QTDIR; | 
 |  | 
 | 	/* kdirent hack: userspace is expecting kdirents, but all of 9ns | 
 | 	 * produces Ms.  Just save up what we don't use and append the | 
 | 	 * new stuff later. Allocate DIRREADSIZE bytes for that purpose. | 
 | 	 */ | 
 | 	if (dir) { | 
 | 		int amt; | 
 |  | 
 | 		if (n < sizeof(struct kdirent)) | 
 | 			error(EINVAL, "readdir needs to read at least %d", | 
 | 			      sizeof(struct kdirent)); | 
 | 		if (!c->buf) { | 
 | 			c->buf = kmalloc(DIRREADSIZE, MEM_WAIT); | 
 | 			c->bufused = 0; | 
 | 		} | 
 | 		/* Attempt to extract an M, in case there was some already */ | 
 | 		amt = convM2kdirent(c->buf, c->bufused, real_va, 0); | 
 | 		if (amt) { | 
 | 			c->bufused -= amt; | 
 | 			memmove(c->buf, c->buf + amt, c->bufused); | 
 | 			n = sizeof(struct kdirent); | 
 | 			goto out; | 
 | 		} | 
 | 		/* debugging */ | 
 | 		if (waserror()) { | 
 | 			printk("Well, sysread of a dir sucks.%s \n", | 
 | 			       current_errstr()); | 
 | 			nexterror(); | 
 | 		} | 
 | 		va = c->buf + c->bufused; | 
 | 		n = DIRREADSIZE - c->bufused; | 
 | 	} | 
 |  | 
 | 	/* this is the normal plan9 read */ | 
 | 	if (dir && c->umh) | 
 | 		n = unionread(c, va, n); | 
 | 	else { | 
 | 		if (offp == NULL) { | 
 | 			spin_lock(&c->lock); /* lock for int64_t assignment */ | 
 | 			off = c->offset; | 
 | 			spin_unlock(&c->lock); | 
 | 		} else | 
 | 			off = *offp; | 
 | 		if (off < 0) | 
 | 			error(EINVAL, ERROR_FIXME); | 
 | 		if ((off64_t)off + (size_t)n < (off64_t)off) | 
 | 			error(EINVAL, "bad offset %p + count %p", off, n); | 
 | 		if (off == 0) { | 
 | 			if (offp == NULL) { | 
 | 				spin_lock(&c->lock); | 
 | 				c->offset = 0; | 
 | 				c->dri = 0; | 
 | 				spin_unlock(&c->lock); | 
 | 			} | 
 | 			unionrewind(c); | 
 | 		} | 
 | 		if (! c->ateof) { | 
 | 			n = devtab[c->type].read(c, va, n, off); | 
 | 			if (n == 0 && dir) | 
 | 				c->ateof = 1; | 
 | 		} else { | 
 | 			n = 0; | 
 | 		} | 
 | 		spin_lock(&c->lock); | 
 | 		c->offset += n; | 
 | 		spin_unlock(&c->lock); | 
 | 	} | 
 |  | 
 | 	/* dirty kdirent hack */ | 
 | 	if (dir) { | 
 | 		int amt; | 
 | 		c->bufused = c->bufused + n; | 
 | 		/* extract an M from the front, then shift the remainder back */ | 
 | 		amt = convM2kdirent(c->buf, c->bufused, real_va, 0); | 
 | 		c->bufused -= amt; | 
 | 		memmove(c->buf, c->buf + amt, c->bufused); | 
 | 		n = amt ? sizeof(struct kdirent) : 0; | 
 | 		poperror();	/* matching our debugging waserror */ | 
 | 	} | 
 |  | 
 | out: | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return n; | 
 | } | 
 |  | 
 | /* Reads exactly n bytes from chan c, starting at its offset.  Can block, but if | 
 |  * we get 0 back too soon (EOF or error), then we'll error out with ENODATA. | 
 |  * That might need a little work - if there was a previous error, then we | 
 |  * clobbered it and only know ENODATA but not why we completed early. */ | 
 | void read_exactly_n(struct chan *c, void *vp, long n) | 
 | { | 
 | 	char *p; | 
 | 	long nn; | 
 | 	int total = 0, want = n; | 
 |  | 
 | 	p = vp; | 
 | 	while (n > 0) { | 
 | 		nn = devtab[c->type].read(c, p, n, c->offset); | 
 | 		printd("readn: Got %d@%lld\n", nn, c->offset); | 
 | 		if (nn == 0) | 
 | 			error(ENODATA, "wanted %d, got %d", want, total); | 
 | 		spin_lock(&c->lock); | 
 | 		c->offset += nn; | 
 | 		spin_unlock(&c->lock); | 
 | 		p += nn; | 
 | 		n -= nn; | 
 | 		total += nn; | 
 | 	} | 
 | } | 
 |  | 
 | long sysread(int fd, void *va, long n) | 
 | { | 
 | 	return rread(fd, va, n, NULL); | 
 | } | 
 |  | 
 | long syspread(int fd, void *va, long n, int64_t off) | 
 | { | 
 | 	return rread(fd, va, n, &off); | 
 | } | 
 |  | 
 | int sysremove(char *path) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c = namec(path, Aremove, 0, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		c->type = -1;	/* see below */ | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	devtab[c->type].remove(c); | 
 | 	/* | 
 | 	 * Remove clunks the fid, but we need to recover the Chan | 
 | 	 * so fake it up.  -1 aborts the dev's close. | 
 | 	 */ | 
 | 	c->type = -1; | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | int sysrename(char *from_path, char *to_path) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *volatile renamee = NULL; | 
 | 	struct chan *parent_chan; | 
 |  | 
 | 	if (waserror()) { | 
 | 		cclose(renamee); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	renamee = namec(from_path, Aremove, 0, 0, NULL); | 
 | 	/* We might need to support wstat for 'short' rename (intra-directory, | 
 | 	 * with no slashes).  Til then, we can just go with EXDEV. */ | 
 | 	if (!devtab[renamee->type].rename) | 
 | 		error(EXDEV, "device does not support rename"); | 
 | 	parent_chan = namec(to_path, Arename, 0, 0, (char*)renamee); | 
 | 	/* When we're done, renamee still points to the file, but it's in the | 
 | 	 * new location.  Its cname is still the old location, similar to | 
 | 	 * remove.  If anyone cares, we can change it.  parent_chan still points | 
 | 	 * to the parent - it didn't get moved like create does.  Though it does | 
 | 	 * have the name of the new location.  If we want, we can hand that to | 
 | 	 * renamee.  It's a moot point, since they are both getting closed. */ | 
 | 	cclose(renamee); | 
 | 	cclose(parent_chan); | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | int64_t sysseek(int fd, int64_t off, int whence) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct dir *dir; | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 1, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	switch (whence) { | 
 | 	case 0: | 
 | 		if (c->qid.type & QTDIR) { | 
 | 			if (off != 0) | 
 | 				error(EISDIR, ERROR_FIXME); | 
 | 			unionrewind(c); | 
 | 		} else if (off < 0) | 
 | 			error(EINVAL, ERROR_FIXME); | 
 | 		spin_lock(&c->lock);	/* lock for int64_t assignment */ | 
 | 		c->offset = off; | 
 | 		spin_unlock(&c->lock); | 
 | 		break; | 
 |  | 
 | 	case 1: | 
 | 		if (c->qid.type & QTDIR) | 
 | 			error(EISDIR, ERROR_FIXME); | 
 | 		spin_lock(&c->lock);	/* lock for read/write update */ | 
 | 		off += c->offset; | 
 | 		if (off < 0) { | 
 | 			spin_unlock(&c->lock); | 
 | 			error(EINVAL, ERROR_FIXME); | 
 | 		} | 
 | 		c->offset = off; | 
 | 		spin_unlock(&c->lock); | 
 | 		break; | 
 |  | 
 | 	case 2: | 
 | 		if (c->qid.type & QTDIR) | 
 | 			error(EISDIR, ERROR_FIXME); | 
 | 		dir = chandirstat(c); | 
 | 		if (dir == NULL) | 
 | 			error(EFAIL, "internal error: stat error in seek"); | 
 | 		off += dir->length; | 
 | 		kfree(dir); | 
 | 		if (off < 0) | 
 | 			error(EINVAL, ERROR_FIXME); | 
 | 		spin_lock(&c->lock);	/* lock for read/write update */ | 
 | 		c->offset = off; | 
 | 		spin_unlock(&c->lock); | 
 | 		break; | 
 |  | 
 | 	default: | 
 | 		error(EINVAL, ERROR_FIXME); | 
 | 		break; | 
 | 	} | 
 | 	poperror(); | 
 | 	c->dri = 0; | 
 | 	cclose(c); | 
 | 	poperror(); | 
 | 	return off; | 
 | } | 
 |  | 
 | void validstat(uint8_t * s, int n, int slashok) | 
 | { | 
 |  | 
 | 	int m; | 
 | 	char buf[64]; | 
 |  | 
 | 	statcheck(s, n); | 
 | 	/* verify that name entry is acceptable */ | 
 | 	s += STAT_FIX_LEN_9P - STAT_NR_STRINGS_9P * BIT16SZ; | 
 | 	/* | 
 | 	 * s now points at count for first string. | 
 | 	 * if it's too long, let the server decide; this is | 
 | 	 * only for his protection anyway. otherwise | 
 | 	 * we'd have to allocate and waserror. | 
 | 	 */ | 
 | 	m = GBIT16(s); | 
 | 	s += BIT16SZ; | 
 | 	if (m + 1 > sizeof buf) { | 
 | 		return; | 
 | 	} | 
 | 	memmove(buf, s, m); | 
 | 	buf[m] = '\0'; | 
 | 	/* name could be '/' */ | 
 | 	if (strcmp(buf, "/") != 0) | 
 | 		validname(buf, slashok); | 
 | } | 
 |  | 
 | int sysfstat(int fd, uint8_t *buf, int n) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	devtab[c->type].stat(c, buf, n); | 
 |  | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return n; | 
 | } | 
 |  | 
 | int sysfstatakaros(int fd, struct kstat *ks) | 
 | { | 
 |  | 
 | 	int n = 4096; | 
 | 	uint8_t *buf; | 
 |  | 
 | 	buf = kmalloc(n, MEM_WAIT); | 
 | 	n = sysfstat(fd, buf, n); | 
 | 	if (n > 0) { | 
 | 		convM2kstat(buf, n, ks); | 
 | 		n = 0; | 
 | 	} | 
 | 	kfree(buf); | 
 | 	return n; | 
 | } | 
 |  | 
 | static int __stat(char *path, uint8_t *buf, int n, int flags) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	c = namec(path, Aaccess, flags, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	devtab[c->type].stat(c, buf, n); | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 |  | 
 | 	return n; | 
 | } | 
 |  | 
 | int sysstat(char *path, uint8_t *buf, int n) | 
 | { | 
 | 	return __stat(path, buf, n, 0); | 
 | } | 
 |  | 
 | int syslstat(char *path, uint8_t *buf, int n) | 
 | { | 
 | 	return __stat(path, buf, n, O_NOFOLLOW); | 
 | } | 
 |  | 
 | int sysstatakaros(char *path, struct kstat *ks, int flags) | 
 | { | 
 |  | 
 | 	int n = 4096; | 
 | 	uint8_t *buf; | 
 |  | 
 | 	buf = kmalloc(n, MEM_WAIT); | 
 | 	n = __stat(path, buf, n, flags); | 
 | 	if (n > 0) { | 
 | 		convM2kstat(buf, n, ks); | 
 | 		n = 0; | 
 | 	} | 
 | 	kfree(buf); | 
 | 	return n; | 
 | } | 
 |  | 
 | static long rwrite(int fd, void *va, long n, int64_t * offp) | 
 | { | 
 | 	ERRSTACK(3); | 
 | 	struct chan *c; | 
 | 	struct dir *dir; | 
 | 	int64_t off; | 
 | 	long m; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(¤t->open_files, fd, O_WRITE, 1, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	if (c->qid.type & QTDIR) | 
 | 		error(EISDIR, ERROR_FIXME); | 
 |  | 
 | 	if (n < 0) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 |  | 
 | 	if (offp == NULL) { | 
 | 		/* append changes the offset to the end, and even if we fail | 
 | 		 * later, this change will persist */ | 
 | 		if (c->flag & O_APPEND) { | 
 | 			dir = chandirstat(c); | 
 | 			if (!dir) | 
 | 				error(EFAIL, "stat error in append write"); | 
 | 			/* legacy lock for int64 assignment */ | 
 | 			spin_lock(&c->lock); | 
 | 			c->offset = dir->length; | 
 | 			spin_unlock(&c->lock); | 
 | 			kfree(dir); | 
 | 		} | 
 | 		spin_lock(&c->lock); | 
 | 		off = c->offset; | 
 | 		c->offset += n; | 
 | 		spin_unlock(&c->lock); | 
 | 	} else | 
 | 		off = *offp; | 
 |  | 
 | 	if (waserror()) { | 
 | 		if (offp == NULL) { | 
 | 			spin_lock(&c->lock); | 
 | 			c->offset -= n; | 
 | 			spin_unlock(&c->lock); | 
 | 		} | 
 | 		nexterror(); | 
 | 	} | 
 | 	if (off < 0) | 
 | 		error(EINVAL, ERROR_FIXME); | 
 | 	if ((off64_t)off + (size_t)n < (off64_t)off) | 
 | 		error(EINVAL, "bad offset %p + count %p", off, n); | 
 | 	m = devtab[c->type].write(c, va, n, off); | 
 | 	poperror(); | 
 |  | 
 | 	if (offp == NULL && m < n) { | 
 | 		spin_lock(&c->lock); | 
 | 		c->offset -= n - m; | 
 | 		spin_unlock(&c->lock); | 
 | 	} | 
 |  | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return m; | 
 | } | 
 |  | 
 | long syswrite(int fd, void *va, long n) | 
 | { | 
 | 	return rwrite(fd, va, n, NULL); | 
 | } | 
 |  | 
 | long syspwrite(int fd, void *va, long n, int64_t off) | 
 | { | 
 | 	return rwrite(fd, va, n, &off); | 
 | } | 
 |  | 
 | int syswstat(char *path, uint8_t * buf, int n) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	validstat(buf, n, 0); | 
 | 	c = namec(path, Aaccess, 0, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	n = devtab[c->type].wstat(c, buf, n); | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return n; | 
 | } | 
 |  | 
 | struct dir *chandirstat(struct chan *c) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct dir *d; | 
 | 	uint8_t *buf; | 
 | 	int n, nd, i; | 
 |  | 
 | 	nd = DIRSIZE; | 
 | 	for (i = 0; i < 2; i++) {	/* should work by the second try */ | 
 | 		d = kzmalloc(sizeof(struct dir) + nd, MEM_WAIT); | 
 | 		buf = (uint8_t *) & d[1]; | 
 | 		if (waserror()) { | 
 | 			kfree(d); | 
 | 			poperror(); | 
 | 			return NULL; | 
 | 		} | 
 | 		n = devtab[c->type].stat(c, buf, nd); | 
 | 		poperror(); | 
 | 		if (n < BIT16SZ) { | 
 | 			kfree(d); | 
 | 			return NULL; | 
 | 		} | 
 | 		/* size needed to store whole stat buffer including count */ | 
 | 		nd = GBIT16((uint8_t *) buf) + BIT16SZ; | 
 | 		if (nd <= n) { | 
 | 			convM2D(buf, n, d, (char *)&d[1]); | 
 | 			return d; | 
 | 		} | 
 | 		/* else sizeof(Dir)+nd is plenty */ | 
 | 		kfree(d); | 
 | 	} | 
 | 	return NULL; | 
 |  | 
 | } | 
 |  | 
 | static struct dir *__dir_stat(char *name, int flags) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 | 	struct dir *d; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	c = namec(name, Aaccess, flags, 0, NULL); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	d = chandirstat(c); | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return d; | 
 | } | 
 |  | 
 | struct dir *sysdirstat(char *name) | 
 | { | 
 | 	return __dir_stat(name, 0); | 
 | } | 
 |  | 
 | struct dir *sysdirlstat(char *name) | 
 | { | 
 | 	return __dir_stat(name, O_NOFOLLOW); | 
 | } | 
 |  | 
 | struct dir *sysdirfstat(int fd) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 | 	struct dir *d; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return NULL; | 
 | 	} | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 | 	d = chandirstat(c); | 
 | 	poperror(); | 
 | 	cclose(c); | 
 |  | 
 | 	poperror(); | 
 | 	return d; | 
 | } | 
 |  | 
 | int sysdirwstat(char *name, struct dir *dir) | 
 | { | 
 |  | 
 | 	uint8_t *buf; | 
 | 	int r; | 
 |  | 
 | 	r = sizeD2M(dir); | 
 | 	buf = kzmalloc(r, MEM_WAIT); | 
 | 	convD2M(dir, buf, r); | 
 | 	r = syswstat(name, buf, r); | 
 | 	kfree(buf); | 
 | 	return r < 0 ? r : 0; | 
 | } | 
 |  | 
 | int sysdirfwstat(int fd, struct dir *dir) | 
 | { | 
 |  | 
 | 	uint8_t *buf; | 
 | 	int r; | 
 |  | 
 | 	r = sizeD2M(dir); | 
 | 	buf = kzmalloc(r, MEM_WAIT); | 
 | 	convD2M(dir, buf, r); | 
 | 	r = sysfwstat(fd, buf, r); | 
 | 	kfree(buf); | 
 | 	return r < 0 ? r : 0; | 
 | } | 
 |  | 
 | static long dirpackage(uint8_t * buf, long ts, struct kdirent **d) | 
 | { | 
 |  | 
 | 	char *s; | 
 | 	long ss, i, n, nn, m = 0; | 
 |  | 
 | 	*d = NULL; | 
 | 	if (ts <= 0) { | 
 | 		return ts; | 
 | 	} | 
 |  | 
 | 	/* | 
 | 	 * first find number of all stats, check they look like stats, & size | 
 | 	 * all associated strings | 
 | 	 */ | 
 | 	ss = 0; | 
 | 	n = 0; | 
 | 	for (i = 0; i < ts; i += m) { | 
 | 		m = BIT16SZ + GBIT16(&buf[i]); | 
 | 		statcheck(&buf[i], m); | 
 | 		ss += m; | 
 | 		n++; | 
 | 	} | 
 |  | 
 | 	*d = kzmalloc(n * sizeof(**d) + ss, 0); | 
 | 	if (*d == NULL) | 
 | 		error(ENOMEM, ERROR_FIXME); | 
 |  | 
 | 	/* | 
 | 	 * then convert all buffers | 
 | 	 */ | 
 | 	s = (char *)*d + n * sizeof(**d); | 
 | 	nn = 0; | 
 | 	for (i = 0; i < ts; i += m) { | 
 | 		m = BIT16SZ + GBIT16((uint8_t *) & buf[i]); | 
 | 		/* Note 's' is ignored by convM2kdirent */ | 
 | 		if (nn >= n || /*convM2D */ convM2kdirent(&buf[i], m, *d + nn, | 
 | 							  s) != m) { | 
 | 			kfree(*d); | 
 | 			*d = NULL; | 
 | 			error(EFAIL, "bad directory entry"); | 
 | 		} | 
 | 		nn++; | 
 | 		s += m; | 
 | 	} | 
 |  | 
 | 	return nn; | 
 | } | 
 |  | 
 | long sysdirread(int fd, struct kdirent **d) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	uint8_t *buf; | 
 | 	long ts; | 
 |  | 
 | 	*d = NULL; | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	buf = kzmalloc(DIRREADLIM, 0); | 
 | 	if (buf == NULL) | 
 | 		error(ENOMEM, ERROR_FIXME); | 
 | 	if (waserror()) { | 
 | 		kfree(buf); | 
 | 		nexterror(); | 
 | 	} | 
 | 	ts = sysread(fd, buf, DIRREADLIM); | 
 | 	if (ts >= 0) | 
 | 		ts = dirpackage(buf, ts, d); | 
 | 	poperror(); | 
 | 	kfree(buf); | 
 | 	poperror(); | 
 | 	return ts; | 
 | } | 
 |  | 
 | int sysiounit(int fd) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *c; | 
 | 	int n; | 
 |  | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		poperror(); | 
 | 		return 0;	/* n.b. */ | 
 | 	} | 
 | 	n = c->iounit; | 
 | 	poperror(); | 
 | 	cclose(c); | 
 | 	return n; | 
 | } | 
 |  | 
 | void print_chaninfo(struct chan *c) | 
 | { | 
 |  | 
 | 	char buf[128] = { 0 }; | 
 | 	bool has_dev = c->type != -1; | 
 | 	bool has_chaninfo = has_dev && devtab[c->type].chaninfo; | 
 |  | 
 | 	print_lock(); | 
 | 	printk("Chan flags: %p, pathname: %s, ref: %d, Dev: %s, Devinfo: %s", | 
 | 		   c->flag, | 
 | 		   c->name ? c->name->s : "no cname", | 
 | 		   kref_refcnt(&c->ref), | 
 | 		   has_dev ? devtab[c->type].name : "no dev", | 
 | 		   has_chaninfo ? devtab[c->type].chaninfo(c, buf, sizeof(buf)) | 
 | 		                : ""); | 
 | 	if (!has_chaninfo) | 
 | 		printk("qid.path: %p\n", c->qid.path); | 
 | 	printk("\n"); | 
 | 	print_unlock(); | 
 | } | 
 |  | 
 | /* TODO: 9ns ns inheritance flags: Shared, copied, or empty.  The old fgrp is | 
 |  * managed by the fd_table, which is handled outside this function.  We share | 
 |  * the pgrp. */ | 
 | int plan9setup(struct proc *new_proc, struct proc *parent, int flags) | 
 | { | 
 |  | 
 | 	struct chan *new_dot; | 
 |  | 
 | 	ERRSTACK(1); | 
 | 	if (waserror()) { | 
 | 		printk("plan9setup failed, %s\n", current_errstr()); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	if (!parent) { | 
 | 		/* We are probably spawned by the kernel directly, and have no | 
 | 		 * parent to inherit from. */ | 
 | 		new_proc->pgrp = newpgrp(); | 
 | 		new_proc->slash = namec("#kfs", Atodir, 0, 0, NULL); | 
 | 		if (!new_proc->slash) | 
 | 			panic("no kfs device"); | 
 | 		/* Want the name to be "/" instead of "#kfs" */ | 
 | 		cnameclose(new_proc->slash->name); | 
 | 		new_proc->slash->name = newcname("/"); | 
 | 		new_proc->dot = cclone(new_proc->slash); | 
 | 		poperror(); | 
 | 		return 0; | 
 | 	} | 
 | 	/* Shared semantics */ | 
 | 	kref_get(&parent->pgrp->ref, 1); | 
 | 	new_proc->pgrp = parent->pgrp; | 
 | 	/* copy semantics on / and . (doesn't make a lot of sense in akaros | 
 | 	 * o/w). */ | 
 | 	/* / should never disappear while we hold a ref to parent */ | 
 | 	chan_incref(parent->slash); | 
 | 	new_proc->slash = parent->slash; | 
 |  | 
 | 	rcu_read_lock(); | 
 | 	new_dot = rcu_dereference(parent->dot); | 
 | 	kref_get(&new_dot->ref, 1); | 
 | 	rcu_read_unlock(); | 
 | 	new_proc->dot = new_dot; | 
 |  | 
 | 	poperror(); | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* Open flags, create modes, access types, file flags, and all that... | 
 |  * | 
 |  * there are a bunch of things here: | 
 |  * 		1) file creation flags (e.g. O_TRUNC) | 
 |  * 		2) file status flags (e.g. O_APPEND) | 
 |  * 		3) file open modes (e.g. O_RDWR) | 
 |  * 		4) file descriptor flags (e.g. CLOEXEC) | 
 |  * 		5) file creation mode (e.g. S_IRWXU) | 
 |  * the 1-4 are passed in via open's vfs_flags, and the 5 via mode only when | 
 |  * O_CREATE is set. | 
 |  * | 
 |  * file creation flags (1) only matter when creating, but aren't permanent. | 
 |  * O_EXCL, O_DIRECTORY, O_TRUNC, etc. | 
 |  * | 
 |  * file status flags (2) are per struct file/chan.  stuff like O_APPEND, | 
 |  * O_ASYNC, etc.  we convert those to an internal flag bit and store in c->flags | 
 |  * | 
 |  * the open mode (3) matters for a given FD/chan (chan->mode), and should be | 
 |  * stored in the chan. (c->mode) stuff like O_RDONLY. | 
 |  * | 
 |  * the file descriptor flags (4) clearly are in the FD.  note that the same | 
 |  * file/chan can be opened by two different FDs, with different flags.  the only | 
 |  * one anyone uses is CLOEXEC.  while exec may not last long in akaros, i can | 
 |  * imagine similar "never pass to children" flags/meanings. | 
 |  * | 
 |  * the file creation mode (5) matters for the device's permissions; given this, | 
 |  * it should be stored in the device/inode.  ACLs fall under this category. | 
 |  * | 
 |  * finally, only certain categories can be edited afterwards: file status flags | 
 |  * (2), FD flags (4), and file permissions (5).	*/ | 
 | int fd_getfl(int fd) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct chan *c; | 
 | 	int ret; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 |  | 
 | 	ret = c->mode; | 
 | 	ret |= c->flag & CEXTERNAL_FLAGS; | 
 |  | 
 | 	cclose(c); | 
 | 	poperror(); | 
 | 	return ret; | 
 | } | 
 |  | 
 | static bool cexternal_flags_differ(int set1, int set2, int flags) | 
 | { | 
 | 	flags &= CEXTERNAL_FLAGS; | 
 | 	return (set1 & flags) ^ (set2 & flags); | 
 | } | 
 |  | 
 | static int chan_setfl(struct chan *c, int flags) | 
 | { | 
 | 	int ret; | 
 |  | 
 | 	if (cexternal_flags_differ(flags, c->flag, O_CLOEXEC)) { | 
 | 		/* TODO: The whole CCEXEC / O_CLOEXEC on 9ns needs work */ | 
 | 		error(EINVAL, "can't toggle O_CLOEXEC with setfl"); | 
 | 	} | 
 | 	if (cexternal_flags_differ(flags, c->flag, O_REMCLO)) | 
 | 		error(EINVAL, "can't toggle O_REMCLO with setfl"); | 
 | 	if (cexternal_flags_differ(flags, c->flag, O_PATH)) | 
 | 		error(EINVAL, "can't toggle O_PATH with setfl"); | 
 | 	ret = devtab[c->type].chan_ctl(c, CCTL_SET_FL, flags & CEXTERNAL_FLAGS, | 
 | 	                               0, 0, 0); | 
 | 	c->flag = (c->flag & ~CEXTERNAL_FLAGS) | (flags & CEXTERNAL_FLAGS); | 
 | 	return ret; | 
 | } | 
 |  | 
 | int fd_chan_ctl(int fd, int cmd, unsigned long arg1, unsigned long arg2, | 
 |                 unsigned long arg3, unsigned long arg4) | 
 | { | 
 | 	ERRSTACK(2); | 
 | 	struct chan *c; | 
 | 	int ret; | 
 |  | 
 | 	if (waserror()) { | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
 | 	if (waserror()) { | 
 | 		cclose(c); | 
 | 		nexterror(); | 
 | 	} | 
 |  | 
 | 	if (!devtab[c->type].chan_ctl) | 
 | 		error(EINVAL, "%s has no chan_ctl, can't %d", chan_dev_name(c), | 
 | 		      cmd); | 
 |  | 
 | 	/* Some commands require 9ns support in addition to the device ctl. */ | 
 | 	switch (cmd) { | 
 | 	case CCTL_SET_FL: | 
 | 		ret = chan_setfl(c, arg1); | 
 | 		break; | 
 | 	default: | 
 | 		ret = devtab[c->type].chan_ctl(c, cmd, arg1, arg2, arg3, arg4); | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	poperror(); | 
 | 	cclose(c); | 
 | 	poperror(); | 
 | 	return ret; | 
 | } | 
 |  | 
 | ssize_t kread_file(struct file_or_chan *file, void *buf, size_t sz) | 
 | { | 
 | 	/* TODO: (KFOP) (VFS kernel read/writes need to be from a ktask) */ | 
 | 	uintptr_t old_ret = switch_to_ktask(); | 
 | 	off64_t dummy = 0; | 
 | 	ssize_t cpy_amt = foc_read(file, buf, sz, dummy); | 
 |  | 
 | 	switch_back_from_ktask(old_ret); | 
 | 	return cpy_amt; | 
 | } | 
 |  | 
 | /* Reads the contents of an entire file into a buffer, returning that buffer. | 
 |  * On error, prints something useful and returns 0 */ | 
 | void *kread_whole_file(struct file_or_chan *file) | 
 | { | 
 | 	size_t size; | 
 | 	void *contents; | 
 | 	ssize_t cpy_amt; | 
 |  | 
 | 	size = foc_get_len(file); | 
 | 	contents = kmalloc(size, MEM_WAIT); | 
 | 	cpy_amt = kread_file(file, contents, size); | 
 | 	if (cpy_amt < 0) { | 
 | 		printk("Error %d reading file %s\n", get_errno(), | 
 | 		       foc_to_name(file)); | 
 | 		kfree(contents); | 
 | 		return 0; | 
 | 	} | 
 | 	if (cpy_amt != size) { | 
 | 		printk("Read %d, needed %d for file %s\n", cpy_amt, size, | 
 | 		       foc_to_name(file)); | 
 | 		kfree(contents); | 
 | 		return 0; | 
 | 	} | 
 | 	return contents; | 
 | } | 
 |  | 
 | /* Process-related File management functions */ | 
 |  | 
 | /* Given any FD, get the appropriate object, 0 o/w. Set incref if you want a | 
 |  * reference count (which is a 9ns thing, you can't use the pointer if you | 
 |  * didn't incref). */ | 
 | void *lookup_fd(struct fd_table *fdt, int fd, bool incref) | 
 | { | 
 | 	void *retval = 0; | 
 |  | 
 | 	if (fd < 0) | 
 | 		return 0; | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fdt->closed) { | 
 | 		spin_unlock(&fdt->lock); | 
 | 		return 0; | 
 | 	} | 
 | 	if (fd < fdt->max_fdset) { | 
 | 		if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { | 
 | 			/* while max_files and max_fdset might not line up, we | 
 | 			 * should never have a valid fdset higher than files */ | 
 | 			assert(fd < fdt->max_files); | 
 | 			retval = fdt->fd[fd].fd_chan; | 
 | 			if (incref) | 
 | 				chan_incref((struct chan*)retval); | 
 | 		} | 
 | 	} | 
 | 	spin_unlock(&fdt->lock); | 
 | 	return retval; | 
 | } | 
 |  | 
 | /* Grow the vfs fd set */ | 
 | static int grow_fd_set(struct fd_table *open_files) | 
 | { | 
 | 	int n; | 
 | 	struct file_desc *nfd, *ofd; | 
 |  | 
 | 	/* Only update open_fds once. If currently pointing to open_fds_init, | 
 | 	 * then update it to point to a newly allocated fd_set with space for | 
 | 	 * NR_FILE_DESC_MAX */ | 
 | 	if (open_files->open_fds == (struct fd_set*)&open_files->open_fds_init) | 
 | 	{ | 
 | 		open_files->open_fds = kzmalloc(sizeof(struct fd_set), 0); | 
 | 		memmove(open_files->open_fds, &open_files->open_fds_init, | 
 | 		        sizeof(struct small_fd_set)); | 
 | 	} | 
 |  | 
 | 	/* Grow the open_files->fd array in increments of NR_OPEN_FILES_DEFAULT | 
 | 	 */ | 
 | 	n = open_files->max_files + NR_OPEN_FILES_DEFAULT; | 
 | 	if (n > NR_FILE_DESC_MAX) | 
 | 		return -EMFILE; | 
 | 	nfd = kzmalloc(n * sizeof(struct file_desc), 0); | 
 | 	if (nfd == NULL) | 
 | 		return -ENOMEM; | 
 |  | 
 | 	/* Move the old array on top of the new one */ | 
 | 	ofd = open_files->fd; | 
 | 	memmove(nfd, ofd, open_files->max_files * sizeof(struct file_desc)); | 
 |  | 
 | 	/* Update the array and the maxes for both max_files and max_fdset */ | 
 | 	open_files->fd = nfd; | 
 | 	open_files->max_files = n; | 
 | 	open_files->max_fdset = n; | 
 |  | 
 | 	/* Only free the old one if it wasn't pointing to open_files->fd_array*/ | 
 | 	if (ofd != open_files->fd_array) | 
 | 		kfree(ofd); | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* Free the vfs fd set if necessary */ | 
 | static void free_fd_set(struct fd_table *open_files) | 
 | { | 
 | 	void *free_me; | 
 |  | 
 | 	if (open_files->open_fds != (struct fd_set*)&open_files->open_fds_init) | 
 | 	{ | 
 | 		assert(open_files->fd != open_files->fd_array); | 
 | 		/* need to reset the pointers to the internal addrs, in case we | 
 | 		 * take a look while debugging.  0 them out, since they have old | 
 | 		 * data.  our current versions should all be closed. */ | 
 | 		memset(&open_files->open_fds_init, 0, | 
 | 		       sizeof(struct small_fd_set)); | 
 | 		memset(&open_files->fd_array, 0, sizeof(open_files->fd_array)); | 
 |  | 
 | 		free_me = open_files->open_fds; | 
 | 		open_files->open_fds = | 
 | 			(struct fd_set*)&open_files->open_fds_init; | 
 | 		kfree(free_me); | 
 |  | 
 | 		free_me = open_files->fd; | 
 | 		open_files->fd = open_files->fd_array; | 
 | 		kfree(free_me); | 
 | 	} | 
 | } | 
 |  | 
 | /* If FD is in the group, remove it, decref it, and return TRUE. */ | 
 | bool close_fd(struct fd_table *fdt, int fd) | 
 | { | 
 | 	struct chan *chan = 0; | 
 | 	struct fd_tap *tap = 0; | 
 | 	bool ret = FALSE; | 
 |  | 
 | 	if (fd < 0) | 
 | 		return FALSE; | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fd < fdt->max_fdset) { | 
 | 		if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { | 
 | 			/* while max_files and max_fdset might not line up, we | 
 | 			 * should never have a valid fdset higher than files */ | 
 | 			assert(fd < fdt->max_files); | 
 | 			chan = fdt->fd[fd].fd_chan; | 
 | 			tap = fdt->fd[fd].fd_tap; | 
 | 			fdt->fd[fd].fd_chan = 0; | 
 | 			fdt->fd[fd].fd_tap = 0; | 
 | 			CLR_BITMASK_BIT(fdt->open_fds->fds_bits, fd); | 
 | 			if (fd < fdt->hint_min_fd) | 
 | 				fdt->hint_min_fd = fd; | 
 | 			ret = TRUE; | 
 | 		} | 
 | 	} | 
 | 	spin_unlock(&fdt->lock); | 
 | 	/* Need to decref/cclose outside of the lock; they could sleep */ | 
 | 	cclose(chan); | 
 | 	if (tap) | 
 | 		kref_put(&tap->kref); | 
 | 	return ret; | 
 | } | 
 |  | 
 | static int __get_fd(struct fd_table *open_files, int low_fd, bool must_use_low) | 
 | { | 
 | 	int slot = -1; | 
 | 	int error; | 
 | 	bool update_hint = TRUE; | 
 |  | 
 | 	if ((low_fd < 0) || (low_fd > NR_FILE_DESC_MAX)) | 
 | 		return -EINVAL; | 
 | 	if (open_files->closed) | 
 | 		return -EINVAL;	/* won't matter, they are dying */ | 
 | 	if (must_use_low | 
 | 	    && GET_BITMASK_BIT(open_files->open_fds->fds_bits, low_fd)) | 
 | 		return -ENFILE; | 
 | 	if (low_fd > open_files->hint_min_fd) | 
 | 		update_hint = FALSE; | 
 | 	else | 
 | 		low_fd = open_files->hint_min_fd; | 
 | 	/* Loop until we have a valid slot (we grow the fd_array at the bottom | 
 | 	 * of the loop if we haven't found a slot in the current array */ | 
 | 	while (slot == -1) { | 
 | 		for (low_fd; low_fd < open_files->max_fdset; low_fd++) { | 
 | 			if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, | 
 | 					    low_fd)) | 
 | 				continue; | 
 | 			slot = low_fd; | 
 | 			SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot); | 
 | 			assert(slot < open_files->max_files && | 
 | 			       open_files->fd[slot].fd_chan == 0); | 
 | 			/* We know slot >= hint, since we started with hint */ | 
 | 			if (update_hint) | 
 | 				open_files->hint_min_fd = slot + 1; | 
 | 			break; | 
 | 		} | 
 | 		if (slot == -1)	{ | 
 | 			if ((error = grow_fd_set(open_files))) | 
 | 				return error; | 
 | 		} | 
 | 	} | 
 | 	return slot; | 
 | } | 
 |  | 
 | /* Insert a file or chan (obj, chosen by vfs) into the fd group with fd_flags. | 
 |  * If must_use_low, then we have to insert at FD = low_fd.  o/w we start looking | 
 |  * for empty slots at low_fd. */ | 
 | int insert_obj_fdt(struct fd_table *fdt, void *obj, int low_fd, int fd_flags, | 
 |                    bool must_use_low) | 
 | { | 
 | 	int slot; | 
 |  | 
 | 	spin_lock(&fdt->lock); | 
 | 	slot = __get_fd(fdt, low_fd, must_use_low); | 
 | 	if (slot < 0) { | 
 | 		spin_unlock(&fdt->lock); | 
 | 		return slot; | 
 | 	} | 
 | 	assert(slot < fdt->max_files && | 
 | 	       fdt->fd[slot].fd_chan == 0); | 
 | 	chan_incref((struct chan*)obj); | 
 | 	fdt->fd[slot].fd_chan = obj; | 
 | 	fdt->fd[slot].fd_flags = fd_flags; | 
 | 	spin_unlock(&fdt->lock); | 
 | 	return slot; | 
 | } | 
 |  | 
 | /* Closes all open files.  Mostly just a "put" for all files.  If cloexec, it | 
 |  * will only close the FDs with FD_CLOEXEC (opened with O_CLOEXEC or fcntld). | 
 |  * | 
 |  * Notes on concurrency: | 
 |  * - Can't hold spinlocks while we call cclose, since it might sleep eventually. | 
 |  * - We're called from proc_destroy, so we could have concurrent openers trying | 
 |  *   to add to the group (other syscalls), hence the "closed" flag. | 
 |  * - dot and slash chans are dealt with in proc_free.  its difficult to close | 
 |  *   and zero those with concurrent syscalls, since those are a source of krefs. | 
 |  * - Once we lock and set closed, no further additions can happen.  To simplify | 
 |  *   our closes, we also allow multiple calls to this func (though that should | 
 |  *   never happen with the current code). */ | 
 | void close_fdt(struct fd_table *fdt, bool cloexec) | 
 | { | 
 | 	struct chan *chan; | 
 | 	struct file_desc *to_close; | 
 | 	int idx = 0; | 
 |  | 
 | 	to_close = kzmalloc(sizeof(struct file_desc) * fdt->max_files, | 
 | 	                    MEM_WAIT); | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fdt->closed) { | 
 | 		spin_unlock(&fdt->lock); | 
 | 		kfree(to_close); | 
 | 		return; | 
 | 	} | 
 | 	for (int i = 0; i < fdt->max_fdset; i++) { | 
 | 		if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, i)) { | 
 | 			/* while max_files and max_fdset might not line up, we | 
 | 			 * should never have a valid fdset higher than files */ | 
 | 			assert(i < fdt->max_files); | 
 | 			if (cloexec && !(fdt->fd[i].fd_flags & FD_CLOEXEC)) | 
 | 				continue; | 
 | 			chan = fdt->fd[i].fd_chan; | 
 | 			to_close[idx].fd_tap = fdt->fd[i].fd_tap; | 
 | 			fdt->fd[i].fd_tap = 0; | 
 | 			fdt->fd[i].fd_chan = 0; | 
 | 			to_close[idx++].fd_chan = chan; | 
 | 			CLR_BITMASK_BIT(fdt->open_fds->fds_bits, i); | 
 | 		} | 
 | 	} | 
 | 	/* it's just a hint, we can build back up from being 0 */ | 
 | 	fdt->hint_min_fd = 0; | 
 | 	if (!cloexec) { | 
 | 		free_fd_set(fdt); | 
 | 		fdt->closed = TRUE; | 
 | 	} | 
 | 	spin_unlock(&fdt->lock); | 
 | 	/* We go through some hoops to close/decref outside the lock.  Nice for | 
 | 	 * not holding the lock for a while; critical in case the decref/cclose | 
 | 	 * sleeps (it can) */ | 
 | 	for (int i = 0; i < idx; i++) { | 
 | 		cclose(to_close[i].fd_chan); | 
 | 		if (to_close[i].fd_tap) | 
 | 			kref_put(&to_close[i].fd_tap->kref); | 
 | 	} | 
 | 	kfree(to_close); | 
 | } | 
 |  | 
 | /* Inserts all of the files from src into dst, used by sys_fork(). */ | 
 | void clone_fdt(struct fd_table *src, struct fd_table *dst) | 
 | { | 
 | 	struct chan *chan; | 
 | 	int ret; | 
 |  | 
 | 	spin_lock(&src->lock); | 
 | 	if (src->closed) { | 
 | 		spin_unlock(&src->lock); | 
 | 		return; | 
 | 	} | 
 | 	spin_lock(&dst->lock); | 
 | 	if (dst->closed) { | 
 | 		warn("Destination closed before it opened"); | 
 | 		spin_unlock(&dst->lock); | 
 | 		spin_unlock(&src->lock); | 
 | 		return; | 
 | 	} | 
 | 	while (src->max_files > dst->max_files) { | 
 | 		ret = grow_fd_set(dst); | 
 | 		if (ret < 0) { | 
 | 			set_error(-ret, "Failed to grow for a clone_fdt"); | 
 | 			spin_unlock(&dst->lock); | 
 | 			spin_unlock(&src->lock); | 
 | 			return; | 
 | 		} | 
 | 	} | 
 | 	for (int i = 0; i < src->max_fdset; i++) { | 
 | 		if (GET_BITMASK_BIT(src->open_fds->fds_bits, i)) { | 
 | 			/* while max_files and max_fdset might not line up, we | 
 | 			 * should never have a valid fdset higher than files */ | 
 | 			assert(i < src->max_files); | 
 | 			chan = src->fd[i].fd_chan; | 
 | 			assert(i < dst->max_files && dst->fd[i].fd_chan == 0); | 
 | 			SET_BITMASK_BIT(dst->open_fds->fds_bits, i); | 
 | 			dst->fd[i].fd_chan = chan; | 
 | 			chan_incref(chan); | 
 | 		} | 
 | 	} | 
 | 	dst->hint_min_fd = src->hint_min_fd; | 
 | 	spin_unlock(&dst->lock); | 
 | 	spin_unlock(&src->lock); | 
 | } | 
 |  | 
 | int fd_get_fd_flags(struct fd_table *fdt, int fd) | 
 | { | 
 | 	int ret = -1; | 
 |  | 
 | 	if (fd < 0) | 
 | 		return -1; | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fdt->closed) { | 
 | 		spin_unlock(&fdt->lock); | 
 | 		return -1; | 
 | 	} | 
 | 	if ((fd < fdt->max_fdset) | 
 | 	    && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) | 
 | 		ret = fdt->fd[fd].fd_flags; | 
 | 	spin_unlock(&fdt->lock); | 
 | 	if (ret == -1) | 
 | 		set_error(EBADF, "FD was not open"); | 
 | 	return ret; | 
 | } | 
 |  | 
 | int fd_set_fd_flags(struct fd_table *fdt, int fd, int new_fl) | 
 | { | 
 | 	int ret = -1; | 
 |  | 
 | 	if (fd < 0) | 
 | 		return -1; | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fdt->closed) { | 
 | 		spin_unlock(&fdt->lock); | 
 | 		return -1; | 
 | 	} | 
 | 	if ((fd < fdt->max_fdset) | 
 | 	    && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) | 
 | 		fdt->fd[fd].fd_flags = new_fl; | 
 | 	spin_unlock(&fdt->lock); | 
 | 	if (ret == -1) | 
 | 		set_error(EBADF, "FD was not open"); | 
 | 	return ret; | 
 | } |