|  | /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved. | 
|  | * Portions Copyright © 1997-1999 Vita Nuova Limited | 
|  | * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited | 
|  | *                                (www.vitanuova.com) | 
|  | * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others | 
|  | * | 
|  | * Modified for the Akaros operating system: | 
|  | * Copyright (c) 2013-2014 The Regents of the University of California | 
|  | * Copyright (c) 2013-2015 Google Inc. | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | * of this software and associated documentation files (the "Software"), to deal | 
|  | * in the Software without restriction, including without limitation the rights | 
|  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | * copies of the Software, and to permit persons to whom the Software is | 
|  | * furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE | 
|  | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
|  | * SOFTWARE. */ | 
|  |  | 
|  | #include <slab.h> | 
|  | #include <kmalloc.h> | 
|  | #include <kref.h> | 
|  | #include <string.h> | 
|  | #include <stdio.h> | 
|  | #include <assert.h> | 
|  | #include <error.h> | 
|  | #include <cpio.h> | 
|  | #include <pmap.h> | 
|  | #include <smp.h> | 
|  | #include <net/ip.h> | 
|  | #include <rcu.h> | 
|  |  | 
|  | /* TODO: these sizes are hokey.  DIRSIZE is used in chandirstat, and it looks | 
|  | * like it's the size of a common-case stat. */ | 
|  | enum { | 
|  | DIRSIZE = STAT_FIX_LEN_AK + 32 * STAT_NR_STRINGS_AK, | 
|  |  | 
|  | /* should handle the largest reasonable directory entry */ | 
|  | DIRREADLIM = 2048, | 
|  |  | 
|  | /* Just read a lot. Memory is cheap, lots of bandwidth, and RPCs are | 
|  | * very expensive. At the same time, let's not yet exceed a common | 
|  | * MSIZE. */ | 
|  | DIRREADSIZE = 8192, | 
|  | }; | 
|  |  | 
|  | int newfd(struct chan *c, int low_fd, int oflags, bool must_use_low) | 
|  | { | 
|  | int ret = insert_obj_fdt(¤t->open_files, c, low_fd, | 
|  | oflags & O_CLOEXEC ? FD_CLOEXEC : 0, | 
|  | must_use_low); | 
|  | if (ret >= 0) | 
|  | cclose(c); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | struct chan *fdtochan(struct fd_table *fdt, int fd, int mode, int chkmnt, | 
|  | int iref) | 
|  | { | 
|  | struct chan *c; | 
|  |  | 
|  | c = lookup_fd(fdt, fd, iref); | 
|  | if (!c) { | 
|  | /* We lost the info about why there was a problem (we used to | 
|  | * track file group closed too, can add that in later). */ | 
|  | error(EBADF, ERROR_FIXME); | 
|  | } | 
|  | if (chkmnt && (c->flag & CMSG)) { | 
|  | if (iref) | 
|  | cclose(c); | 
|  | error(EBADF, ERROR_FIXME); | 
|  | } | 
|  | if (mode < 0) | 
|  | return c; | 
|  | if ((mode & c->mode) != mode) { | 
|  | if (iref) | 
|  | cclose(c); | 
|  | error(EBADF, | 
|  | "FD access mode failure: chan mode 0x%x, wanted 0x%x (opened with 0 instead of O_READ?)", | 
|  | c->mode, mode); | 
|  | } | 
|  | return c; | 
|  | } | 
|  |  | 
|  | long kchanio(void *vc, void *buf, int n, int mode) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int r; | 
|  | struct chan *c; | 
|  |  | 
|  | c = vc; | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (mode == O_READ) | 
|  | r = devtab[c->type].read(c, buf, n, c->offset); | 
|  | else if (mode == O_WRITE) | 
|  | r = devtab[c->type].write(c, buf, n, c->offset); | 
|  | else | 
|  | error(ENOSYS, "kchanio: use only O_READ xor O_WRITE"); | 
|  |  | 
|  | spin_lock(&c->lock); | 
|  | c->offset += r; | 
|  | spin_unlock(&c->lock); | 
|  | poperror(); | 
|  | return r; | 
|  | } | 
|  |  | 
/* Reduces a set of open flags to the basic access mode (the O_ACCMODE bits).
 *
 * GIANT WARNING: if this ever throws, ipopen (and probably many others) will
 * screw up refcnts of Qctl, err, data, etc.
 *
 * History: the old plan9-style version rejected out-of-range values, stripped
 * OTRUNC/OCEXEC/ORCLOSE, and converted OEXEC into OREAD.  We no longer convert
 * OEXEC/O_EXEC to O_READ, and we do no error checking at all (there are far
 * too many flags for that); we just hand back the RD/WR/etc bits. */
int openmode(uint32_t omode)
{
	uint32_t access_bits = omode & O_ACCMODE;

	return access_bits;
}
|  |  | 
/* Closes descriptor fd in table fdt.  Thin wrapper around close_fd(); its
 * result, if any, is not reported. */
void fdclose(struct fd_table *fdt, int fd)
{
	close_fd(fdt, fd);
}
|  |  | 
|  | static void set_dot(struct proc *p, struct chan *c) | 
|  | { | 
|  | c = atomic_swap_ptr((void**)&p->dot, c); | 
|  | synchronize_rcu(); | 
|  | cclose(c); | 
|  | } | 
|  |  | 
|  | /* Note namec() happens in the namespace of the caller. */ | 
|  | int syschdir(struct proc *target, char *path) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = namec(path, Atodir, 0, 0, NULL); | 
|  | poperror(); | 
|  | set_dot(target, c); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Note fdtochan() happens with the FDs of the caller. */ | 
|  | int sysfchdir(struct proc *target, int fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | poperror(); | 
|  |  | 
|  | /* This is a little hokey.  Ideally, we'd only allow O_PATH fds to be | 
|  | * fchdir'd.  Linux/POSIX lets you do arbitrary FDs.  Luckily, we stored | 
|  | * the name when we walked (__namec_from), so we should be able to | 
|  | * recreate the chan.  Using namec() with channame() is a more | 
|  | * heavy-weight cclone(), but also might have issues if the chan has | 
|  | * since been removed or the namespace is otherwise different from when | 
|  | * the original fd/chan was first created. */ | 
|  | if (c->flag & O_PATH) { | 
|  | set_dot(target, c); | 
|  | return 0; | 
|  | } | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | syschdir(target, channame(c)); | 
|  | cclose(c); | 
|  | poperror(); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int sysclose(int fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct fd_table *fdt = ¤t->open_files; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | /* | 
|  | * Take no reference on the chan because we don't really need the | 
|  | * data structure, and are calling fdtochan only for error checks. | 
|  | * fdclose takes care of processes racing through here. | 
|  | */ | 
|  | fdtochan(fdt, fd, -1, 0, 0); | 
|  | fdclose(fdt, fd); | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int syscreate(char *path, int mode, uint32_t perm) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | int fd; | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | openmode(mode & ~O_EXCL);	/* error check only; OEXCL okay here */ | 
|  | c = namec(path, Acreate, mode, perm, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | /* 9ns mode is the O_FLAGS and perm is glibc mode */ | 
|  | fd = newfd(c, 0, mode, FALSE); | 
|  | if (fd < 0) | 
|  | error(-fd, ERROR_FIXME); | 
|  | poperror(); | 
|  |  | 
|  | poperror(); | 
|  | return fd; | 
|  | } | 
|  |  | 
|  | int sysdup(int old, int low_fd, bool must_use_low) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int fd; | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, old, -1, 0, 1); | 
|  | if (c->qid.type & QTAUTH) { | 
|  | cclose(c); | 
|  | error(EPERM, ERROR_FIXME); | 
|  | } | 
|  | fd = newfd(c, low_fd, 0, must_use_low); | 
|  | if (fd < 0) { | 
|  | cclose(c); | 
|  | error(-fd, ERROR_FIXME); | 
|  | } | 
|  | poperror(); | 
|  | return fd; | 
|  | } | 
|  |  | 
|  | /* Could pass in the fdt instead of the proc, but we used to need the to_proc | 
|  | * for now so we can claim a VFS FD.  Careful, we don't close the old chan. */ | 
|  | int sys_dup_to(struct proc *from_proc, unsigned int from_fd, | 
|  | struct proc *to_proc, unsigned int to_fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int ret; | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(&from_proc->open_files, from_fd, -1, 0, 1); | 
|  | if (c->qid.type & QTAUTH) { | 
|  | cclose(c); | 
|  | error(EPERM, ERROR_FIXME); | 
|  | } | 
|  | ret = insert_obj_fdt(&to_proc->open_files, c, to_fd, 0, TRUE); | 
|  | /* drop the ref from fdtochan.  if insert succeeded, there is one other | 
|  | * ref stored in the FDT */ | 
|  | cclose(c); | 
|  | if (ret < 0) | 
|  | error(EFAIL, "Can't insert FD %d into FDG", to_fd); | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | char *sysfd2path(int fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  | char *s; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return NULL; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | s = NULL; | 
|  | if (c->name != NULL) { | 
|  | s = kzmalloc(c->name->len + 1, 0); | 
|  | if (s == NULL) { | 
|  | cclose(c); | 
|  | error(ENOMEM, ERROR_FIXME); | 
|  | } | 
|  | memmove(s, c->name->s, c->name->len + 1); | 
|  | } | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return s; | 
|  | } | 
|  |  | 
|  | char *sysgetcwd(void) | 
|  | { | 
|  | char *s = NULL; | 
|  | struct chan *dot; | 
|  |  | 
|  | rcu_read_lock(); | 
|  | dot = rcu_dereference(current->dot); | 
|  | kref_get(&dot->ref, 1); | 
|  | rcu_read_unlock(); | 
|  | if (dot->name) | 
|  | kstrdup(&s, dot->name->s); | 
|  | cclose(dot); | 
|  | return s; | 
|  | } | 
|  |  | 
|  | int sysfauth(int fd, char *aname) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c, *ac; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | validname(aname, 0); | 
|  | c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  |  | 
|  | ac = mntauth(c, aname); | 
|  |  | 
|  | /* at this point ac is responsible for keeping c alive */ | 
|  | poperror();	/* c */ | 
|  | cclose(c); | 
|  |  | 
|  | if (waserror()) { | 
|  | cclose(ac); | 
|  | nexterror(); | 
|  | } | 
|  |  | 
|  | fd = newfd(ac, 0, 0, FALSE); | 
|  | if (fd < 0) | 
|  | error(-fd, ERROR_FIXME); | 
|  | poperror();	/* ac */ | 
|  |  | 
|  | poperror(); | 
|  |  | 
|  | return fd; | 
|  | } | 
|  |  | 
|  | int sysfversion(int fd, unsigned int msize, char *vers, unsigned int arglen) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | int m; | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /* check there's a NUL in the version string */ | 
|  | if (arglen == 0 || memchr(vers, 0, arglen) == 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  |  | 
|  | m = mntversion(c, vers, msize, arglen); | 
|  |  | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return m; | 
|  | } | 
|  |  | 
|  | int sysfwstat(int fd, uint8_t * buf, int n) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | validstat(buf, n, 0); | 
|  | c = fdtochan(¤t->open_files, fd, -1, 1, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | n = devtab[c->type].wstat(c, buf, n); | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | long bindmount(struct chan *c, char *old, int flag, char *spec) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int ret; | 
|  | struct chan *c1; | 
|  |  | 
|  | if (flag > MMASK || (flag & MORDER) == (MBEFORE | MAFTER)) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  |  | 
|  | c1 = namec(old, Amount, 0, 0, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c1); | 
|  | nexterror(); | 
|  | } | 
|  | ret = cmount(c, c1, flag, spec); | 
|  |  | 
|  | poperror(); | 
|  | cclose(c1); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int sysbind(char *new, char *old, int flags) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | long r; | 
|  | struct chan *c0; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c0 = namec(new, Abind, 0, 0, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c0); | 
|  | nexterror(); | 
|  | } | 
|  | r = bindmount(c0, old, flags, ""); | 
|  | poperror(); | 
|  | cclose(c0); | 
|  |  | 
|  | poperror(); | 
|  | return r; | 
|  | } | 
|  |  | 
|  | int syssymlink(char *new_path, char *old_path) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | validname(old_path, true); | 
|  | c = namec(new_path, Acreate, O_EXCL, | 
|  | DMSYMLINK | S_IRWXU | S_IRWXG | S_IRWXO, old_path); | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int sysmount(int fd, int afd, char *old, int flags, char *spec) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | long r; | 
|  | volatile struct { | 
|  | struct chan *c; | 
|  | } c0; | 
|  | volatile struct { | 
|  | struct chan *c; | 
|  | } bc; | 
|  | volatile struct { | 
|  | struct chan *c; | 
|  | } ac; | 
|  | struct mntparam mntparam; | 
|  |  | 
|  | ac.c = NULL; | 
|  | bc.c = NULL; | 
|  | c0.c = NULL; | 
|  | if (waserror()) { | 
|  | cclose(ac.c); | 
|  | cclose(bc.c); | 
|  | cclose(c0.c); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | bc.c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); | 
|  | if (afd >= 0) | 
|  | ac.c = fdtochan(¤t->open_files, afd, O_RDWR, 0, 1); | 
|  | mntparam.chan = bc.c; | 
|  | mntparam.authchan = ac.c; | 
|  | mntparam.spec = spec; | 
|  | c0.c = devtab[devno("mnt", 0)].attach((char *)&mntparam); | 
|  | if (flags & MCACHE) | 
|  | c0.c = devtab[devno("gtfs", 0)].attach((char*)c0.c); | 
|  | r = bindmount(c0.c, old, flags, spec); | 
|  | poperror(); | 
|  | cclose(ac.c); | 
|  | cclose(bc.c); | 
|  | cclose(c0.c); | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | int sysunmount(char *src_path, char *onto_path) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | volatile struct { | 
|  | struct chan *c; | 
|  | } cmount; | 
|  | volatile struct { | 
|  | struct chan *c; | 
|  | } cmounted; | 
|  |  | 
|  | cmount.c = NULL; | 
|  | cmounted.c = NULL; | 
|  | if (waserror()) { | 
|  | cclose(cmount.c); | 
|  | cclose(cmounted.c); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | cmount.c = namec(onto_path, Amount, 0, 0, NULL); | 
|  | if (src_path != NULL && src_path[0] != '\0') { | 
|  | /* | 
|  | * This has to be namec(..., Aopen, ...) because | 
|  | * if arg[0] is something like /srv/cs or /fd/0, | 
|  | * opening it is the only way to get at the real | 
|  | * Chan underneath. | 
|  | */ | 
|  | cmounted.c = namec(src_path, Aopen, O_READ, 0, NULL); | 
|  | } | 
|  |  | 
|  | cunmount(cmount.c, cmounted.c); | 
|  | poperror(); | 
|  | cclose(cmount.c); | 
|  | cclose(cmounted.c); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int sysopenat(int fromfd, char *path, int vfs_flags) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int fd; | 
|  | struct chan *c = 0, *from = 0; | 
|  |  | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | openmode(vfs_flags);	/* error check only */ | 
|  | if ((path[0] == '/') || (fromfd == AT_FDCWD)) { | 
|  | c = namec(path, Aopen, vfs_flags, 0, NULL); | 
|  | } else { | 
|  | /* We don't cclose from.  namec_from will convert it to the new | 
|  | * chan during the walk process (c).  It'll probably close from | 
|  | * internally, and give us something new for c.  On error, | 
|  | * namec_from will cclose from. */ | 
|  | from = fdtochan(¤t->open_files, fromfd, -1, FALSE, TRUE); | 
|  | if (!(from->flag & O_PATH)) | 
|  | error(EINVAL, "Cannot openat from a non-O_PATH FD"); | 
|  | c = namec_from(from, path, Aopen, vfs_flags, 0, NULL); | 
|  | } | 
|  | /* Devices should catch this, but just in case, we'll catch it. */ | 
|  | if ((c->qid.type & QTSYMLINK) && (vfs_flags & O_NOFOLLOW)) | 
|  | error(ELOOP, "no-follow open of a symlink"); | 
|  | fd = newfd(c, 0, vfs_flags, FALSE); | 
|  | if (fd < 0) | 
|  | error(-fd, ERROR_FIXME); | 
|  | poperror(); | 
|  | return fd; | 
|  | } | 
|  |  | 
|  | int sysopen(char *path, int vfs_flags) | 
|  | { | 
|  | return sysopenat(AT_FDCWD, path, vfs_flags); | 
|  | } | 
|  |  | 
|  | long unionread(struct chan *c, void *va, long n) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | int i; | 
|  | long nr; | 
|  | struct mhead *m; | 
|  | struct mount *mount; | 
|  |  | 
|  | qlock(&c->umqlock); | 
|  | m = c->umh; | 
|  | rlock(&m->lock); | 
|  | mount = m->mount; | 
|  | /* bring mount in sync with c->uri and c->umc */ | 
|  | for (i = 0; mount != NULL && i < c->uri; i++) | 
|  | mount = mount->next; | 
|  |  | 
|  | nr = 0; | 
|  | while (mount != NULL) { | 
|  | /* Error causes component of union to be skipped */ | 
|  | if (mount->to) { | 
|  | /* normally we want to discard the error, but for our | 
|  | * ghetto kdirent hack, we need to repeat unionread if | 
|  | * we saw a ENODATA */ | 
|  | if (waserror()) { | 
|  | if (get_errno() == ENODATA) { | 
|  | runlock(&m->lock); | 
|  | qunlock(&c->umqlock); | 
|  | nexterror(); | 
|  | } | 
|  | /* poperror done below for either branch */ | 
|  | } else { | 
|  | if (c->umc == NULL) { | 
|  | c->umc = cclone(mount->to); | 
|  | c->umc = | 
|  | devtab[c->umc->type].open(c->umc, | 
|  | O_READ); | 
|  | } | 
|  |  | 
|  | nr = devtab[c->umc->type].read(c->umc, va, n, | 
|  | c->umc->offset); | 
|  | if (nr < 0) | 
|  | nr = 0;	/* dev.c can return -1 */ | 
|  | c->umc->offset += nr; | 
|  | } | 
|  | poperror();	/* pop regardless */ | 
|  | } | 
|  | if (nr > 0) | 
|  | break; | 
|  |  | 
|  | /* Advance to next element */ | 
|  | c->uri++; | 
|  | if (c->umc) { | 
|  | cclose(c->umc); | 
|  | c->umc = NULL; | 
|  | } | 
|  | mount = mount->next; | 
|  | } | 
|  | runlock(&m->lock); | 
|  | qunlock(&c->umqlock); | 
|  | return nr; | 
|  | } | 
|  |  | 
|  | static void unionrewind(struct chan *c) | 
|  | { | 
|  | qlock(&c->umqlock); | 
|  | c->uri = 0; | 
|  | if (c->umc) { | 
|  | cclose(c->umc); | 
|  | c->umc = NULL; | 
|  | } | 
|  | qunlock(&c->umqlock); | 
|  | } | 
|  |  | 
|  | static long rread(int fd, void *va, long n, int64_t * offp) | 
|  | { | 
|  | ERRSTACK(3); | 
|  | int dir; | 
|  | struct chan *c; | 
|  | int64_t off; | 
|  |  | 
|  | /* dirty dirent hack */ | 
|  | void *real_va = va; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, O_READ, 1, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  |  | 
|  | if (n < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  |  | 
|  | dir = c->qid.type & QTDIR; | 
|  |  | 
|  | /* kdirent hack: userspace is expecting kdirents, but all of 9ns | 
|  | * produces Ms.  Just save up what we don't use and append the | 
|  | * new stuff later. Allocate DIRREADSIZE bytes for that purpose. | 
|  | */ | 
|  | if (dir) { | 
|  | int amt; | 
|  |  | 
|  | if (n < sizeof(struct kdirent)) | 
|  | error(EINVAL, "readdir needs to read at least %d", | 
|  | sizeof(struct kdirent)); | 
|  | if (!c->buf) { | 
|  | c->buf = kmalloc(DIRREADSIZE, MEM_WAIT); | 
|  | c->bufused = 0; | 
|  | } | 
|  | /* Attempt to extract an M, in case there was some already */ | 
|  | amt = convM2kdirent(c->buf, c->bufused, real_va, 0); | 
|  | if (amt) { | 
|  | c->bufused -= amt; | 
|  | memmove(c->buf, c->buf + amt, c->bufused); | 
|  | n = sizeof(struct kdirent); | 
|  | goto out; | 
|  | } | 
|  | /* debugging */ | 
|  | if (waserror()) { | 
|  | printk("Well, sysread of a dir sucks.%s \n", | 
|  | current_errstr()); | 
|  | nexterror(); | 
|  | } | 
|  | va = c->buf + c->bufused; | 
|  | n = DIRREADSIZE - c->bufused; | 
|  | } | 
|  |  | 
|  | /* this is the normal plan9 read */ | 
|  | if (dir && c->umh) | 
|  | n = unionread(c, va, n); | 
|  | else { | 
|  | if (offp == NULL) { | 
|  | spin_lock(&c->lock); /* lock for int64_t assignment */ | 
|  | off = c->offset; | 
|  | spin_unlock(&c->lock); | 
|  | } else | 
|  | off = *offp; | 
|  | if (off < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | if ((off64_t)off + (size_t)n < (off64_t)off) | 
|  | error(EINVAL, "bad offset %p + count %p", off, n); | 
|  | if (off == 0) { | 
|  | if (offp == NULL) { | 
|  | spin_lock(&c->lock); | 
|  | c->offset = 0; | 
|  | c->dri = 0; | 
|  | spin_unlock(&c->lock); | 
|  | } | 
|  | unionrewind(c); | 
|  | } | 
|  | if (! c->ateof) { | 
|  | n = devtab[c->type].read(c, va, n, off); | 
|  | if (n == 0 && dir) | 
|  | c->ateof = 1; | 
|  | } else { | 
|  | n = 0; | 
|  | } | 
|  | spin_lock(&c->lock); | 
|  | c->offset += n; | 
|  | spin_unlock(&c->lock); | 
|  | } | 
|  |  | 
|  | /* dirty kdirent hack */ | 
|  | if (dir) { | 
|  | int amt; | 
|  | c->bufused = c->bufused + n; | 
|  | /* extract an M from the front, then shift the remainder back */ | 
|  | amt = convM2kdirent(c->buf, c->bufused, real_va, 0); | 
|  | c->bufused -= amt; | 
|  | memmove(c->buf, c->buf + amt, c->bufused); | 
|  | n = amt ? sizeof(struct kdirent) : 0; | 
|  | poperror();	/* matching our debugging waserror */ | 
|  | } | 
|  |  | 
|  | out: | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | /* Reads exactly n bytes from chan c, starting at its offset.  Can block, but if | 
|  | * we get 0 back too soon (EOF or error), then we'll error out with ENODATA. | 
|  | * That might need a little work - if there was a previous error, then we | 
|  | * clobbered it and only know ENODATA but not why we completed early. */ | 
|  | void read_exactly_n(struct chan *c, void *vp, long n) | 
|  | { | 
|  | char *p; | 
|  | long nn; | 
|  | int total = 0, want = n; | 
|  |  | 
|  | p = vp; | 
|  | while (n > 0) { | 
|  | nn = devtab[c->type].read(c, p, n, c->offset); | 
|  | printd("readn: Got %d@%lld\n", nn, c->offset); | 
|  | if (nn == 0) | 
|  | error(ENODATA, "wanted %d, got %d", want, total); | 
|  | spin_lock(&c->lock); | 
|  | c->offset += nn; | 
|  | spin_unlock(&c->lock); | 
|  | p += nn; | 
|  | n -= nn; | 
|  | total += nn; | 
|  | } | 
|  | } | 
|  |  | 
|  | long sysread(int fd, void *va, long n) | 
|  | { | 
|  | return rread(fd, va, n, NULL); | 
|  | } | 
|  |  | 
/* Reads up to n bytes from fd at the explicit offset off (rread() with an
 * offset pointer).  Returns rread()'s result. */
long syspread(int fd, void *va, long n, int64_t off)
{
	long nread = rread(fd, va, n, &off);

	return nread;
}
|  |  | 
|  | int sysremove(char *path) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c = namec(path, Aremove, 0, 0, NULL); | 
|  | if (waserror()) { | 
|  | c->type = -1;	/* see below */ | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | devtab[c->type].remove(c); | 
|  | /* | 
|  | * Remove clunks the fid, but we need to recover the Chan | 
|  | * so fake it up.  -1 aborts the dev's close. | 
|  | */ | 
|  | c->type = -1; | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int sysrename(char *from_path, char *to_path) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *volatile renamee = NULL; | 
|  | struct chan *parent_chan; | 
|  |  | 
|  | if (waserror()) { | 
|  | cclose(renamee); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | renamee = namec(from_path, Aremove, 0, 0, NULL); | 
|  | /* We might need to support wstat for 'short' rename (intra-directory, | 
|  | * with no slashes).  Til then, we can just go with EXDEV. */ | 
|  | if (!devtab[renamee->type].rename) | 
|  | error(EXDEV, "device does not support rename"); | 
|  | parent_chan = namec(to_path, Arename, 0, 0, (char*)renamee); | 
|  | /* When we're done, renamee still points to the file, but it's in the | 
|  | * new location.  Its cname is still the old location, similar to | 
|  | * remove.  If anyone cares, we can change it.  parent_chan still points | 
|  | * to the parent - it didn't get moved like create does.  Though it does | 
|  | * have the name of the new location.  If we want, we can hand that to | 
|  | * renamee.  It's a moot point, since they are both getting closed. */ | 
|  | cclose(renamee); | 
|  | cclose(parent_chan); | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int64_t sysseek(int fd, int64_t off, int whence) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct dir *dir; | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, -1, 1, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | switch (whence) { | 
|  | case 0: | 
|  | if (c->qid.type & QTDIR) { | 
|  | if (off != 0) | 
|  | error(EISDIR, ERROR_FIXME); | 
|  | unionrewind(c); | 
|  | } else if (off < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | spin_lock(&c->lock);	/* lock for int64_t assignment */ | 
|  | c->offset = off; | 
|  | spin_unlock(&c->lock); | 
|  | break; | 
|  |  | 
|  | case 1: | 
|  | if (c->qid.type & QTDIR) | 
|  | error(EISDIR, ERROR_FIXME); | 
|  | spin_lock(&c->lock);	/* lock for read/write update */ | 
|  | off += c->offset; | 
|  | if (off < 0) { | 
|  | spin_unlock(&c->lock); | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | } | 
|  | c->offset = off; | 
|  | spin_unlock(&c->lock); | 
|  | break; | 
|  |  | 
|  | case 2: | 
|  | if (c->qid.type & QTDIR) | 
|  | error(EISDIR, ERROR_FIXME); | 
|  | dir = chandirstat(c); | 
|  | if (dir == NULL) | 
|  | error(EFAIL, "internal error: stat error in seek"); | 
|  | off += dir->length; | 
|  | kfree(dir); | 
|  | if (off < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | spin_lock(&c->lock);	/* lock for read/write update */ | 
|  | c->offset = off; | 
|  | spin_unlock(&c->lock); | 
|  | break; | 
|  |  | 
|  | default: | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | break; | 
|  | } | 
|  | poperror(); | 
|  | c->dri = 0; | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return off; | 
|  | } | 
|  |  | 
|  | void validstat(uint8_t * s, int n, int slashok) | 
|  | { | 
|  |  | 
|  | int m; | 
|  | char buf[64]; | 
|  |  | 
|  | statcheck(s, n); | 
|  | /* verify that name entry is acceptable */ | 
|  | s += STAT_FIX_LEN_9P - STAT_NR_STRINGS_9P * BIT16SZ; | 
|  | /* | 
|  | * s now points at count for first string. | 
|  | * if it's too long, let the server decide; this is | 
|  | * only for his protection anyway. otherwise | 
|  | * we'd have to allocate and waserror. | 
|  | */ | 
|  | m = GBIT16(s); | 
|  | s += BIT16SZ; | 
|  | if (m + 1 > sizeof buf) { | 
|  | return; | 
|  | } | 
|  | memmove(buf, s, m); | 
|  | buf[m] = '\0'; | 
|  | /* name could be '/' */ | 
|  | if (strcmp(buf, "/") != 0) | 
|  | validname(buf, slashok); | 
|  | } | 
|  |  | 
|  | int sysfstat(int fd, uint8_t *buf, int n) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | devtab[c->type].stat(c, buf, n); | 
|  |  | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | int sysfstatakaros(int fd, struct kstat *ks) | 
|  | { | 
|  |  | 
|  | int n = 4096; | 
|  | uint8_t *buf; | 
|  |  | 
|  | buf = kmalloc(n, MEM_WAIT); | 
|  | n = sysfstat(fd, buf, n); | 
|  | if (n > 0) { | 
|  | convM2kstat(buf, n, ks); | 
|  | n = 0; | 
|  | } | 
|  | kfree(buf); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static int __stat(char *path, uint8_t *buf, int n, int flags) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | c = namec(path, Aaccess, flags, 0, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | devtab[c->type].stat(c, buf, n); | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  |  | 
|  | return n; | 
|  | } | 
|  |  | 
/* Stats path into buf (n bytes) with no extra flags; see __stat(). */
int sysstat(char *path, uint8_t *buf, int n)
{
	int ret = __stat(path, buf, n, 0);

	return ret;
}
|  |  | 
|  | int syslstat(char *path, uint8_t *buf, int n) | 
|  | { | 
|  | return __stat(path, buf, n, O_NOFOLLOW); | 
|  | } | 
|  |  | 
|  | int sysstatakaros(char *path, struct kstat *ks, int flags) | 
|  | { | 
|  |  | 
|  | int n = 4096; | 
|  | uint8_t *buf; | 
|  |  | 
|  | buf = kmalloc(n, MEM_WAIT); | 
|  | n = __stat(path, buf, n, flags); | 
|  | if (n > 0) { | 
|  | convM2kstat(buf, n, ks); | 
|  | n = 0; | 
|  | } | 
|  | kfree(buf); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | static long rwrite(int fd, void *va, long n, int64_t * offp) | 
|  | { | 
|  | ERRSTACK(3); | 
|  | struct chan *c; | 
|  | struct dir *dir; | 
|  | int64_t off; | 
|  | long m; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, fd, O_WRITE, 1, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | if (c->qid.type & QTDIR) | 
|  | error(EISDIR, ERROR_FIXME); | 
|  |  | 
|  | if (n < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  |  | 
|  | if (offp == NULL) { | 
|  | /* append changes the offset to the end, and even if we fail | 
|  | * later, this change will persist */ | 
|  | if (c->flag & O_APPEND) { | 
|  | dir = chandirstat(c); | 
|  | if (!dir) | 
|  | error(EFAIL, "stat error in append write"); | 
|  | /* legacy lock for int64 assignment */ | 
|  | spin_lock(&c->lock); | 
|  | c->offset = dir->length; | 
|  | spin_unlock(&c->lock); | 
|  | kfree(dir); | 
|  | } | 
|  | spin_lock(&c->lock); | 
|  | off = c->offset; | 
|  | c->offset += n; | 
|  | spin_unlock(&c->lock); | 
|  | } else | 
|  | off = *offp; | 
|  |  | 
|  | if (waserror()) { | 
|  | if (offp == NULL) { | 
|  | spin_lock(&c->lock); | 
|  | c->offset -= n; | 
|  | spin_unlock(&c->lock); | 
|  | } | 
|  | nexterror(); | 
|  | } | 
|  | if (off < 0) | 
|  | error(EINVAL, ERROR_FIXME); | 
|  | if ((off64_t)off + (size_t)n < (off64_t)off) | 
|  | error(EINVAL, "bad offset %p + count %p", off, n); | 
|  | m = devtab[c->type].write(c, va, n, off); | 
|  | poperror(); | 
|  |  | 
|  | if (offp == NULL && m < n) { | 
|  | spin_lock(&c->lock); | 
|  | c->offset -= n - m; | 
|  | spin_unlock(&c->lock); | 
|  | } | 
|  |  | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return m; | 
|  | } | 
|  |  | 
/* Writes 'n' bytes from 'va' to 'fd' at the chan's current offset.  Passing a
 * NULL offset pointer tells rwrite() to use and advance the chan's own
 * offset.  Returns whatever rwrite() returns. */
long syswrite(int fd, void *va, long n)
{
	int64_t *use_chan_offset = NULL;

	return rwrite(fd, va, n, use_chan_offset);
}
|  |  | 
/* Writes 'n' bytes from 'va' to 'fd' at the explicit offset 'off'.  Because an
 * offset pointer is supplied, rwrite() does not touch the chan's own offset.
 * Returns whatever rwrite() returns. */
long syspwrite(int fd, void *va, long n, int64_t off)
{
	int64_t *explicit_off = &off;

	return rwrite(fd, va, n, explicit_off);
}
|  |  | 
|  | int syswstat(char *path, uint8_t * buf, int n) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | validstat(buf, n, 0); | 
|  | c = namec(path, Aaccess, 0, 0, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | n = devtab[c->type].wstat(c, buf, n); | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | struct dir *chandirstat(struct chan *c) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct dir *d; | 
|  | uint8_t *buf; | 
|  | int n, nd, i; | 
|  |  | 
|  | nd = DIRSIZE; | 
|  | for (i = 0; i < 2; i++) {	/* should work by the second try */ | 
|  | d = kzmalloc(sizeof(struct dir) + nd, MEM_WAIT); | 
|  | buf = (uint8_t *) & d[1]; | 
|  | if (waserror()) { | 
|  | kfree(d); | 
|  | poperror(); | 
|  | return NULL; | 
|  | } | 
|  | n = devtab[c->type].stat(c, buf, nd); | 
|  | poperror(); | 
|  | if (n < BIT16SZ) { | 
|  | kfree(d); | 
|  | return NULL; | 
|  | } | 
|  | /* size needed to store whole stat buffer including count */ | 
|  | nd = GBIT16((uint8_t *) buf) + BIT16SZ; | 
|  | if (nd <= n) { | 
|  | convM2D(buf, n, d, (char *)&d[1]); | 
|  | return d; | 
|  | } | 
|  | /* else sizeof(Dir)+nd is plenty */ | 
|  | kfree(d); | 
|  | } | 
|  | return NULL; | 
|  |  | 
|  | } | 
|  |  | 
|  | static struct dir *__dir_stat(char *name, int flags) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  | struct dir *d; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | c = namec(name, Aaccess, flags, 0, NULL); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | d = chandirstat(c); | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return d; | 
|  | } | 
|  |  | 
/* Stats 'name' with no extra lookup flags.  Returns what __dir_stat()
 * returns: an allocated struct dir, or NULL on failure. */
struct dir *sysdirstat(char *name)
{
	const int lookup_flags = 0;

	return __dir_stat(name, lookup_flags);
}
|  |  | 
|  | struct dir *sysdirlstat(char *name) | 
|  | { | 
|  | return __dir_stat(name, O_NOFOLLOW); | 
|  | } | 
|  |  | 
|  | struct dir *sysdirfstat(int fd) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  | struct dir *d; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  | d = chandirstat(c); | 
|  | poperror(); | 
|  | cclose(c); | 
|  |  | 
|  | poperror(); | 
|  | return d; | 
|  | } | 
|  |  | 
|  | int sysdirwstat(char *name, struct dir *dir) | 
|  | { | 
|  |  | 
|  | uint8_t *buf; | 
|  | int r; | 
|  |  | 
|  | r = sizeD2M(dir); | 
|  | buf = kzmalloc(r, MEM_WAIT); | 
|  | convD2M(dir, buf, r); | 
|  | r = syswstat(name, buf, r); | 
|  | kfree(buf); | 
|  | return r < 0 ? r : 0; | 
|  | } | 
|  |  | 
|  | int sysdirfwstat(int fd, struct dir *dir) | 
|  | { | 
|  |  | 
|  | uint8_t *buf; | 
|  | int r; | 
|  |  | 
|  | r = sizeD2M(dir); | 
|  | buf = kzmalloc(r, MEM_WAIT); | 
|  | convD2M(dir, buf, r); | 
|  | r = sysfwstat(fd, buf, r); | 
|  | kfree(buf); | 
|  | return r < 0 ? r : 0; | 
|  | } | 
|  |  | 
|  | static long dirpackage(uint8_t * buf, long ts, struct kdirent **d) | 
|  | { | 
|  |  | 
|  | char *s; | 
|  | long ss, i, n, nn, m = 0; | 
|  |  | 
|  | *d = NULL; | 
|  | if (ts <= 0) { | 
|  | return ts; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * first find number of all stats, check they look like stats, & size | 
|  | * all associated strings | 
|  | */ | 
|  | ss = 0; | 
|  | n = 0; | 
|  | for (i = 0; i < ts; i += m) { | 
|  | m = BIT16SZ + GBIT16(&buf[i]); | 
|  | statcheck(&buf[i], m); | 
|  | ss += m; | 
|  | n++; | 
|  | } | 
|  |  | 
|  | *d = kzmalloc(n * sizeof(**d) + ss, 0); | 
|  | if (*d == NULL) | 
|  | error(ENOMEM, ERROR_FIXME); | 
|  |  | 
|  | /* | 
|  | * then convert all buffers | 
|  | */ | 
|  | s = (char *)*d + n * sizeof(**d); | 
|  | nn = 0; | 
|  | for (i = 0; i < ts; i += m) { | 
|  | m = BIT16SZ + GBIT16((uint8_t *) & buf[i]); | 
|  | /* Note 's' is ignored by convM2kdirent */ | 
|  | if (nn >= n || /*convM2D */ convM2kdirent(&buf[i], m, *d + nn, | 
|  | s) != m) { | 
|  | kfree(*d); | 
|  | *d = NULL; | 
|  | error(EFAIL, "bad directory entry"); | 
|  | } | 
|  | nn++; | 
|  | s += m; | 
|  | } | 
|  |  | 
|  | return nn; | 
|  | } | 
|  |  | 
|  | long sysdirread(int fd, struct kdirent **d) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | uint8_t *buf; | 
|  | long ts; | 
|  |  | 
|  | *d = NULL; | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | buf = kzmalloc(DIRREADLIM, 0); | 
|  | if (buf == NULL) | 
|  | error(ENOMEM, ERROR_FIXME); | 
|  | if (waserror()) { | 
|  | kfree(buf); | 
|  | nexterror(); | 
|  | } | 
|  | ts = sysread(fd, buf, DIRREADLIM); | 
|  | if (ts >= 0) | 
|  | ts = dirpackage(buf, ts, d); | 
|  | poperror(); | 
|  | kfree(buf); | 
|  | poperror(); | 
|  | return ts; | 
|  | } | 
|  |  | 
|  | int sysiounit(int fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  | int n; | 
|  |  | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return 0;	/* n.b. */ | 
|  | } | 
|  | n = c->iounit; | 
|  | poperror(); | 
|  | cclose(c); | 
|  | return n; | 
|  | } | 
|  |  | 
|  | void print_chaninfo(struct chan *c) | 
|  | { | 
|  |  | 
|  | char buf[128] = { 0 }; | 
|  | bool has_dev = c->type != -1; | 
|  | bool has_chaninfo = has_dev && devtab[c->type].chaninfo; | 
|  |  | 
|  | print_lock(); | 
|  | printk("Chan flags: %p, pathname: %s, ref: %d, Dev: %s, Devinfo: %s", | 
|  | c->flag, | 
|  | c->name ? c->name->s : "no cname", | 
|  | kref_refcnt(&c->ref), | 
|  | has_dev ? devtab[c->type].name : "no dev", | 
|  | has_chaninfo ? devtab[c->type].chaninfo(c, buf, sizeof(buf)) | 
|  | : ""); | 
|  | if (!has_chaninfo) | 
|  | printk("qid.path: %p\n", c->qid.path); | 
|  | printk("\n"); | 
|  | print_unlock(); | 
|  | } | 
|  |  | 
|  | /* TODO: 9ns ns inheritance flags: Shared, copied, or empty.  The old fgrp is | 
|  | * managed by the fd_table, which is handled outside this function.  We share | 
|  | * the pgrp. */ | 
|  | int plan9setup(struct proc *new_proc, struct proc *parent, int flags) | 
|  | { | 
|  |  | 
|  | struct chan *new_dot; | 
|  |  | 
|  | ERRSTACK(1); | 
|  | if (waserror()) { | 
|  | printk("plan9setup failed, %s\n", current_errstr()); | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | if (!parent) { | 
|  | /* We are probably spawned by the kernel directly, and have no | 
|  | * parent to inherit from. */ | 
|  | new_proc->pgrp = newpgrp(); | 
|  | new_proc->slash = namec("#kfs", Atodir, 0, 0, NULL); | 
|  | if (!new_proc->slash) | 
|  | panic("no kfs device"); | 
|  | /* Want the name to be "/" instead of "#kfs" */ | 
|  | cnameclose(new_proc->slash->name); | 
|  | new_proc->slash->name = newcname("/"); | 
|  | new_proc->dot = cclone(new_proc->slash); | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  | /* Shared semantics */ | 
|  | kref_get(&parent->pgrp->ref, 1); | 
|  | new_proc->pgrp = parent->pgrp; | 
|  | /* copy semantics on / and . (doesn't make a lot of sense in akaros | 
|  | * o/w). */ | 
|  | /* / should never disappear while we hold a ref to parent */ | 
|  | chan_incref(parent->slash); | 
|  | new_proc->slash = parent->slash; | 
|  |  | 
|  | rcu_read_lock(); | 
|  | new_dot = rcu_dereference(parent->dot); | 
|  | kref_get(&new_dot->ref, 1); | 
|  | rcu_read_unlock(); | 
|  | new_proc->dot = new_dot; | 
|  |  | 
|  | poperror(); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Open flags, create modes, access types, file flags, and all that... | 
|  | * | 
|  | * there are a bunch of things here: | 
|  | * 		1) file creation flags (e.g. O_TRUNC) | 
|  | * 		2) file status flags (e.g. O_APPEND) | 
|  | * 		3) file open modes (e.g. O_RDWR) | 
|  | * 		4) file descriptor flags (e.g. CLOEXEC) | 
|  | * 		5) file creation mode (e.g. S_IRWXU) | 
|  | * the 1-4 are passed in via open's vfs_flags, and the 5 via mode only when | 
|  | * O_CREATE is set. | 
|  | * | 
|  | * file creation flags (1) only matter when creating, but aren't permanent. | 
|  | * O_EXCL, O_DIRECTORY, O_TRUNC, etc. | 
|  | * | 
|  | * file status flags (2) are per struct file/chan.  stuff like O_APPEND, | 
|  | * O_ASYNC, etc.  we convert those to an internal flag bit and store in c->flags | 
|  | * | 
|  | * the open mode (3) matters for a given FD/chan (chan->mode), and should be | 
|  | * stored in the chan. (c->mode) stuff like O_RDONLY. | 
|  | * | 
|  | * the file descriptor flags (4) clearly are in the FD.  note that the same | 
|  | * file/chan can be opened by two different FDs, with different flags.  the only | 
|  | * one anyone uses is CLOEXEC.  while exec may not last long in akaros, i can | 
|  | * imagine similar "never pass to children" flags/meanings. | 
|  | * | 
|  | * the file creation mode (5) matters for the device's permissions; given this, | 
|  | * it should be stored in the device/inode.  ACLs fall under this category. | 
|  | * | 
|  | * finally, only certain categories can be edited afterwards: file status flags | 
|  | * (2), FD flags (4), and file permissions (5).	*/ | 
|  | int fd_getfl(int fd) | 
|  | { | 
|  | ERRSTACK(1); | 
|  | struct chan *c; | 
|  | int ret; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  |  | 
|  | ret = c->mode; | 
|  | ret |= c->flag & CEXTERNAL_FLAGS; | 
|  |  | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static bool cexternal_flags_differ(int set1, int set2, int flags) | 
|  | { | 
|  | flags &= CEXTERNAL_FLAGS; | 
|  | return (set1 & flags) ^ (set2 & flags); | 
|  | } | 
|  |  | 
|  | static int chan_setfl(struct chan *c, int flags) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | if (cexternal_flags_differ(flags, c->flag, O_CLOEXEC)) { | 
|  | /* TODO: The whole CCEXEC / O_CLOEXEC on 9ns needs work */ | 
|  | error(EINVAL, "can't toggle O_CLOEXEC with setfl"); | 
|  | } | 
|  | if (cexternal_flags_differ(flags, c->flag, O_REMCLO)) | 
|  | error(EINVAL, "can't toggle O_REMCLO with setfl"); | 
|  | if (cexternal_flags_differ(flags, c->flag, O_PATH)) | 
|  | error(EINVAL, "can't toggle O_PATH with setfl"); | 
|  | ret = devtab[c->type].chan_ctl(c, CCTL_SET_FL, flags & CEXTERNAL_FLAGS, | 
|  | 0, 0, 0); | 
|  | c->flag = (c->flag & ~CEXTERNAL_FLAGS) | (flags & CEXTERNAL_FLAGS); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int fd_chan_ctl(int fd, int cmd, unsigned long arg1, unsigned long arg2, | 
|  | unsigned long arg3, unsigned long arg4) | 
|  | { | 
|  | ERRSTACK(2); | 
|  | struct chan *c; | 
|  | int ret; | 
|  |  | 
|  | if (waserror()) { | 
|  | poperror(); | 
|  | return -1; | 
|  | } | 
|  | c = fdtochan(¤t->open_files, fd, -1, 0, 1); | 
|  | if (waserror()) { | 
|  | cclose(c); | 
|  | nexterror(); | 
|  | } | 
|  |  | 
|  | if (!devtab[c->type].chan_ctl) | 
|  | error(EINVAL, "%s has no chan_ctl, can't %d", chan_dev_name(c), | 
|  | cmd); | 
|  |  | 
|  | /* Some commands require 9ns support in addition to the device ctl. */ | 
|  | switch (cmd) { | 
|  | case CCTL_SET_FL: | 
|  | ret = chan_setfl(c, arg1); | 
|  | break; | 
|  | default: | 
|  | ret = devtab[c->type].chan_ctl(c, cmd, arg1, arg2, arg3, arg4); | 
|  | break; | 
|  | } | 
|  |  | 
|  | poperror(); | 
|  | cclose(c); | 
|  | poperror(); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | ssize_t kread_file(struct file_or_chan *file, void *buf, size_t sz) | 
|  | { | 
|  | /* TODO: (KFOP) (VFS kernel read/writes need to be from a ktask) */ | 
|  | uintptr_t old_ret = switch_to_ktask(); | 
|  | off64_t dummy = 0; | 
|  | ssize_t cpy_amt = foc_read(file, buf, sz, dummy); | 
|  |  | 
|  | switch_back_from_ktask(old_ret); | 
|  | return cpy_amt; | 
|  | } | 
|  |  | 
|  | /* Reads the contents of an entire file into a buffer, returning that buffer. | 
|  | * On error, prints something useful and returns 0 */ | 
|  | void *kread_whole_file(struct file_or_chan *file) | 
|  | { | 
|  | size_t size; | 
|  | void *contents; | 
|  | ssize_t cpy_amt; | 
|  |  | 
|  | size = foc_get_len(file); | 
|  | contents = kmalloc(size, MEM_WAIT); | 
|  | cpy_amt = kread_file(file, contents, size); | 
|  | if (cpy_amt < 0) { | 
|  | printk("Error %d reading file %s\n", get_errno(), | 
|  | foc_to_name(file)); | 
|  | kfree(contents); | 
|  | return 0; | 
|  | } | 
|  | if (cpy_amt != size) { | 
|  | printk("Read %d, needed %d for file %s\n", cpy_amt, size, | 
|  | foc_to_name(file)); | 
|  | kfree(contents); | 
|  | return 0; | 
|  | } | 
|  | return contents; | 
|  | } | 
|  |  | 
|  | /* Process-related File management functions */ | 
|  |  | 
|  | /* Given any FD, get the appropriate object, 0 o/w. Set incref if you want a | 
|  | * reference count (which is a 9ns thing, you can't use the pointer if you | 
|  | * didn't incref). */ | 
|  | void *lookup_fd(struct fd_table *fdt, int fd, bool incref) | 
|  | { | 
|  | void *retval = 0; | 
|  |  | 
|  | if (fd < 0) | 
|  | return 0; | 
|  | spin_lock(&fdt->lock); | 
|  | if (fdt->closed) { | 
|  | spin_unlock(&fdt->lock); | 
|  | return 0; | 
|  | } | 
|  | if (fd < fdt->max_fdset) { | 
|  | if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { | 
|  | /* while max_files and max_fdset might not line up, we | 
|  | * should never have a valid fdset higher than files */ | 
|  | assert(fd < fdt->max_files); | 
|  | retval = fdt->fd[fd].fd_chan; | 
|  | if (incref) | 
|  | chan_incref((struct chan*)retval); | 
|  | } | 
|  | } | 
|  | spin_unlock(&fdt->lock); | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | /* Grow the vfs fd set */ | 
|  | static int grow_fd_set(struct fd_table *open_files) | 
|  | { | 
|  | int n; | 
|  | struct file_desc *nfd, *ofd; | 
|  |  | 
|  | /* Only update open_fds once. If currently pointing to open_fds_init, | 
|  | * then update it to point to a newly allocated fd_set with space for | 
|  | * NR_FILE_DESC_MAX */ | 
|  | if (open_files->open_fds == (struct fd_set*)&open_files->open_fds_init) | 
|  | { | 
|  | open_files->open_fds = kzmalloc(sizeof(struct fd_set), 0); | 
|  | memmove(open_files->open_fds, &open_files->open_fds_init, | 
|  | sizeof(struct small_fd_set)); | 
|  | } | 
|  |  | 
|  | /* Grow the open_files->fd array in increments of NR_OPEN_FILES_DEFAULT | 
|  | */ | 
|  | n = open_files->max_files + NR_OPEN_FILES_DEFAULT; | 
|  | if (n > NR_FILE_DESC_MAX) | 
|  | return -EMFILE; | 
|  | nfd = kzmalloc(n * sizeof(struct file_desc), 0); | 
|  | if (nfd == NULL) | 
|  | return -ENOMEM; | 
|  |  | 
|  | /* Move the old array on top of the new one */ | 
|  | ofd = open_files->fd; | 
|  | memmove(nfd, ofd, open_files->max_files * sizeof(struct file_desc)); | 
|  |  | 
|  | /* Update the array and the maxes for both max_files and max_fdset */ | 
|  | open_files->fd = nfd; | 
|  | open_files->max_files = n; | 
|  | open_files->max_fdset = n; | 
|  |  | 
|  | /* Only free the old one if it wasn't pointing to open_files->fd_array*/ | 
|  | if (ofd != open_files->fd_array) | 
|  | kfree(ofd); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /* Free the vfs fd set if necessary */ | 
|  | static void free_fd_set(struct fd_table *open_files) | 
|  | { | 
|  | void *free_me; | 
|  |  | 
|  | if (open_files->open_fds != (struct fd_set*)&open_files->open_fds_init) | 
|  | { | 
|  | assert(open_files->fd != open_files->fd_array); | 
|  | /* need to reset the pointers to the internal addrs, in case we | 
|  | * take a look while debugging.  0 them out, since they have old | 
|  | * data.  our current versions should all be closed. */ | 
|  | memset(&open_files->open_fds_init, 0, | 
|  | sizeof(struct small_fd_set)); | 
|  | memset(&open_files->fd_array, 0, sizeof(open_files->fd_array)); | 
|  |  | 
|  | free_me = open_files->open_fds; | 
|  | open_files->open_fds = | 
|  | (struct fd_set*)&open_files->open_fds_init; | 
|  | kfree(free_me); | 
|  |  | 
|  | free_me = open_files->fd; | 
|  | open_files->fd = open_files->fd_array; | 
|  | kfree(free_me); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* If FD is in the group, remove it, decref it, and return TRUE. */ | 
|  | bool close_fd(struct fd_table *fdt, int fd) | 
|  | { | 
|  | struct chan *chan = 0; | 
|  | struct fd_tap *tap = 0; | 
|  | bool ret = FALSE; | 
|  |  | 
|  | if (fd < 0) | 
|  | return FALSE; | 
|  | spin_lock(&fdt->lock); | 
|  | if (fd < fdt->max_fdset) { | 
|  | if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { | 
|  | /* while max_files and max_fdset might not line up, we | 
|  | * should never have a valid fdset higher than files */ | 
|  | assert(fd < fdt->max_files); | 
|  | chan = fdt->fd[fd].fd_chan; | 
|  | tap = fdt->fd[fd].fd_tap; | 
|  | fdt->fd[fd].fd_chan = 0; | 
|  | fdt->fd[fd].fd_tap = 0; | 
|  | CLR_BITMASK_BIT(fdt->open_fds->fds_bits, fd); | 
|  | if (fd < fdt->hint_min_fd) | 
|  | fdt->hint_min_fd = fd; | 
|  | ret = TRUE; | 
|  | } | 
|  | } | 
|  | spin_unlock(&fdt->lock); | 
|  | /* Need to decref/cclose outside of the lock; they could sleep */ | 
|  | cclose(chan); | 
|  | if (tap) | 
|  | kref_put(&tap->kref); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int __get_fd(struct fd_table *open_files, int low_fd, bool must_use_low) | 
|  | { | 
|  | int slot = -1; | 
|  | int error; | 
|  | bool update_hint = TRUE; | 
|  |  | 
|  | if ((low_fd < 0) || (low_fd > NR_FILE_DESC_MAX)) | 
|  | return -EINVAL; | 
|  | if (open_files->closed) | 
|  | return -EINVAL;	/* won't matter, they are dying */ | 
|  | if (must_use_low | 
|  | && GET_BITMASK_BIT(open_files->open_fds->fds_bits, low_fd)) | 
|  | return -ENFILE; | 
|  | if (low_fd > open_files->hint_min_fd) | 
|  | update_hint = FALSE; | 
|  | else | 
|  | low_fd = open_files->hint_min_fd; | 
|  | /* Loop until we have a valid slot (we grow the fd_array at the bottom | 
|  | * of the loop if we haven't found a slot in the current array */ | 
|  | while (slot == -1) { | 
|  | for (low_fd; low_fd < open_files->max_fdset; low_fd++) { | 
|  | if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, | 
|  | low_fd)) | 
|  | continue; | 
|  | slot = low_fd; | 
|  | SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot); | 
|  | assert(slot < open_files->max_files && | 
|  | open_files->fd[slot].fd_chan == 0); | 
|  | /* We know slot >= hint, since we started with hint */ | 
|  | if (update_hint) | 
|  | open_files->hint_min_fd = slot + 1; | 
|  | break; | 
|  | } | 
|  | if (slot == -1)	{ | 
|  | if ((error = grow_fd_set(open_files))) | 
|  | return error; | 
|  | } | 
|  | } | 
|  | return slot; | 
|  | } | 
|  |  | 
|  | /* Insert a file or chan (obj, chosen by vfs) into the fd group with fd_flags. | 
|  | * If must_use_low, then we have to insert at FD = low_fd.  o/w we start looking | 
|  | * for empty slots at low_fd. */ | 
|  | int insert_obj_fdt(struct fd_table *fdt, void *obj, int low_fd, int fd_flags, | 
|  | bool must_use_low) | 
|  | { | 
|  | int slot; | 
|  |  | 
|  | spin_lock(&fdt->lock); | 
|  | slot = __get_fd(fdt, low_fd, must_use_low); | 
|  | if (slot < 0) { | 
|  | spin_unlock(&fdt->lock); | 
|  | return slot; | 
|  | } | 
|  | assert(slot < fdt->max_files && | 
|  | fdt->fd[slot].fd_chan == 0); | 
|  | chan_incref((struct chan*)obj); | 
|  | fdt->fd[slot].fd_chan = obj; | 
|  | fdt->fd[slot].fd_flags = fd_flags; | 
|  | spin_unlock(&fdt->lock); | 
|  | return slot; | 
|  | } | 
|  |  | 
|  | /* Closes all open files.  Mostly just a "put" for all files.  If cloexec, it | 
|  | * will only close the FDs with FD_CLOEXEC (opened with O_CLOEXEC or fcntld). | 
|  | * | 
|  | * Notes on concurrency: | 
|  | * - Can't hold spinlocks while we call cclose, since it might sleep eventually. | 
|  | * - We're called from proc_destroy, so we could have concurrent openers trying | 
|  | *   to add to the group (other syscalls), hence the "closed" flag. | 
|  | * - dot and slash chans are dealt with in proc_free.  its difficult to close | 
|  | *   and zero those with concurrent syscalls, since those are a source of krefs. | 
|  | * - Once we lock and set closed, no further additions can happen.  To simplify | 
|  | *   our closes, we also allow multiple calls to this func (though that should | 
|  | *   never happen with the current code). */ | 
|  | void close_fdt(struct fd_table *fdt, bool cloexec) | 
|  | { | 
|  | struct chan *chan; | 
|  | struct file_desc *to_close; | 
|  | int idx = 0; | 
|  |  | 
|  | to_close = kzmalloc(sizeof(struct file_desc) * fdt->max_files, | 
|  | MEM_WAIT); | 
|  | spin_lock(&fdt->lock); | 
|  | if (fdt->closed) { | 
|  | spin_unlock(&fdt->lock); | 
|  | kfree(to_close); | 
|  | return; | 
|  | } | 
|  | for (int i = 0; i < fdt->max_fdset; i++) { | 
|  | if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, i)) { | 
|  | /* while max_files and max_fdset might not line up, we | 
|  | * should never have a valid fdset higher than files */ | 
|  | assert(i < fdt->max_files); | 
|  | if (cloexec && !(fdt->fd[i].fd_flags & FD_CLOEXEC)) | 
|  | continue; | 
|  | chan = fdt->fd[i].fd_chan; | 
|  | to_close[idx].fd_tap = fdt->fd[i].fd_tap; | 
|  | fdt->fd[i].fd_tap = 0; | 
|  | fdt->fd[i].fd_chan = 0; | 
|  | to_close[idx++].fd_chan = chan; | 
|  | CLR_BITMASK_BIT(fdt->open_fds->fds_bits, i); | 
|  | } | 
|  | } | 
|  | /* it's just a hint, we can build back up from being 0 */ | 
|  | fdt->hint_min_fd = 0; | 
|  | if (!cloexec) { | 
|  | free_fd_set(fdt); | 
|  | fdt->closed = TRUE; | 
|  | } | 
|  | spin_unlock(&fdt->lock); | 
|  | /* We go through some hoops to close/decref outside the lock.  Nice for | 
|  | * not holding the lock for a while; critical in case the decref/cclose | 
|  | * sleeps (it can) */ | 
|  | for (int i = 0; i < idx; i++) { | 
|  | cclose(to_close[i].fd_chan); | 
|  | if (to_close[i].fd_tap) | 
|  | kref_put(&to_close[i].fd_tap->kref); | 
|  | } | 
|  | kfree(to_close); | 
|  | } | 
|  |  | 
|  | /* Inserts all of the files from src into dst, used by sys_fork(). */ | 
|  | void clone_fdt(struct fd_table *src, struct fd_table *dst) | 
|  | { | 
|  | struct chan *chan; | 
|  | int ret; | 
|  |  | 
|  | spin_lock(&src->lock); | 
|  | if (src->closed) { | 
|  | spin_unlock(&src->lock); | 
|  | return; | 
|  | } | 
|  | spin_lock(&dst->lock); | 
|  | if (dst->closed) { | 
|  | warn("Destination closed before it opened"); | 
|  | spin_unlock(&dst->lock); | 
|  | spin_unlock(&src->lock); | 
|  | return; | 
|  | } | 
|  | while (src->max_files > dst->max_files) { | 
|  | ret = grow_fd_set(dst); | 
|  | if (ret < 0) { | 
|  | set_error(-ret, "Failed to grow for a clone_fdt"); | 
|  | spin_unlock(&dst->lock); | 
|  | spin_unlock(&src->lock); | 
|  | return; | 
|  | } | 
|  | } | 
|  | for (int i = 0; i < src->max_fdset; i++) { | 
|  | if (GET_BITMASK_BIT(src->open_fds->fds_bits, i)) { | 
|  | /* while max_files and max_fdset might not line up, we | 
|  | * should never have a valid fdset higher than files */ | 
|  | assert(i < src->max_files); | 
|  | chan = src->fd[i].fd_chan; | 
|  | assert(i < dst->max_files && dst->fd[i].fd_chan == 0); | 
|  | SET_BITMASK_BIT(dst->open_fds->fds_bits, i); | 
|  | dst->fd[i].fd_chan = chan; | 
|  | chan_incref(chan); | 
|  | } | 
|  | } | 
|  | dst->hint_min_fd = src->hint_min_fd; | 
|  | spin_unlock(&dst->lock); | 
|  | spin_unlock(&src->lock); | 
|  | } | 
|  |  | 
|  | int fd_get_fd_flags(struct fd_table *fdt, int fd) | 
|  | { | 
|  | int ret = -1; | 
|  |  | 
|  | if (fd < 0) | 
|  | return -1; | 
|  | spin_lock(&fdt->lock); | 
|  | if (fdt->closed) { | 
|  | spin_unlock(&fdt->lock); | 
|  | return -1; | 
|  | } | 
|  | if ((fd < fdt->max_fdset) | 
|  | && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) | 
|  | ret = fdt->fd[fd].fd_flags; | 
|  | spin_unlock(&fdt->lock); | 
|  | if (ret == -1) | 
|  | set_error(EBADF, "FD was not open"); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int fd_set_fd_flags(struct fd_table *fdt, int fd, int new_fl) | 
|  | { | 
|  | int ret = -1; | 
|  |  | 
|  | if (fd < 0) | 
|  | return -1; | 
|  | spin_lock(&fdt->lock); | 
|  | if (fdt->closed) { | 
|  | spin_unlock(&fdt->lock); | 
|  | return -1; | 
|  | } | 
|  | if ((fd < fdt->max_fdset) | 
|  | && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) | 
|  | fdt->fd[fd].fd_flags = new_fl; | 
|  | spin_unlock(&fdt->lock); | 
|  | if (ret == -1) | 
|  | set_error(EBADF, "FD was not open"); | 
|  | return ret; | 
|  | } |