| /* Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. |
| * Portions Copyright © 1997-1999 Vita Nuova Limited |
| * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited |
| * (www.vitanuova.com) |
| * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others |
| * |
| * Modified for the Akaros operating system: |
| * Copyright (c) 2013-2014 The Regents of the University of California |
| * Copyright (c) 2013-2015 Google Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. */ |
| |
| #include <slab.h> |
| #include <kmalloc.h> |
| #include <kref.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <assert.h> |
| #include <error.h> |
| #include <cpio.h> |
| #include <pmap.h> |
| #include <smp.h> |
| #include <net/ip.h> |
| #include <rcu.h> |
| |
| /* TODO: these sizes are hokey. DIRSIZE is used in chandirstat, and it looks |
| * like it's the size of a common-case stat. */ |
| enum { |
| DIRSIZE = STAT_FIX_LEN_AK + 32 * STAT_NR_STRINGS_AK, |
| |
| /* should handle the largest reasonable directory entry */ |
| DIRREADLIM = 2048, |
| |
| /* Just read a lot. Memory is cheap, lots of bandwidth, and RPCs are |
| * very expensive. At the same time, let's not yet exceed a common |
| * MSIZE. */ |
| DIRREADSIZE = 8192, |
| }; |
| |
| int newfd(struct chan *c, int low_fd, int oflags, bool must_use_low) |
| { |
| int ret = insert_obj_fdt(¤t->open_files, c, low_fd, |
| oflags & O_CLOEXEC ? FD_CLOEXEC : 0, |
| must_use_low); |
| if (ret >= 0) |
| cclose(c); |
| return ret; |
| } |
| |
| struct chan *fdtochan(struct fd_table *fdt, int fd, int mode, int chkmnt, |
| int iref) |
| { |
| struct chan *c; |
| |
| c = lookup_fd(fdt, fd, iref); |
| if (!c) { |
| /* We lost the info about why there was a problem (we used to |
| * track file group closed too, can add that in later). */ |
| error(EBADF, ERROR_FIXME); |
| } |
| if (chkmnt && (c->flag & CMSG)) { |
| if (iref) |
| cclose(c); |
| error(EBADF, ERROR_FIXME); |
| } |
| if (mode < 0) |
| return c; |
| if ((mode & c->mode) != mode) { |
| if (iref) |
| cclose(c); |
| error(EBADF, |
| "FD access mode failure: chan mode 0x%x, wanted 0x%x (opened with 0 instead of O_READ?)", |
| c->mode, mode); |
| } |
| return c; |
| } |
| |
| long kchanio(void *vc, void *buf, int n, int mode) |
| { |
| ERRSTACK(1); |
| int r; |
| struct chan *c; |
| |
| c = vc; |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| if (mode == O_READ) |
| r = devtab[c->type].read(c, buf, n, c->offset); |
| else if (mode == O_WRITE) |
| r = devtab[c->type].write(c, buf, n, c->offset); |
| else |
| error(ENOSYS, "kchanio: use only O_READ xor O_WRITE"); |
| |
| spin_lock(&c->lock); |
| c->offset += r; |
| spin_unlock(&c->lock); |
| poperror(); |
| return r; |
| } |
| |
/* Reduces a set of open flags to its basic access-mode bits (RD/WR/etc).
 *
 * GIANT WARNING: if this ever throws, ipopen (and probably many others) will
 * screw up refcnts of Qctl, err, data, etc. */
int openmode(uint32_t omode)
{
#if 0
	/* this is the old plan9 style. i think they want to turn exec into
	 * read, and strip off anything higher, and just return the RD/WR style
	 * bits. not stuff like ORCLOSE. the lack of OEXCL might be a bug on
	 * their part (it's the only one of their non-RW-related flags that
	 * isn't masked out).
	 *
	 * Note that we no longer convert OEXEC/O_EXEC to O_READ, and instead
	 * return just the O_ACCMODE bits. */
	if (o >= (OTRUNC | OCEXEC | ORCLOSE | OEXEC))
		error(EINVAL, ERROR_FIXME);
	o &= ~(OTRUNC | OCEXEC | ORCLOSE);
	if (o > OEXEC)
		error(EINVAL, ERROR_FIXME);
	if (o == OEXEC)
		return OREAD;
	return o;
#endif
	/* No validation is done here (there are far too many flags to police);
	 * everything outside O_ACCMODE is simply masked away. */
	uint32_t access_bits = omode & O_ACCMODE;

	return access_bits;
}
| |
/* Closes 'fd' in 'fdt' by delegating to close_fd(); nothing is reported back
 * to the caller. */
void fdclose(struct fd_table *fdt, int fd)
{
	(void)close_fd(fdt, fd);
}
| |
| static void set_dot(struct proc *p, struct chan *c) |
| { |
| c = atomic_swap_ptr((void**)&p->dot, c); |
| synchronize_rcu(); |
| cclose(c); |
| } |
| |
| /* Note namec() happens in the namespace of the caller. */ |
| int syschdir(struct proc *target, char *path) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = namec(path, Atodir, 0, 0, NULL); |
| poperror(); |
| set_dot(target, c); |
| return 0; |
| } |
| |
| /* Note fdtochan() happens with the FDs of the caller. */ |
| int sysfchdir(struct proc *target, int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| poperror(); |
| |
| /* This is a little hokey. Ideally, we'd only allow O_PATH fds to be |
| * fchdir'd. Linux/POSIX lets you do arbitrary FDs. Luckily, we stored |
| * the name when we walked (__namec_from), so we should be able to |
| * recreate the chan. Using namec() with channame() is a more |
| * heavy-weight cclone(), but also might have issues if the chan has |
| * since been removed or the namespace is otherwise different from when |
| * the original fd/chan was first created. */ |
| if (c->flag & O_PATH) { |
| set_dot(target, c); |
| return 0; |
| } |
| if (waserror()) { |
| cclose(c); |
| poperror(); |
| return -1; |
| } |
| syschdir(target, channame(c)); |
| cclose(c); |
| poperror(); |
| |
| return 0; |
| } |
| |
| int sysclose(int fd) |
| { |
| ERRSTACK(1); |
| struct fd_table *fdt = ¤t->open_files; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| /* |
| * Take no reference on the chan because we don't really need the |
| * data structure, and are calling fdtochan only for error checks. |
| * fdclose takes care of processes racing through here. |
| */ |
| fdtochan(fdt, fd, -1, 0, 0); |
| fdclose(fdt, fd); |
| poperror(); |
| return 0; |
| } |
| |
| int syscreate(char *path, int mode, uint32_t perm) |
| { |
| ERRSTACK(2); |
| int fd; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| openmode(mode & ~O_EXCL); /* error check only; OEXCL okay here */ |
| c = namec(path, Acreate, mode, perm, NULL); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| /* 9ns mode is the O_FLAGS and perm is glibc mode */ |
| fd = newfd(c, 0, mode, FALSE); |
| if (fd < 0) |
| error(-fd, ERROR_FIXME); |
| poperror(); |
| |
| poperror(); |
| return fd; |
| } |
| |
| int sysdup(int old, int low_fd, bool must_use_low) |
| { |
| ERRSTACK(1); |
| int fd; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(¤t->open_files, old, -1, 0, 1); |
| if (c->qid.type & QTAUTH) { |
| cclose(c); |
| error(EPERM, ERROR_FIXME); |
| } |
| fd = newfd(c, low_fd, 0, must_use_low); |
| if (fd < 0) { |
| cclose(c); |
| error(-fd, ERROR_FIXME); |
| } |
| poperror(); |
| return fd; |
| } |
| |
| /* Could pass in the fdt instead of the proc, but we used to need the to_proc |
| * for now so we can claim a VFS FD. Careful, we don't close the old chan. */ |
| int sys_dup_to(struct proc *from_proc, unsigned int from_fd, |
| struct proc *to_proc, unsigned int to_fd) |
| { |
| ERRSTACK(1); |
| int ret; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(&from_proc->open_files, from_fd, -1, 0, 1); |
| if (c->qid.type & QTAUTH) { |
| cclose(c); |
| error(EPERM, ERROR_FIXME); |
| } |
| ret = insert_obj_fdt(&to_proc->open_files, c, to_fd, 0, TRUE); |
| /* drop the ref from fdtochan. if insert succeeded, there is one other |
| * ref stored in the FDT */ |
| cclose(c); |
| if (ret < 0) |
| error(EFAIL, "Can't insert FD %d into FDG", to_fd); |
| poperror(); |
| return 0; |
| } |
| |
| char *sysfd2path(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| char *s; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| s = NULL; |
| if (c->name != NULL) { |
| s = kzmalloc(c->name->len + 1, 0); |
| if (s == NULL) { |
| cclose(c); |
| error(ENOMEM, ERROR_FIXME); |
| } |
| memmove(s, c->name->s, c->name->len + 1); |
| } |
| cclose(c); |
| poperror(); |
| return s; |
| } |
| |
| char *sysgetcwd(void) |
| { |
| char *s = NULL; |
| struct chan *dot; |
| |
| rcu_read_lock(); |
| dot = rcu_dereference(current->dot); |
| kref_get(&dot->ref, 1); |
| rcu_read_unlock(); |
| if (dot->name) |
| kstrdup(&s, dot->name->s); |
| cclose(dot); |
| return s; |
| } |
| |
| int sysfauth(int fd, char *aname) |
| { |
| ERRSTACK(2); |
| struct chan *c, *ac; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validname(aname, 0); |
| c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| ac = mntauth(c, aname); |
| |
| /* at this point ac is responsible for keeping c alive */ |
| poperror(); /* c */ |
| cclose(c); |
| |
| if (waserror()) { |
| cclose(ac); |
| nexterror(); |
| } |
| |
| fd = newfd(ac, 0, 0, FALSE); |
| if (fd < 0) |
| error(-fd, ERROR_FIXME); |
| poperror(); /* ac */ |
| |
| poperror(); |
| |
| return fd; |
| } |
| |
| int sysfversion(int fd, unsigned int msize, char *vers, unsigned int arglen) |
| { |
| ERRSTACK(2); |
| int m; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| /* check there's a NUL in the version string */ |
| if (arglen == 0 || memchr(vers, 0, arglen) == 0) |
| error(EINVAL, ERROR_FIXME); |
| |
| c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| m = mntversion(c, vers, msize, arglen); |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return m; |
| } |
| |
| int sysfwstat(int fd, uint8_t * buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validstat(buf, n, 0); |
| c = fdtochan(¤t->open_files, fd, -1, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| n = devtab[c->type].wstat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| long bindmount(struct chan *c, char *old, int flag, char *spec) |
| { |
| ERRSTACK(1); |
| int ret; |
| struct chan *c1; |
| |
| if (flag > MMASK || (flag & MORDER) == (MBEFORE | MAFTER)) |
| error(EINVAL, ERROR_FIXME); |
| |
| c1 = namec(old, Amount, 0, 0, NULL); |
| if (waserror()) { |
| cclose(c1); |
| nexterror(); |
| } |
| ret = cmount(c, c1, flag, spec); |
| |
| poperror(); |
| cclose(c1); |
| return ret; |
| } |
| |
| int sysbind(char *new, char *old, int flags) |
| { |
| ERRSTACK(2); |
| long r; |
| struct chan *c0; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c0 = namec(new, Abind, 0, 0, NULL); |
| if (waserror()) { |
| cclose(c0); |
| nexterror(); |
| } |
| r = bindmount(c0, old, flags, ""); |
| poperror(); |
| cclose(c0); |
| |
| poperror(); |
| return r; |
| } |
| |
| int syssymlink(char *new_path, char *old_path) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| validname(old_path, true); |
| c = namec(new_path, Acreate, O_EXCL, |
| DMSYMLINK | S_IRWXU | S_IRWXG | S_IRWXO, old_path); |
| cclose(c); |
| poperror(); |
| return 0; |
| } |
| |
| int sysmount(int fd, int afd, char *old, int flags, char *spec) |
| { |
| ERRSTACK(1); |
| long r; |
| volatile struct { |
| struct chan *c; |
| } c0; |
| volatile struct { |
| struct chan *c; |
| } bc; |
| volatile struct { |
| struct chan *c; |
| } ac; |
| struct mntparam mntparam; |
| |
| ac.c = NULL; |
| bc.c = NULL; |
| c0.c = NULL; |
| if (waserror()) { |
| cclose(ac.c); |
| cclose(bc.c); |
| cclose(c0.c); |
| poperror(); |
| return -1; |
| } |
| bc.c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1); |
| if (afd >= 0) |
| ac.c = fdtochan(¤t->open_files, afd, O_RDWR, 0, 1); |
| mntparam.chan = bc.c; |
| mntparam.authchan = ac.c; |
| mntparam.spec = spec; |
| c0.c = devtab[devno("mnt", 0)].attach((char *)&mntparam); |
| if (flags & MCACHE) |
| c0.c = devtab[devno("gtfs", 0)].attach((char*)c0.c); |
| r = bindmount(c0.c, old, flags, spec); |
| poperror(); |
| cclose(ac.c); |
| cclose(bc.c); |
| cclose(c0.c); |
| |
| return r; |
| } |
| |
| int sysunmount(char *src_path, char *onto_path) |
| { |
| ERRSTACK(1); |
| volatile struct { |
| struct chan *c; |
| } cmount; |
| volatile struct { |
| struct chan *c; |
| } cmounted; |
| |
| cmount.c = NULL; |
| cmounted.c = NULL; |
| if (waserror()) { |
| cclose(cmount.c); |
| cclose(cmounted.c); |
| poperror(); |
| return -1; |
| } |
| |
| cmount.c = namec(onto_path, Amount, 0, 0, NULL); |
| if (src_path != NULL && src_path[0] != '\0') { |
| /* |
| * This has to be namec(..., Aopen, ...) because |
| * if arg[0] is something like /srv/cs or /fd/0, |
| * opening it is the only way to get at the real |
| * Chan underneath. |
| */ |
| cmounted.c = namec(src_path, Aopen, O_READ, 0, NULL); |
| } |
| |
| cunmount(cmount.c, cmounted.c); |
| poperror(); |
| cclose(cmount.c); |
| cclose(cmounted.c); |
| return 0; |
| } |
| |
| int sysopenat(int fromfd, char *path, int vfs_flags) |
| { |
| ERRSTACK(1); |
| int fd; |
| struct chan *c = 0, *from = 0; |
| |
| if (waserror()) { |
| cclose(c); |
| poperror(); |
| return -1; |
| } |
| openmode(vfs_flags); /* error check only */ |
| if ((path[0] == '/') || (fromfd == AT_FDCWD)) { |
| c = namec(path, Aopen, vfs_flags, 0, NULL); |
| } else { |
| /* We don't cclose from. namec_from will convert it to the new |
| * chan during the walk process (c). It'll probably close from |
| * internally, and give us something new for c. On error, |
| * namec_from will cclose from. */ |
| from = fdtochan(¤t->open_files, fromfd, -1, FALSE, TRUE); |
| if (!(from->flag & O_PATH)) |
| error(EINVAL, "Cannot openat from a non-O_PATH FD"); |
| c = namec_from(from, path, Aopen, vfs_flags, 0, NULL); |
| } |
| /* Devices should catch this, but just in case, we'll catch it. */ |
| if ((c->qid.type & QTSYMLINK) && (vfs_flags & O_NOFOLLOW)) |
| error(ELOOP, "no-follow open of a symlink"); |
| fd = newfd(c, 0, vfs_flags, FALSE); |
| if (fd < 0) |
| error(-fd, ERROR_FIXME); |
| poperror(); |
| return fd; |
| } |
| |
| int sysopen(char *path, int vfs_flags) |
| { |
| return sysopenat(AT_FDCWD, path, vfs_flags); |
| } |
| |
| long unionread(struct chan *c, void *va, long n) |
| { |
| ERRSTACK(1); |
| int i; |
| long nr; |
| struct mhead *m; |
| struct mount *mount; |
| |
| qlock(&c->umqlock); |
| m = c->umh; |
| rlock(&m->lock); |
| mount = m->mount; |
| /* bring mount in sync with c->uri and c->umc */ |
| for (i = 0; mount != NULL && i < c->uri; i++) |
| mount = mount->next; |
| |
| nr = 0; |
| while (mount != NULL) { |
| /* Error causes component of union to be skipped */ |
| if (mount->to) { |
| /* normally we want to discard the error, but for our |
| * ghetto kdirent hack, we need to repeat unionread if |
| * we saw a ENODATA */ |
| if (waserror()) { |
| if (get_errno() == ENODATA) { |
| runlock(&m->lock); |
| qunlock(&c->umqlock); |
| nexterror(); |
| } |
| /* poperror done below for either branch */ |
| } else { |
| if (c->umc == NULL) { |
| c->umc = cclone(mount->to); |
| c->umc = |
| devtab[c->umc->type].open(c->umc, |
| O_READ); |
| } |
| |
| nr = devtab[c->umc->type].read(c->umc, va, n, |
| c->umc->offset); |
| if (nr < 0) |
| nr = 0; /* dev.c can return -1 */ |
| c->umc->offset += nr; |
| } |
| poperror(); /* pop regardless */ |
| } |
| if (nr > 0) |
| break; |
| |
| /* Advance to next element */ |
| c->uri++; |
| if (c->umc) { |
| cclose(c->umc); |
| c->umc = NULL; |
| } |
| mount = mount->next; |
| } |
| runlock(&m->lock); |
| qunlock(&c->umqlock); |
| return nr; |
| } |
| |
| static void unionrewind(struct chan *c) |
| { |
| qlock(&c->umqlock); |
| c->uri = 0; |
| if (c->umc) { |
| cclose(c->umc); |
| c->umc = NULL; |
| } |
| qunlock(&c->umqlock); |
| } |
| |
| static long rread(int fd, void *va, long n, int64_t * offp) |
| { |
| ERRSTACK(3); |
| int dir; |
| struct chan *c; |
| int64_t off; |
| |
| /* dirty dirent hack */ |
| void *real_va = va; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(¤t->open_files, fd, O_READ, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| if (n < 0) |
| error(EINVAL, ERROR_FIXME); |
| |
| dir = c->qid.type & QTDIR; |
| |
| /* kdirent hack: userspace is expecting kdirents, but all of 9ns |
| * produces Ms. Just save up what we don't use and append the |
| * new stuff later. Allocate DIRREADSIZE bytes for that purpose. |
| */ |
| if (dir) { |
| int amt; |
| |
| if (n < sizeof(struct kdirent)) |
| error(EINVAL, "readdir needs to read at least %d", |
| sizeof(struct kdirent)); |
| if (!c->buf) { |
| c->buf = kmalloc(DIRREADSIZE, MEM_WAIT); |
| c->bufused = 0; |
| } |
| /* Attempt to extract an M, in case there was some already */ |
| amt = convM2kdirent(c->buf, c->bufused, real_va, 0); |
| if (amt) { |
| c->bufused -= amt; |
| memmove(c->buf, c->buf + amt, c->bufused); |
| n = sizeof(struct kdirent); |
| goto out; |
| } |
| /* debugging */ |
| if (waserror()) { |
| printk("Well, sysread of a dir sucks.%s \n", |
| current_errstr()); |
| nexterror(); |
| } |
| va = c->buf + c->bufused; |
| n = DIRREADSIZE - c->bufused; |
| } |
| |
| /* this is the normal plan9 read */ |
| if (dir && c->umh) |
| n = unionread(c, va, n); |
| else { |
| if (offp == NULL) { |
| spin_lock(&c->lock); /* lock for int64_t assignment */ |
| off = c->offset; |
| spin_unlock(&c->lock); |
| } else |
| off = *offp; |
| if (off < 0) |
| error(EINVAL, ERROR_FIXME); |
| if ((off64_t)off + (size_t)n < (off64_t)off) |
| error(EINVAL, "bad offset %p + count %p", off, n); |
| if (off == 0) { |
| if (offp == NULL) { |
| spin_lock(&c->lock); |
| c->offset = 0; |
| c->dri = 0; |
| spin_unlock(&c->lock); |
| } |
| unionrewind(c); |
| } |
| if (! c->ateof) { |
| n = devtab[c->type].read(c, va, n, off); |
| if (n == 0 && dir) |
| c->ateof = 1; |
| } else { |
| n = 0; |
| } |
| spin_lock(&c->lock); |
| c->offset += n; |
| spin_unlock(&c->lock); |
| } |
| |
| /* dirty kdirent hack */ |
| if (dir) { |
| int amt; |
| c->bufused = c->bufused + n; |
| /* extract an M from the front, then shift the remainder back */ |
| amt = convM2kdirent(c->buf, c->bufused, real_va, 0); |
| c->bufused -= amt; |
| memmove(c->buf, c->buf + amt, c->bufused); |
| n = amt ? sizeof(struct kdirent) : 0; |
| poperror(); /* matching our debugging waserror */ |
| } |
| |
| out: |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| /* Reads exactly n bytes from chan c, starting at its offset. Can block, but if |
| * we get 0 back too soon (EOF or error), then we'll error out with ENODATA. |
| * That might need a little work - if there was a previous error, then we |
| * clobbered it and only know ENODATA but not why we completed early. */ |
| void read_exactly_n(struct chan *c, void *vp, long n) |
| { |
| char *p; |
| long nn; |
| int total = 0, want = n; |
| |
| p = vp; |
| while (n > 0) { |
| nn = devtab[c->type].read(c, p, n, c->offset); |
| printd("readn: Got %d@%lld\n", nn, c->offset); |
| if (nn == 0) |
| error(ENODATA, "wanted %d, got %d", want, total); |
| spin_lock(&c->lock); |
| c->offset += nn; |
| spin_unlock(&c->lock); |
| p += nn; |
| n -= nn; |
| total += nn; |
| } |
| } |
| |
/* read(): rread() using the chan's own offset (no explicit offset given). */
long sysread(int fd, void *va, long n)
{
	long amt = rread(fd, va, n, NULL);

	return amt;
}
| |
/* pread(): rread() at the explicit offset 'off'. */
long syspread(int fd, void *va, long n, int64_t off)
{
	int64_t *offp = &off;

	return rread(fd, va, n, offp);
}
| |
| int sysremove(char *path) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = namec(path, Aremove, 0, 0, NULL); |
| if (waserror()) { |
| c->type = -1; /* see below */ |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].remove(c); |
| /* |
| * Remove clunks the fid, but we need to recover the Chan |
| * so fake it up. -1 aborts the dev's close. |
| */ |
| c->type = -1; |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return 0; |
| } |
| |
| int sysrename(char *from_path, char *to_path) |
| { |
| ERRSTACK(1); |
| struct chan *volatile renamee = NULL; |
| struct chan *parent_chan; |
| |
| if (waserror()) { |
| cclose(renamee); |
| poperror(); |
| return -1; |
| } |
| renamee = namec(from_path, Aremove, 0, 0, NULL); |
| /* We might need to support wstat for 'short' rename (intra-directory, |
| * with no slashes). Til then, we can just go with EXDEV. */ |
| if (!devtab[renamee->type].rename) |
| error(EXDEV, "device does not support rename"); |
| parent_chan = namec(to_path, Arename, 0, 0, (char*)renamee); |
| /* When we're done, renamee still points to the file, but it's in the |
| * new location. Its cname is still the old location, similar to |
| * remove. If anyone cares, we can change it. parent_chan still points |
| * to the parent - it didn't get moved like create does. Though it does |
| * have the name of the new location. If we want, we can hand that to |
| * renamee. It's a moot point, since they are both getting closed. */ |
| cclose(renamee); |
| cclose(parent_chan); |
| poperror(); |
| return 0; |
| } |
| |
| int64_t sysseek(int fd, int64_t off, int whence) |
| { |
| ERRSTACK(2); |
| struct dir *dir; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(¤t->open_files, fd, -1, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| switch (whence) { |
| case 0: |
| if (c->qid.type & QTDIR) { |
| if (off != 0) |
| error(EISDIR, ERROR_FIXME); |
| unionrewind(c); |
| } else if (off < 0) |
| error(EINVAL, ERROR_FIXME); |
| spin_lock(&c->lock); /* lock for int64_t assignment */ |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| case 1: |
| if (c->qid.type & QTDIR) |
| error(EISDIR, ERROR_FIXME); |
| spin_lock(&c->lock); /* lock for read/write update */ |
| off += c->offset; |
| if (off < 0) { |
| spin_unlock(&c->lock); |
| error(EINVAL, ERROR_FIXME); |
| } |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| case 2: |
| if (c->qid.type & QTDIR) |
| error(EISDIR, ERROR_FIXME); |
| dir = chandirstat(c); |
| if (dir == NULL) |
| error(EFAIL, "internal error: stat error in seek"); |
| off += dir->length; |
| kfree(dir); |
| if (off < 0) |
| error(EINVAL, ERROR_FIXME); |
| spin_lock(&c->lock); /* lock for read/write update */ |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| default: |
| error(EINVAL, ERROR_FIXME); |
| break; |
| } |
| poperror(); |
| c->dri = 0; |
| cclose(c); |
| poperror(); |
| return off; |
| } |
| |
| void validstat(uint8_t * s, int n, int slashok) |
| { |
| |
| int m; |
| char buf[64]; |
| |
| statcheck(s, n); |
| /* verify that name entry is acceptable */ |
| s += STAT_FIX_LEN_9P - STAT_NR_STRINGS_9P * BIT16SZ; |
| /* |
| * s now points at count for first string. |
| * if it's too long, let the server decide; this is |
| * only for his protection anyway. otherwise |
| * we'd have to allocate and waserror. |
| */ |
| m = GBIT16(s); |
| s += BIT16SZ; |
| if (m + 1 > sizeof buf) { |
| return; |
| } |
| memmove(buf, s, m); |
| buf[m] = '\0'; |
| /* name could be '/' */ |
| if (strcmp(buf, "/") != 0) |
| validname(buf, slashok); |
| } |
| |
| int sysfstat(int fd, uint8_t *buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].stat(c, buf, n); |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| int sysfstatakaros(int fd, struct kstat *ks) |
| { |
| |
| int n = 4096; |
| uint8_t *buf; |
| |
| buf = kmalloc(n, MEM_WAIT); |
| n = sysfstat(fd, buf, n); |
| if (n > 0) { |
| convM2kstat(buf, n, ks); |
| n = 0; |
| } |
| kfree(buf); |
| return n; |
| } |
| |
| static int __stat(char *path, uint8_t *buf, int n, int flags) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = namec(path, Aaccess, flags, 0, NULL); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].stat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| |
| return n; |
| } |
| |
/* stat(): __stat() with no extra lookup flags. */
int sysstat(char *path, uint8_t *buf, int n)
{
	const int lookup_flags = 0;

	return __stat(path, buf, n, lookup_flags);
}
| |
| int syslstat(char *path, uint8_t *buf, int n) |
| { |
| return __stat(path, buf, n, O_NOFOLLOW); |
| } |
| |
| int sysstatakaros(char *path, struct kstat *ks, int flags) |
| { |
| |
| int n = 4096; |
| uint8_t *buf; |
| |
| buf = kmalloc(n, MEM_WAIT); |
| n = __stat(path, buf, n, flags); |
| if (n > 0) { |
| convM2kstat(buf, n, ks); |
| n = 0; |
| } |
| kfree(buf); |
| return n; |
| } |
| |
| static long rwrite(int fd, void *va, long n, int64_t * offp) |
| { |
| ERRSTACK(3); |
| struct chan *c; |
| struct dir *dir; |
| int64_t off; |
| long m; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(¤t->open_files, fd, O_WRITE, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| if (c->qid.type & QTDIR) |
| error(EISDIR, ERROR_FIXME); |
| |
| if (n < 0) |
| error(EINVAL, ERROR_FIXME); |
| |
| if (offp == NULL) { |
| /* append changes the offset to the end, and even if we fail |
| * later, this change will persist */ |
| if (c->flag & O_APPEND) { |
| dir = chandirstat(c); |
| if (!dir) |
| error(EFAIL, "stat error in append write"); |
| /* legacy lock for int64 assignment */ |
| spin_lock(&c->lock); |
| c->offset = dir->length; |
| spin_unlock(&c->lock); |
| kfree(dir); |
| } |
| spin_lock(&c->lock); |
| off = c->offset; |
| c->offset += n; |
| spin_unlock(&c->lock); |
| } else |
| off = *offp; |
| |
| if (waserror()) { |
| if (offp == NULL) { |
| spin_lock(&c->lock); |
| c->offset -= n; |
| spin_unlock(&c->lock); |
| } |
| nexterror(); |
| } |
| if (off < 0) |
| error(EINVAL, ERROR_FIXME); |
| if ((off64_t)off + (size_t)n < (off64_t)off) |
| error(EINVAL, "bad offset %p + count %p", off, n); |
| m = devtab[c->type].write(c, va, n, off); |
| poperror(); |
| |
| if (offp == NULL && m < n) { |
| spin_lock(&c->lock); |
| c->offset -= n - m; |
| spin_unlock(&c->lock); |
| } |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return m; |
| } |
| |
/* write(): rwrite() using the chan's own offset (no explicit offset given). */
long syswrite(int fd, void *va, long n)
{
	long amt = rwrite(fd, va, n, NULL);

	return amt;
}
| |
/* pwrite(): rwrite() at the explicit offset 'off'. */
long syspwrite(int fd, void *va, long n, int64_t off)
{
	int64_t *offp = &off;

	return rwrite(fd, va, n, offp);
}
| |
| int syswstat(char *path, uint8_t * buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validstat(buf, n, 0); |
| c = namec(path, Aaccess, 0, 0, NULL); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| n = devtab[c->type].wstat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| struct dir *chandirstat(struct chan *c) |
| { |
| ERRSTACK(1); |
| struct dir *d; |
| uint8_t *buf; |
| int n, nd, i; |
| |
| nd = DIRSIZE; |
| for (i = 0; i < 2; i++) { /* should work by the second try */ |
| d = kzmalloc(sizeof(struct dir) + nd, MEM_WAIT); |
| buf = (uint8_t *) & d[1]; |
| if (waserror()) { |
| kfree(d); |
| poperror(); |
| return NULL; |
| } |
| n = devtab[c->type].stat(c, buf, nd); |
| poperror(); |
| if (n < BIT16SZ) { |
| kfree(d); |
| return NULL; |
| } |
| /* size needed to store whole stat buffer including count */ |
| nd = GBIT16((uint8_t *) buf) + BIT16SZ; |
| if (nd <= n) { |
| convM2D(buf, n, d, (char *)&d[1]); |
| return d; |
| } |
| /* else sizeof(Dir)+nd is plenty */ |
| kfree(d); |
| } |
| return NULL; |
| |
| } |
| |
| static struct dir *__dir_stat(char *name, int flags) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| struct dir *d; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| |
| c = namec(name, Aaccess, flags, 0, NULL); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| d = chandirstat(c); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return d; |
| } |
| |
/* stat() returning a struct dir: __dir_stat() with no extra lookup flags. */
struct dir *sysdirstat(char *name)
{
	const int lookup_flags = 0;

	return __dir_stat(name, lookup_flags);
}
| |
| struct dir *sysdirlstat(char *name) |
| { |
| return __dir_stat(name, O_NOFOLLOW); |
| } |
| |
| struct dir *sysdirfstat(int fd) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| struct dir *d; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| d = chandirstat(c); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return d; |
| } |
| |
| int sysdirwstat(char *name, struct dir *dir) |
| { |
| |
| uint8_t *buf; |
| int r; |
| |
| r = sizeD2M(dir); |
| buf = kzmalloc(r, MEM_WAIT); |
| convD2M(dir, buf, r); |
| r = syswstat(name, buf, r); |
| kfree(buf); |
| return r < 0 ? r : 0; |
| } |
| |
| int sysdirfwstat(int fd, struct dir *dir) |
| { |
| |
| uint8_t *buf; |
| int r; |
| |
| r = sizeD2M(dir); |
| buf = kzmalloc(r, MEM_WAIT); |
| convD2M(dir, buf, r); |
| r = sysfwstat(fd, buf, r); |
| kfree(buf); |
| return r < 0 ? r : 0; |
| } |
| |
| static long dirpackage(uint8_t * buf, long ts, struct kdirent **d) |
| { |
| |
| char *s; |
| long ss, i, n, nn, m = 0; |
| |
| *d = NULL; |
| if (ts <= 0) { |
| return ts; |
| } |
| |
| /* |
| * first find number of all stats, check they look like stats, & size |
| * all associated strings |
| */ |
| ss = 0; |
| n = 0; |
| for (i = 0; i < ts; i += m) { |
| m = BIT16SZ + GBIT16(&buf[i]); |
| statcheck(&buf[i], m); |
| ss += m; |
| n++; |
| } |
| |
| *d = kzmalloc(n * sizeof(**d) + ss, 0); |
| if (*d == NULL) |
| error(ENOMEM, ERROR_FIXME); |
| |
| /* |
| * then convert all buffers |
| */ |
| s = (char *)*d + n * sizeof(**d); |
| nn = 0; |
| for (i = 0; i < ts; i += m) { |
| m = BIT16SZ + GBIT16((uint8_t *) & buf[i]); |
| /* Note 's' is ignored by convM2kdirent */ |
| if (nn >= n || /*convM2D */ convM2kdirent(&buf[i], m, *d + nn, |
| s) != m) { |
| kfree(*d); |
| *d = NULL; |
| error(EFAIL, "bad directory entry"); |
| } |
| nn++; |
| s += m; |
| } |
| |
| return nn; |
| } |
| |
| long sysdirread(int fd, struct kdirent **d) |
| { |
| ERRSTACK(2); |
| uint8_t *buf; |
| long ts; |
| |
| *d = NULL; |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| buf = kzmalloc(DIRREADLIM, 0); |
| if (buf == NULL) |
| error(ENOMEM, ERROR_FIXME); |
| if (waserror()) { |
| kfree(buf); |
| nexterror(); |
| } |
| ts = sysread(fd, buf, DIRREADLIM); |
| if (ts >= 0) |
| ts = dirpackage(buf, ts, d); |
| poperror(); |
| kfree(buf); |
| poperror(); |
| return ts; |
| } |
| |
| int sysiounit(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| int n; |
| |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| poperror(); |
| return 0; /* n.b. */ |
| } |
| n = c->iounit; |
| poperror(); |
| cclose(c); |
| return n; |
| } |
| |
| void print_chaninfo(struct chan *c) |
| { |
| |
| char buf[128] = { 0 }; |
| bool has_dev = c->type != -1; |
| bool has_chaninfo = has_dev && devtab[c->type].chaninfo; |
| |
| print_lock(); |
| printk("Chan flags: %p, pathname: %s, ref: %d, Dev: %s, Devinfo: %s", |
| c->flag, |
| c->name ? c->name->s : "no cname", |
| kref_refcnt(&c->ref), |
| has_dev ? devtab[c->type].name : "no dev", |
| has_chaninfo ? devtab[c->type].chaninfo(c, buf, sizeof(buf)) |
| : ""); |
| if (!has_chaninfo) |
| printk("qid.path: %p\n", c->qid.path); |
| printk("\n"); |
| print_unlock(); |
| } |
| |
| /* TODO: 9ns ns inheritance flags: Shared, copied, or empty. The old fgrp is |
| * managed by the fd_table, which is handled outside this function. We share |
| * the pgrp. */ |
| int plan9setup(struct proc *new_proc, struct proc *parent, int flags) |
| { |
| |
| struct chan *new_dot; |
| |
| ERRSTACK(1); |
| if (waserror()) { |
| printk("plan9setup failed, %s\n", current_errstr()); |
| poperror(); |
| return -1; |
| } |
| if (!parent) { |
| /* We are probably spawned by the kernel directly, and have no |
| * parent to inherit from. */ |
| new_proc->pgrp = newpgrp(); |
| new_proc->slash = namec("#kfs", Atodir, 0, 0, NULL); |
| if (!new_proc->slash) |
| panic("no kfs device"); |
| /* Want the name to be "/" instead of "#kfs" */ |
| cnameclose(new_proc->slash->name); |
| new_proc->slash->name = newcname("/"); |
| new_proc->dot = cclone(new_proc->slash); |
| poperror(); |
| return 0; |
| } |
| /* Shared semantics */ |
| kref_get(&parent->pgrp->ref, 1); |
| new_proc->pgrp = parent->pgrp; |
| /* copy semantics on / and . (doesn't make a lot of sense in akaros |
| * o/w). */ |
| /* / should never disappear while we hold a ref to parent */ |
| chan_incref(parent->slash); |
| new_proc->slash = parent->slash; |
| |
| rcu_read_lock(); |
| new_dot = rcu_dereference(parent->dot); |
| kref_get(&new_dot->ref, 1); |
| rcu_read_unlock(); |
| new_proc->dot = new_dot; |
| |
| poperror(); |
| return 0; |
| } |
| |
| /* Open flags, create modes, access types, file flags, and all that... |
| * |
| * there are a bunch of things here: |
| * 1) file creation flags (e.g. O_TRUNC) |
| * 2) file status flags (e.g. O_APPEND) |
| * 3) file open modes (e.g. O_RDWR) |
| * 4) file descriptor flags (e.g. CLOEXEC) |
| * 5) file creation mode (e.g. S_IRWXU) |
| * the 1-4 are passed in via open's vfs_flags, and the 5 via mode only when |
| * O_CREATE is set. |
| * |
| * file creation flags (1) only matter when creating, but aren't permanent. |
| * O_EXCL, O_DIRECTORY, O_TRUNC, etc. |
| * |
| * file status flags (2) are per struct file/chan. stuff like O_APPEND, |
| * O_ASYNC, etc. we convert those to an internal flag bit and store in c->flag |
| * |
| * the open mode (3) matters for a given FD/chan (chan->mode), and should be |
| * stored in the chan. (c->mode) stuff like O_RDONLY. |
| * |
| * the file descriptor flags (4) clearly are in the FD. note that the same |
| * file/chan can be opened by two different FDs, with different flags. the only |
| * one anyone uses is CLOEXEC. while exec may not last long in akaros, i can |
| * imagine similar "never pass to children" flags/meanings. |
| * |
| * the file creation mode (5) matters for the device's permissions; given this, |
| * it should be stored in the device/inode. ACLs fall under this category. |
| * |
| * finally, only certain categories can be edited afterwards: file status flags |
| * (2), FD flags (4), and file permissions (5). */ |
| int fd_getfl(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| int ret; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| |
| ret = c->mode; |
| ret |= c->flag & CEXTERNAL_FLAGS; |
| |
| cclose(c); |
| poperror(); |
| return ret; |
| } |
| |
| static bool cexternal_flags_differ(int set1, int set2, int flags) |
| { |
| flags &= CEXTERNAL_FLAGS; |
| return (set1 & flags) ^ (set2 & flags); |
| } |
| |
| static int chan_setfl(struct chan *c, int flags) |
| { |
| int ret; |
| |
| if (cexternal_flags_differ(flags, c->flag, O_CLOEXEC)) { |
| /* TODO: The whole CCEXEC / O_CLOEXEC on 9ns needs work */ |
| error(EINVAL, "can't toggle O_CLOEXEC with setfl"); |
| } |
| if (cexternal_flags_differ(flags, c->flag, O_REMCLO)) |
| error(EINVAL, "can't toggle O_REMCLO with setfl"); |
| if (cexternal_flags_differ(flags, c->flag, O_PATH)) |
| error(EINVAL, "can't toggle O_PATH with setfl"); |
| ret = devtab[c->type].chan_ctl(c, CCTL_SET_FL, flags & CEXTERNAL_FLAGS, |
| 0, 0, 0); |
| c->flag = (c->flag & ~CEXTERNAL_FLAGS) | (flags & CEXTERNAL_FLAGS); |
| return ret; |
| } |
| |
| int fd_chan_ctl(int fd, int cmd, unsigned long arg1, unsigned long arg2, |
| unsigned long arg3, unsigned long arg4) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| int ret; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(¤t->open_files, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| if (!devtab[c->type].chan_ctl) |
| error(EINVAL, "%s has no chan_ctl, can't %d", chan_dev_name(c), |
| cmd); |
| |
| /* Some commands require 9ns support in addition to the device ctl. */ |
| switch (cmd) { |
| case CCTL_SET_FL: |
| ret = chan_setfl(c, arg1); |
| break; |
| default: |
| ret = devtab[c->type].chan_ctl(c, cmd, arg1, arg2, arg3, arg4); |
| break; |
| } |
| |
| poperror(); |
| cclose(c); |
| poperror(); |
| return ret; |
| } |
| |
| ssize_t kread_file(struct file_or_chan *file, void *buf, size_t sz) |
| { |
| /* TODO: (KFOP) (VFS kernel read/writes need to be from a ktask) */ |
| uintptr_t old_ret = switch_to_ktask(); |
| off64_t dummy = 0; |
| ssize_t cpy_amt = foc_read(file, buf, sz, dummy); |
| |
| switch_back_from_ktask(old_ret); |
| return cpy_amt; |
| } |
| |
| /* Reads the contents of an entire file into a buffer, returning that buffer. |
| * On error, prints something useful and returns 0 */ |
| void *kread_whole_file(struct file_or_chan *file) |
| { |
| size_t size; |
| void *contents; |
| ssize_t cpy_amt; |
| |
| size = foc_get_len(file); |
| contents = kmalloc(size, MEM_WAIT); |
| cpy_amt = kread_file(file, contents, size); |
| if (cpy_amt < 0) { |
| printk("Error %d reading file %s\n", get_errno(), |
| foc_to_name(file)); |
| kfree(contents); |
| return 0; |
| } |
| if (cpy_amt != size) { |
| printk("Read %d, needed %d for file %s\n", cpy_amt, size, |
| foc_to_name(file)); |
| kfree(contents); |
| return 0; |
| } |
| return contents; |
| } |
| |
| /* Process-related File management functions */ |
| |
| /* Given any FD, get the appropriate object, 0 o/w. Set incref if you want a |
| * reference count (which is a 9ns thing, you can't use the pointer if you |
| * didn't incref). */ |
| void *lookup_fd(struct fd_table *fdt, int fd, bool incref) |
| { |
| void *retval = 0; |
| |
| if (fd < 0) |
| return 0; |
| spin_lock(&fdt->lock); |
| if (fdt->closed) { |
| spin_unlock(&fdt->lock); |
| return 0; |
| } |
| if (fd < fdt->max_fdset) { |
| if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { |
| /* while max_files and max_fdset might not line up, we |
| * should never have a valid fdset higher than files */ |
| assert(fd < fdt->max_files); |
| retval = fdt->fd[fd].fd_chan; |
| if (incref) |
| chan_incref((struct chan*)retval); |
| } |
| } |
| spin_unlock(&fdt->lock); |
| return retval; |
| } |
| |
| /* Grow the vfs fd set */ |
| static int grow_fd_set(struct fd_table *open_files) |
| { |
| int n; |
| struct file_desc *nfd, *ofd; |
| |
| /* Only update open_fds once. If currently pointing to open_fds_init, |
| * then update it to point to a newly allocated fd_set with space for |
| * NR_FILE_DESC_MAX */ |
| if (open_files->open_fds == (struct fd_set*)&open_files->open_fds_init) |
| { |
| open_files->open_fds = kzmalloc(sizeof(struct fd_set), 0); |
| memmove(open_files->open_fds, &open_files->open_fds_init, |
| sizeof(struct small_fd_set)); |
| } |
| |
| /* Grow the open_files->fd array in increments of NR_OPEN_FILES_DEFAULT |
| */ |
| n = open_files->max_files + NR_OPEN_FILES_DEFAULT; |
| if (n > NR_FILE_DESC_MAX) |
| return -EMFILE; |
| nfd = kzmalloc(n * sizeof(struct file_desc), 0); |
| if (nfd == NULL) |
| return -ENOMEM; |
| |
| /* Move the old array on top of the new one */ |
| ofd = open_files->fd; |
| memmove(nfd, ofd, open_files->max_files * sizeof(struct file_desc)); |
| |
| /* Update the array and the maxes for both max_files and max_fdset */ |
| open_files->fd = nfd; |
| open_files->max_files = n; |
| open_files->max_fdset = n; |
| |
| /* Only free the old one if it wasn't pointing to open_files->fd_array*/ |
| if (ofd != open_files->fd_array) |
| kfree(ofd); |
| return 0; |
| } |
| |
| /* Free the vfs fd set if necessary */ |
| static void free_fd_set(struct fd_table *open_files) |
| { |
| void *free_me; |
| |
| if (open_files->open_fds != (struct fd_set*)&open_files->open_fds_init) |
| { |
| assert(open_files->fd != open_files->fd_array); |
| /* need to reset the pointers to the internal addrs, in case we |
| * take a look while debugging. 0 them out, since they have old |
| * data. our current versions should all be closed. */ |
| memset(&open_files->open_fds_init, 0, |
| sizeof(struct small_fd_set)); |
| memset(&open_files->fd_array, 0, sizeof(open_files->fd_array)); |
| |
| free_me = open_files->open_fds; |
| open_files->open_fds = |
| (struct fd_set*)&open_files->open_fds_init; |
| kfree(free_me); |
| |
| free_me = open_files->fd; |
| open_files->fd = open_files->fd_array; |
| kfree(free_me); |
| } |
| } |
| |
| /* If FD is in the group, remove it, decref it, and return TRUE. */ |
| bool close_fd(struct fd_table *fdt, int fd) |
| { |
| struct chan *chan = 0; |
| struct fd_tap *tap = 0; |
| bool ret = FALSE; |
| |
| if (fd < 0) |
| return FALSE; |
| spin_lock(&fdt->lock); |
| if (fd < fdt->max_fdset) { |
| if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { |
| /* while max_files and max_fdset might not line up, we |
| * should never have a valid fdset higher than files */ |
| assert(fd < fdt->max_files); |
| chan = fdt->fd[fd].fd_chan; |
| tap = fdt->fd[fd].fd_tap; |
| fdt->fd[fd].fd_chan = 0; |
| fdt->fd[fd].fd_tap = 0; |
| CLR_BITMASK_BIT(fdt->open_fds->fds_bits, fd); |
| if (fd < fdt->hint_min_fd) |
| fdt->hint_min_fd = fd; |
| ret = TRUE; |
| } |
| } |
| spin_unlock(&fdt->lock); |
| /* Need to decref/cclose outside of the lock; they could sleep */ |
| cclose(chan); |
| if (tap) |
| kref_put(&tap->kref); |
| return ret; |
| } |
| |
| static int __get_fd(struct fd_table *open_files, int low_fd, bool must_use_low) |
| { |
| int slot = -1; |
| int error; |
| bool update_hint = TRUE; |
| |
| if ((low_fd < 0) || (low_fd > NR_FILE_DESC_MAX)) |
| return -EINVAL; |
| if (open_files->closed) |
| return -EINVAL; /* won't matter, they are dying */ |
| if (must_use_low |
| && GET_BITMASK_BIT(open_files->open_fds->fds_bits, low_fd)) |
| return -ENFILE; |
| if (low_fd > open_files->hint_min_fd) |
| update_hint = FALSE; |
| else |
| low_fd = open_files->hint_min_fd; |
| /* Loop until we have a valid slot (we grow the fd_array at the bottom |
| * of the loop if we haven't found a slot in the current array */ |
| while (slot == -1) { |
| for (low_fd; low_fd < open_files->max_fdset; low_fd++) { |
| if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, |
| low_fd)) |
| continue; |
| slot = low_fd; |
| SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot); |
| assert(slot < open_files->max_files && |
| open_files->fd[slot].fd_chan == 0); |
| /* We know slot >= hint, since we started with hint */ |
| if (update_hint) |
| open_files->hint_min_fd = slot + 1; |
| break; |
| } |
| if (slot == -1) { |
| if ((error = grow_fd_set(open_files))) |
| return error; |
| } |
| } |
| return slot; |
| } |
| |
| /* Insert a file or chan (obj, chosen by vfs) into the fd group with fd_flags. |
| * If must_use_low, then we have to insert at FD = low_fd. o/w we start looking |
| * for empty slots at low_fd. */ |
| int insert_obj_fdt(struct fd_table *fdt, void *obj, int low_fd, int fd_flags, |
| bool must_use_low) |
| { |
| int slot; |
| |
| spin_lock(&fdt->lock); |
| slot = __get_fd(fdt, low_fd, must_use_low); |
| if (slot < 0) { |
| spin_unlock(&fdt->lock); |
| return slot; |
| } |
| assert(slot < fdt->max_files && |
| fdt->fd[slot].fd_chan == 0); |
| chan_incref((struct chan*)obj); |
| fdt->fd[slot].fd_chan = obj; |
| fdt->fd[slot].fd_flags = fd_flags; |
| spin_unlock(&fdt->lock); |
| return slot; |
| } |
| |
| /* Closes all open files. Mostly just a "put" for all files. If cloexec, it |
| * will only close the FDs with FD_CLOEXEC (opened with O_CLOEXEC or fcntld). |
| * |
| * Notes on concurrency: |
| * - Can't hold spinlocks while we call cclose, since it might sleep eventually. |
| * - We're called from proc_destroy, so we could have concurrent openers trying |
| * to add to the group (other syscalls), hence the "closed" flag. |
| * - dot and slash chans are dealt with in proc_free. its difficult to close |
| * and zero those with concurrent syscalls, since those are a source of krefs. |
| * - Once we lock and set closed, no further additions can happen. To simplify |
| * our closes, we also allow multiple calls to this func (though that should |
| * never happen with the current code). */ |
| void close_fdt(struct fd_table *fdt, bool cloexec) |
| { |
| struct chan *chan; |
| struct file_desc *to_close; |
| int idx = 0; |
| |
| to_close = kzmalloc(sizeof(struct file_desc) * fdt->max_files, |
| MEM_WAIT); |
| spin_lock(&fdt->lock); |
| if (fdt->closed) { |
| spin_unlock(&fdt->lock); |
| kfree(to_close); |
| return; |
| } |
| for (int i = 0; i < fdt->max_fdset; i++) { |
| if (GET_BITMASK_BIT(fdt->open_fds->fds_bits, i)) { |
| /* while max_files and max_fdset might not line up, we |
| * should never have a valid fdset higher than files */ |
| assert(i < fdt->max_files); |
| if (cloexec && !(fdt->fd[i].fd_flags & FD_CLOEXEC)) |
| continue; |
| chan = fdt->fd[i].fd_chan; |
| to_close[idx].fd_tap = fdt->fd[i].fd_tap; |
| fdt->fd[i].fd_tap = 0; |
| fdt->fd[i].fd_chan = 0; |
| to_close[idx++].fd_chan = chan; |
| CLR_BITMASK_BIT(fdt->open_fds->fds_bits, i); |
| } |
| } |
| /* it's just a hint, we can build back up from being 0 */ |
| fdt->hint_min_fd = 0; |
| if (!cloexec) { |
| free_fd_set(fdt); |
| fdt->closed = TRUE; |
| } |
| spin_unlock(&fdt->lock); |
| /* We go through some hoops to close/decref outside the lock. Nice for |
| * not holding the lock for a while; critical in case the decref/cclose |
| * sleeps (it can) */ |
| for (int i = 0; i < idx; i++) { |
| cclose(to_close[i].fd_chan); |
| if (to_close[i].fd_tap) |
| kref_put(&to_close[i].fd_tap->kref); |
| } |
| kfree(to_close); |
| } |
| |
| /* Inserts all of the files from src into dst, used by sys_fork(). */ |
| void clone_fdt(struct fd_table *src, struct fd_table *dst) |
| { |
| struct chan *chan; |
| int ret; |
| |
| spin_lock(&src->lock); |
| if (src->closed) { |
| spin_unlock(&src->lock); |
| return; |
| } |
| spin_lock(&dst->lock); |
| if (dst->closed) { |
| warn("Destination closed before it opened"); |
| spin_unlock(&dst->lock); |
| spin_unlock(&src->lock); |
| return; |
| } |
| while (src->max_files > dst->max_files) { |
| ret = grow_fd_set(dst); |
| if (ret < 0) { |
| set_error(-ret, "Failed to grow for a clone_fdt"); |
| spin_unlock(&dst->lock); |
| spin_unlock(&src->lock); |
| return; |
| } |
| } |
| for (int i = 0; i < src->max_fdset; i++) { |
| if (GET_BITMASK_BIT(src->open_fds->fds_bits, i)) { |
| /* while max_files and max_fdset might not line up, we |
| * should never have a valid fdset higher than files */ |
| assert(i < src->max_files); |
| chan = src->fd[i].fd_chan; |
| assert(i < dst->max_files && dst->fd[i].fd_chan == 0); |
| SET_BITMASK_BIT(dst->open_fds->fds_bits, i); |
| dst->fd[i].fd_chan = chan; |
| chan_incref(chan); |
| } |
| } |
| dst->hint_min_fd = src->hint_min_fd; |
| spin_unlock(&dst->lock); |
| spin_unlock(&src->lock); |
| } |
| |
| int fd_get_fd_flags(struct fd_table *fdt, int fd) |
| { |
| int ret = -1; |
| |
| if (fd < 0) |
| return -1; |
| spin_lock(&fdt->lock); |
| if (fdt->closed) { |
| spin_unlock(&fdt->lock); |
| return -1; |
| } |
| if ((fd < fdt->max_fdset) |
| && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) |
| ret = fdt->fd[fd].fd_flags; |
| spin_unlock(&fdt->lock); |
| if (ret == -1) |
| set_error(EBADF, "FD was not open"); |
| return ret; |
| } |
| |
| int fd_set_fd_flags(struct fd_table *fdt, int fd, int new_fl) |
| { |
| int ret = -1; |
| |
| if (fd < 0) |
| return -1; |
| spin_lock(&fdt->lock); |
| if (fdt->closed) { |
| spin_unlock(&fdt->lock); |
| return -1; |
| } |
| if ((fd < fdt->max_fdset) |
| && GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) |
| fdt->fd[fd].fd_flags = new_fl; |
| spin_unlock(&fdt->lock); |
| if (ret == -1) |
| set_error(EBADF, "FD was not open"); |
| return ret; |
| } |