| // INFERNO |
| #include <vfs.h> |
| #include <kfs.h> |
| #include <slab.h> |
| #include <kmalloc.h> |
| #include <kref.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <assert.h> |
| #include <error.h> |
| #include <cpio.h> |
| #include <pmap.h> |
| #include <smp.h> |
| #include <ip.h> |
| |
| enum { |
| DIRSIZE = STATFIXLEN + 32 * 4, |
| DIRREADLIM = 2048, /* should handle the largest reasonable directory entry */ |
| DIRREADSIZE=8192, /* Just read a lot. Memory is cheap, lots of bandwidth, |
| * and RPCs are very expensive. At the same time, |
| * let's not yet exceed a common MSIZE. */ |
| }; |
| |
| static int growfd(struct fgrp *f, int fd) |
| { |
| int n; |
| struct chan **nfd, **ofd; |
| |
| if (fd < f->nfd) { |
| return 0; |
| } |
| /* want to grow by a reasonable amount (delta), but also make sure we can |
| * handle the fd we're asked for */ |
| n = MAX(f->nfd, fd + 1) + DELTAFD; |
| if (n > MAXNFD) |
| n = MAXNFD; |
| if (fd >= n) { |
| set_errno(EMFILE); |
| set_errstr("Asked for FD %d, more than %d\n", fd, MAXNFD); |
| return -1; |
| } |
| nfd = kzmalloc(n * sizeof(struct chan *), 0); |
| if (nfd == NULL) { |
| set_errno(ENOMEM); |
| set_errstr("Failed to growfd for FD %d, OOM\n", fd); |
| return -1; |
| } |
| ofd = f->fd; |
| memmove(nfd, ofd, f->nfd * sizeof(struct chan *)); |
| f->fd = nfd; |
| f->nfd = n; |
| kfree(ofd); |
| return 0; |
| } |
| |
| int newfd(struct chan *c, bool oflags) |
| { |
| int i; |
| struct fgrp *f = current->fgrp; |
| |
| spin_lock(&f->lock); |
| if (f->closed) { |
| spin_unlock(&f->lock); |
| return -1; |
| } |
| /* VFS hack */ |
| /* We'd like to ask it to start at f->minfd, but that would require us to |
| * know if we closed anything. Since we share the FD numbers with the VFS, |
| * there is no way to know that. */ |
| #if 1 // VFS hack |
| i = get_fd(¤t->open_files, 0, oflags & O_CLOEXEC); |
| #else // 9ns style |
| /* TODO: use a unique integer allocator */ |
| for (i = f->minfd; i < f->nfd; i++) |
| if (f->fd[i] == 0) |
| break; |
| #endif |
| if (growfd(f, i) < 0) { |
| spin_unlock(&f->lock); |
| return -1; |
| } |
| assert(f->fd[i] == 0); |
| f->minfd = i + 1; |
| if (i > f->maxfd) |
| f->maxfd = i; |
| f->fd[i] = c; |
| spin_unlock(&f->lock); |
| return i; |
| } |
| |
| struct chan *fdtochan(struct fgrp *f, int fd, int mode, int chkmnt, int iref) |
| { |
| |
| struct chan *c; |
| |
| c = 0; |
| |
| spin_lock(&f->lock); |
| if (f->closed) { |
| spin_unlock(&f->lock); |
| error("File group closed"); |
| } |
| if (fd < 0 || f->maxfd < fd || (c = f->fd[fd]) == 0) { |
| spin_unlock(&f->lock); |
| set_errno(EBADF); |
| error("Bad FD %d\n", fd); |
| } |
| if (iref) |
| chan_incref(c); |
| spin_unlock(&f->lock); |
| |
| if (chkmnt && (c->flag & CMSG)) { |
| if (iref) |
| cclose(c); |
| error(Ebadusefd); |
| } |
| |
| if (mode < 0 || c->mode == ORDWR) { |
| return c; |
| } |
| |
| if ((mode & OTRUNC) && IS_RDONLY(c->mode)) { |
| if (iref) |
| cclose(c); |
| error(Ebadusefd); |
| } |
| |
| /* TODO: this is probably wrong. if you get this from a dev, in the dev's |
| * open, you are probably saving mode directly, without passing it through |
| * openmode. */ |
| if ((mode & ~OTRUNC) != c->mode) { |
| warn("Trunc mode issue: mode %o, mode minus trunc %o, chan mode %o\n", |
| mode, mode & ~OTRUNC, c->mode); |
| if (iref) |
| cclose(c); |
| error(Ebadusefd); |
| } |
| |
| return c; |
| } |
| |
| long kchanio(void *vc, void *buf, int n, int mode) |
| { |
| ERRSTACK(1); |
| int r; |
| struct chan *c; |
| |
| c = vc; |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| if (IS_RDONLY(mode)) |
| r = devtab[c->type].read(c, buf, n, c->offset); |
| else |
| r = devtab[c->type].write(c, buf, n, c->offset); |
| |
| spin_lock(&c->lock); |
| c->offset += r; |
| spin_unlock(&c->lock); |
| poperror(); |
| return r; |
| } |
| |
| int openmode(uint32_t omode) |
| { |
| /* GIANT WARNING: if this ever throws, ipopen (and probably many others) will |
| * screw up refcnts of Qctl, err, data, etc */ |
| #if 0 |
| /* this is the old plan9 style. i think they want to turn exec into read, |
| * and strip off anything higher, and just return the RD/WR style bits. not |
| * stuff like ORCLOSE. the lack of OEXCL might be a bug on their part (it's |
| * the only one of their non-RW-related flags that isn't masked out) */ |
| if (o >= (OTRUNC | OCEXEC | ORCLOSE | OEXEC)) |
| error(Ebadarg); |
| o &= ~(OTRUNC | OCEXEC | ORCLOSE); |
| if (o > OEXEC) |
| error(Ebadarg); |
| if (o == OEXEC) |
| return OREAD; |
| return o; |
| #endif |
| /* no error checking (we have a shitload of flags anyway), and we return the |
| * basic access modes (RD/WR/ETC) */ |
| if (omode == O_EXEC) { |
| return O_RDONLY; |
| } |
| return omode & O_ACCMODE; |
| } |
| |
| void fdclose(struct fgrp *f, int fd) |
| { |
| |
| int i; |
| struct chan *c; |
| |
| spin_lock(&f->lock); |
| if (f->closed) { |
| spin_unlock(&f->lock); |
| return; |
| } |
| c = f->fd[fd]; |
| if (c == 0) { |
| /* can happen for users with shared fd tables */ |
| spin_unlock(&f->lock); |
| return; |
| } |
| f->fd[fd] = 0; |
| if (fd == f->maxfd) |
| for (i = fd; --i >= 0 && f->fd[i] == 0;) |
| f->maxfd = i; |
| if (fd < f->minfd) |
| f->minfd = fd; |
| /* VFS hack: give the FD back to VFS */ |
| put_fd(¤t->open_files, fd); |
| spin_unlock(&f->lock); |
| cclose(c); |
| } |
| |
| int syschdir(char *path) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| struct pgrp *pg; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = namec(path, Atodir, 0, 0); |
| pg = current->pgrp; |
| cclose(pg->dot); |
| pg->dot = c; |
| poperror(); |
| return 0; |
| } |
| |
/* Closes 'fd' in fgrp 'f'.  Returns 0 on success, or -1 when fdtochan()
 * rejects the fd. */
int fgrpclose(struct fgrp *f, int fd)
{
	ERRSTACK(1);

	if (waserror()) {
		poperror();
		return -1;
	}

	/* fdtochan is called purely for its error checks, so no reference is
	 * taken on the chan (iref == 0) and its result is ignored.  fdclose takes
	 * care of processes racing through here. */
	(void)fdtochan(f, fd, -1, 0, 0);
	fdclose(f, fd);
	poperror();
	return 0;
}
| |
| int sysclose(int fd) |
| { |
| return fgrpclose(current->fgrp, fd); |
| } |
| |
| int syscreate(char *path, int mode, uint32_t perm) |
| { |
| ERRSTACK(2); |
| int fd; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| openmode(mode & ~OEXCL); /* error check only; OEXCL okay here */ |
| c = namec(path, Acreate, mode, perm); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| fd = newfd(c, mode); /* 9ns mode is the O_FLAGS and perm is glibc mode */ |
| if (fd < 0) |
| error(Enofd); |
| poperror(); |
| |
| poperror(); |
| return fd; |
| } |
| |
| // This is in need of rework but for now just copy and convert. |
| int sysdup(int old, int new) |
| { |
| ERRSTACK(2); |
| int fd; |
| struct chan *c, *oc; |
| struct fgrp *f = current->fgrp; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(current->fgrp, old, -1, 0, 1); |
| if (c->qid.type & QTAUTH) { |
| cclose(c); |
| error(Eperm); |
| } |
| fd = new; |
| if (fd != -1) { |
| /* ideally we'll be done with the VFS before we fix this */ |
| /* double check the ccloses when you fix this */ |
| panic("Need to sync with the VFS"); |
| spin_lock(&f->lock); |
| if (f->closed) { |
| spin_unlock(&f->lock); |
| cclose(c); |
| return -1; |
| } |
| if (fd < 0) { |
| spin_unlock(&f->lock); |
| cclose(c); |
| set_errno(EBADF); |
| error("Bad FD %d\n", fd); |
| } |
| if (growfd(f, fd) < 0) { |
| spin_unlock(&f->lock); |
| cclose(c); |
| error(current_errstr()); |
| } |
| if (fd > f->maxfd) |
| f->maxfd = fd; |
| oc = f->fd[fd]; |
| f->fd[fd] = c; |
| spin_unlock(&f->lock); |
| if (oc) |
| cclose(oc); |
| } else { |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| fd = newfd(c, 0); |
| if (fd < 0) |
| error(Enofd); |
| poperror(); |
| } |
| poperror(); |
| return fd; |
| } |
| |
| /* Could pass in the fgrp instead of the proc, but we need the to_proc for now |
| * so we can claim a VFS FD */ |
| int sys_dup_to(struct proc *from_proc, unsigned int from_fd, |
| struct proc *to_proc, unsigned int to_fd) |
| { |
| ERRSTACK(1); |
| struct chan *c, *old_chan; |
| struct fgrp *to_fgrp = to_proc->fgrp; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(from_proc->fgrp, from_fd, -1, 0, 1); |
| if (c->qid.type & QTAUTH) { |
| cclose(c); |
| error(Eperm); |
| } |
| |
| spin_lock(&to_fgrp->lock); |
| if (to_fgrp->closed) { |
| spin_unlock(&to_fgrp->lock); |
| cclose(c); |
| error("Can't dup, FGRP closed"); |
| } |
| if (claim_fd(&to_proc->open_files, to_fd)) { |
| spin_unlock(&to_fgrp->lock); |
| cclose(c); |
| error("Can't claim FD %d", to_fd); |
| } |
| if (growfd(to_fgrp, to_fd) < 0) { |
| spin_unlock(&to_fgrp->lock); |
| cclose(c); |
| error(current_errstr()); |
| } |
| if (to_fd > to_fgrp->maxfd) |
| to_fgrp->maxfd = to_fd; |
| old_chan = to_fgrp->fd[to_fd]; |
| to_fgrp->fd[to_fd] = c; |
| spin_unlock(&to_fgrp->lock); |
| if (old_chan) |
| cclose(old_chan); |
| |
| poperror(); |
| return 0; |
| } |
| |
| char *sysfd2path(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| char *s; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| s = NULL; |
| if (c->name != NULL) { |
| s = kzmalloc(c->name->len + 1, 0); |
| if (s == NULL) { |
| cclose(c); |
| error(Enomem); |
| } |
| memmove(s, c->name->s, c->name->len + 1); |
| } |
| cclose(c); |
| poperror(); |
| return s; |
| } |
| |
| int sysfauth(int fd, char *aname) |
| { |
| ERRSTACK(2); |
| struct chan *c, *ac; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validname(aname, 0); |
| c = fdtochan(current->fgrp, fd, ORDWR, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| ac = mntauth(c, aname); |
| |
| /* at this point ac is responsible for keeping c alive */ |
| poperror(); /* c */ |
| cclose(c); |
| |
| if (waserror()) { |
| cclose(ac); |
| nexterror(); |
| } |
| |
| fd = newfd(ac, 0); |
| if (fd < 0) |
| error(Enofd); |
| poperror(); /* ac */ |
| |
| poperror(); |
| |
| return fd; |
| } |
| |
| int sysfversion(int fd, unsigned int msize, char *vers, unsigned int arglen) |
| { |
| ERRSTACK(2); |
| int m; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| /* check there's a NUL in the version string */ |
| if (arglen == 0 || memchr(vers, 0, arglen) == 0) |
| error(Ebadarg); |
| |
| c = fdtochan(current->fgrp, fd, ORDWR, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| m = mntversion(c, vers, msize, arglen); |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return m; |
| } |
| |
| int syspipe(int fd[2]) |
| { |
| ERRSTACK(1); |
| struct dev *d; |
| struct fgrp *f; |
| struct chan *c[2]; |
| static char *names[] = { "data", "data1" }; |
| |
| f = current->fgrp; |
| |
| d = &devtab[devno('|', 0)]; |
| c[0] = namec("#|", Atodir, 0, 0); |
| c[1] = 0; |
| fd[0] = -1; |
| fd[1] = -1; |
| if (waserror()) { |
| if (c[0] != 0) |
| cclose(c[0]); |
| if (c[1] != 0) |
| cclose(c[1]); |
| if (fd[0] >= 0) { |
| /* VFS hack */ |
| f->fd[fd[0]] = 0; |
| put_fd(¤t->open_files, fd[0]); |
| } |
| if (fd[1] >= 0) { |
| /* VFS hack */ |
| f->fd[fd[1]] = 0; |
| put_fd(¤t->open_files, fd[1]); |
| } |
| poperror(); |
| return -1; |
| } |
| c[1] = cclone(c[0]); |
| if (walk(&c[0], &names[0], 1, 1, NULL) < 0) |
| error(Egreg); |
| if (walk(&c[1], &names[1], 1, 1, NULL) < 0) |
| error(Egreg); |
| c[0] = d->open(c[0], ORDWR); |
| c[1] = d->open(c[1], ORDWR); |
| fd[0] = newfd(c[0], 0); |
| if (fd[0] < 0) |
| error(Enofd); |
| fd[1] = newfd(c[1], 0); |
| if (fd[1] < 0) |
| error(Enofd); |
| poperror(); |
| return 0; |
| } |
| |
| int sysfwstat(int fd, uint8_t * buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validstat(buf, n, 0); |
| c = fdtochan(current->fgrp, fd, -1, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| n = devtab[c->type].wstat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| long bindmount(struct chan *c, char *old, int flag, char *spec) |
| { |
| ERRSTACK(1); |
| int ret; |
| struct chan *c1; |
| |
| if (flag > MMASK || (flag & MORDER) == (MBEFORE | MAFTER)) |
| error(Ebadarg); |
| |
| c1 = namec(old, Amount, 0, 0); |
| if (waserror()) { |
| cclose(c1); |
| nexterror(); |
| } |
| ret = cmount(c, c1, flag, spec); |
| |
| poperror(); |
| cclose(c1); |
| return ret; |
| } |
| |
| int sysbind(char *new, char *old, int flags) |
| { |
| ERRSTACK(2); |
| long r; |
| struct chan *c0; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c0 = namec(new, Abind, 0, 0); |
| if (waserror()) { |
| cclose(c0); |
| nexterror(); |
| } |
| r = bindmount(c0, old, flags, ""); |
| poperror(); |
| cclose(c0); |
| |
| poperror(); |
| return r; |
| } |
| |
| int sysmount(int fd, int afd, char *old, int flags, char *spec) |
| { |
| ERRSTACK(1); |
| long r; |
| volatile struct { |
| struct chan *c; |
| } c0; |
| volatile struct { |
| struct chan *c; |
| } bc; |
| volatile struct { |
| struct chan *c; |
| } ac; |
| struct mntparam mntparam; |
| |
| ac.c = NULL; |
| bc.c = NULL; |
| c0.c = NULL; |
| if (waserror()) { |
| cclose(ac.c); |
| cclose(bc.c); |
| cclose(c0.c); |
| poperror(); |
| return -1; |
| } |
| bc.c = fdtochan(current->fgrp, fd, ORDWR, 0, 1); |
| if (afd >= 0) |
| ac.c = fdtochan(current->fgrp, afd, ORDWR, 0, 1); |
| mntparam.chan = bc.c; |
| mntparam.authchan = ac.c; |
| mntparam.spec = spec; |
| mntparam.flags = flags; |
| c0.c = devtab[devno('M', 0)].attach((char *)&mntparam); |
| |
| r = bindmount(c0.c, old, flags, spec); |
| poperror(); |
| cclose(ac.c); |
| cclose(bc.c); |
| cclose(c0.c); |
| |
| return r; |
| } |
| |
| int sysunmount(char *old, char *new) |
| { |
| ERRSTACK(1); |
| volatile struct { |
| struct chan *c; |
| } cmount; |
| volatile struct { |
| struct chan *c; |
| } cmounted; |
| |
| cmount.c = NULL; |
| cmounted.c = NULL; |
| if (waserror()) { |
| cclose(cmount.c); |
| cclose(cmounted.c); |
| poperror(); |
| return -1; |
| } |
| |
| cmount.c = namec(new, Amount, 0, 0); |
| if (old != NULL && old[0] != '\0') { |
| /* |
| * This has to be namec(..., Aopen, ...) because |
| * if arg[0] is something like /srv/cs or /fd/0, |
| * opening it is the only way to get at the real |
| * Chan underneath. |
| */ |
| cmounted.c = namec(old, Aopen, OREAD, 0); |
| } |
| |
| cunmount(cmount.c, cmounted.c); |
| poperror(); |
| cclose(cmount.c); |
| cclose(cmounted.c); |
| return 0; |
| } |
| |
| int sysopen(char *path, int vfs_flags) |
| { |
| ERRSTACK(2); |
| int fd; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| openmode(vfs_flags); /* error check only */ |
| c = namec(path, Aopen, vfs_flags, 0); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| fd = newfd(c, vfs_flags); |
| if (fd < 0) |
| error(Enofd); |
| poperror(); |
| |
| poperror(); |
| return fd; |
| } |
| |
| long unionread(struct chan *c, void *va, long n) |
| { |
| ERRSTACK(1); |
| int i; |
| long nr; |
| struct mhead *m; |
| struct mount *mount; |
| |
| qlock(&c->umqlock); |
| m = c->umh; |
| rlock(&m->lock); |
| mount = m->mount; |
| /* bring mount in sync with c->uri and c->umc */ |
| for (i = 0; mount != NULL && i < c->uri; i++) |
| mount = mount->next; |
| |
| nr = 0; |
| while (mount != NULL) { |
| /* Error causes component of union to be skipped */ |
| if (mount->to) { |
| /* normally we want to discard the error, but for our ghetto kdirent |
| * hack, we need to repeat unionread if we saw a Eshort */ |
| if (waserror()) { |
| if (!strcmp(current_errstr(), Eshort)) { |
| runlock(&m->lock); |
| qunlock(&c->umqlock); |
| nexterror(); |
| } |
| /* poperror done below for either branch */ |
| } else { |
| if (c->umc == NULL) { |
| c->umc = cclone(mount->to); |
| c->umc = devtab[c->umc->type].open(c->umc, OREAD); |
| } |
| |
| nr = devtab[c->umc->type].read(c->umc, va, n, c->umc->offset); |
| if (nr < 0) |
| nr = 0; /* dev.c can return -1 */ |
| c->umc->offset += nr; |
| } |
| poperror(); /* pop regardless */ |
| } |
| if (nr > 0) |
| break; |
| |
| /* Advance to next element */ |
| c->uri++; |
| if (c->umc) { |
| cclose(c->umc); |
| c->umc = NULL; |
| } |
| mount = mount->next; |
| } |
| runlock(&m->lock); |
| qunlock(&c->umqlock); |
| return nr; |
| } |
| |
| static void unionrewind(struct chan *c) |
| { |
| qlock(&c->umqlock); |
| c->uri = 0; |
| if (c->umc) { |
| cclose(c->umc); |
| c->umc = NULL; |
| } |
| qunlock(&c->umqlock); |
| } |
| |
| static long rread(int fd, void *va, long n, int64_t * offp) |
| { |
| ERRSTACK(3); |
| int dir; |
| struct chan *c; |
| int64_t off; |
| |
| /* dirty dirent hack */ |
| void *real_va = va; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(current->fgrp, fd, OREAD, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| if (n < 0) |
| error(Etoosmall); |
| |
| dir = c->qid.type & QTDIR; |
| |
| /* kdirent hack: userspace is expecting kdirents, but all of 9ns |
| * produces Ms. Just save up what we don't use and append the |
| * new stuff later. Allocate DIRREADSIZE bytes for that purpose. |
| */ |
| if (dir) { |
| int amt; |
| /* expecting only one dirent at a time, o/w we're busted */ |
| assert(n >= sizeof(struct kdirent)); |
| if (!c->buf) { |
| c->buf=kmalloc(DIRREADSIZE, KMALLOC_WAIT); |
| c->bufused = 0; |
| } |
| /* Attempt to extract an M, in case there was some already */ |
| amt = convM2kdirent(c->buf, c->bufused, real_va, 0); |
| if (amt) { |
| c->bufused -= amt; |
| memmove(c->buf, c->buf + amt, c->bufused); |
| n = sizeof(struct kdirent); |
| goto out; |
| } |
| /* debugging */ |
| if (waserror()) { |
| printk("Well, sysread of a dir sucks.%s \n", current_errstr()); |
| nexterror(); |
| } |
| va = c->buf + c->bufused; |
| n = DIRREADSIZE - c->bufused; |
| } |
| |
| /* this is the normal plan9 read */ |
| if (dir && c->umh) |
| n = unionread(c, va, n); |
| else { |
| if (offp == NULL) { |
| spin_lock(&c->lock); /* lock for int64_t assignment */ |
| off = c->offset; |
| spin_unlock(&c->lock); |
| } else |
| off = *offp; |
| if (off < 0) |
| error(Enegoff); |
| if (off == 0) { |
| if (offp == NULL) { |
| spin_lock(&c->lock); |
| c->offset = 0; |
| c->dri = 0; |
| spin_unlock(&c->lock); |
| } |
| unionrewind(c); |
| } |
| if (! c->ateof) { |
| n = devtab[c->type].read(c, va, n, off); |
| if (n == 0 && dir) |
| c->ateof = 1; |
| } else { |
| n = 0; |
| } |
| spin_lock(&c->lock); |
| c->offset += n; |
| spin_unlock(&c->lock); |
| } |
| |
| /* dirty kdirent hack */ |
| if (dir) { |
| int amt; |
| c->bufused = c->bufused + n; |
| /* extract an M from the front, then shift the remainder back */ |
| amt = convM2kdirent(c->buf, c->bufused, real_va, 0); |
| c->bufused -= amt; |
| memmove(c->buf, c->buf + amt, c->bufused); |
| n = amt ? sizeof(struct kdirent) : 0; |
| poperror(); /* matching our debugging waserror */ |
| } |
| |
| out: |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| /* Reads exactly n bytes from chan c, starting at its offset. Can block, but if |
| * we get 0 back too soon (EOF or error), then we'll error out with Eshort. |
| * That might need a little work - if there was a previous error, then we |
| * clobbered it and only know Eshort but not why we completed early. */ |
| void read_exactly_n(struct chan *c, void *vp, long n) |
| { |
| char *p; |
| long nn; |
| int total = 0, want = n; |
| |
| p = vp; |
| while (n > 0) { |
| nn = devtab[c->type].read(c, p, n, c->offset); |
| printd("readn: Got %d@%lld\n", nn, c->offset); |
| if (nn == 0) |
| error("%s: wanted %d, got %d", Eshort, want, total); |
| spin_lock(&c->lock); |
| c->offset += nn; |
| spin_unlock(&c->lock); |
| p += nn; |
| n -= nn; |
| total += nn; |
| } |
| } |
| |
| long sysread(int fd, void *va, long n) |
| { |
| return rread(fd, va, n, NULL); |
| } |
| |
/* Reads from 'fd' at the explicit offset 'off' (rread with an offset
 * pointer). */
long syspread(int fd, void *va, long n, int64_t off)
{
	long ret = rread(fd, va, n, &off);

	return ret;
}
| |
| int sysremove(char *path) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = namec(path, Aremove, 0, 0); |
| if (waserror()) { |
| c->type = -1; /* see below */ |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].remove(c); |
| /* |
| * Remove clunks the fid, but we need to recover the Chan |
| * so fake it up. -1 aborts the dev's close. |
| */ |
| c->type = -1; |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return 0; |
| } |
| |
| int64_t sysseek(int fd, int64_t off, int whence) |
| { |
| ERRSTACK(2); |
| struct dir *dir; |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(current->fgrp, fd, -1, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| |
| if (devtab[c->type].dc == '|') |
| error(Eisstream); |
| |
| switch (whence) { |
| case 0: |
| if (c->qid.type & QTDIR) { |
| if (off != 0) |
| error(Eisdir); |
| unionrewind(c); |
| } else if (off < 0) |
| error(Enegoff); |
| spin_lock(&c->lock); /* lock for int64_t assignment */ |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| case 1: |
| if (c->qid.type & QTDIR) |
| error(Eisdir); |
| spin_lock(&c->lock); /* lock for read/write update */ |
| off += c->offset; |
| if (off < 0) { |
| spin_unlock(&c->lock); |
| error(Enegoff); |
| } |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| case 2: |
| if (c->qid.type & QTDIR) |
| error(Eisdir); |
| dir = chandirstat(c); |
| if (dir == NULL) |
| error("internal error: stat error in seek"); |
| off += dir->length; |
| kfree(dir); |
| if (off < 0) |
| error(Enegoff); |
| spin_lock(&c->lock); /* lock for read/write update */ |
| c->offset = off; |
| spin_unlock(&c->lock); |
| break; |
| |
| default: |
| error(Ebadarg); |
| break; |
| } |
| poperror(); |
| c->dri = 0; |
| cclose(c); |
| poperror(); |
| return off; |
| } |
| |
| void validstat(uint8_t * s, int n, int slashok) |
| { |
| |
| int m; |
| char buf[64]; |
| |
| if (statcheck(s, n) < 0) |
| error(Ebadstat); |
| /* verify that name entry is acceptable */ |
| s += STATFIXLEN - 4 * BIT16SZ; /* location of first string */ |
| /* |
| * s now points at count for first string. |
| * if it's too long, let the server decide; this is |
| * only for his protection anyway. otherwise |
| * we'd have to allocate and waserror. |
| */ |
| m = GBIT16(s); |
| s += BIT16SZ; |
| if (m + 1 > sizeof buf) { |
| return; |
| } |
| memmove(buf, s, m); |
| buf[m] = '\0'; |
| /* name could be '/' */ |
| if (strcmp(buf, "/") != 0) |
| validname(buf, slashok); |
| } |
| |
| int sysfstat(int fd, uint8_t *buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].stat(c, buf, n); |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| int sysfstatakaros(int fd, struct kstat *ks) |
| { |
| |
| int n = 4096; |
| uint8_t *buf; |
| buf = kmalloc(n, KMALLOC_WAIT); |
| n = sysfstat(fd, buf, n); |
| if (n > 0) { |
| convM2kstat(buf, n, ks); |
| n = 0; |
| } |
| kfree(buf); |
| return n; |
| } |
| |
| int sysstat(char *path, uint8_t *buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| c = namec(path, Aaccess, 0, 0); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| devtab[c->type].stat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| |
| return n; |
| } |
| |
| int sysstatakaros(char *path, struct kstat *ks) |
| { |
| |
| int n = 4096; |
| uint8_t *buf; |
| buf = kmalloc(n, KMALLOC_WAIT); |
| n = sysstat(path, buf, n); |
| if (n > 0) { |
| convM2kstat(buf, n, ks); |
| n = 0; |
| } |
| kfree(buf); |
| return n; |
| } |
| |
| static long rwrite(int fd, void *va, long n, int64_t * offp) |
| { |
| ERRSTACK(3); |
| struct chan *c; |
| struct dir *dir; |
| int64_t off; |
| long m; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(current->fgrp, fd, OWRITE, 1, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| if (c->qid.type & QTDIR) |
| error(Eisdir); |
| |
| if (n < 0) |
| error(Etoosmall); |
| |
| if (offp == NULL) { |
| /* append changes the offset to the end, and even if we fail later, this |
| * change will persist */ |
| if (c->flag & CAPPEND) { |
| dir = chandirstat(c); |
| if (!dir) |
| error("internal error: stat error in append write"); |
| spin_lock(&c->lock); /* legacy lock for int64 assignment */ |
| c->offset = dir->length; |
| spin_unlock(&c->lock); |
| kfree(dir); |
| } |
| spin_lock(&c->lock); |
| off = c->offset; |
| c->offset += n; |
| spin_unlock(&c->lock); |
| } else |
| off = *offp; |
| |
| if (waserror()) { |
| if (offp == NULL) { |
| spin_lock(&c->lock); |
| c->offset -= n; |
| spin_unlock(&c->lock); |
| } |
| nexterror(); |
| } |
| if (off < 0) |
| error(Enegoff); |
| m = devtab[c->type].write(c, va, n, off); |
| poperror(); |
| |
| if (offp == NULL && m < n) { |
| spin_lock(&c->lock); |
| c->offset -= n - m; |
| spin_unlock(&c->lock); |
| } |
| |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| long syswrite(int fd, void *va, long n) |
| { |
| return rwrite(fd, va, n, NULL); |
| } |
| |
/* Writes to 'fd' at the explicit offset 'off' (rwrite with an offset
 * pointer). */
long syspwrite(int fd, void *va, long n, int64_t off)
{
	long ret = rwrite(fd, va, n, &off);

	return ret;
}
| |
| int syswstat(char *path, uint8_t * buf, int n) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| |
| validstat(buf, n, 0); |
| c = namec(path, Aaccess, 0, 0); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| n = devtab[c->type].wstat(c, buf, n); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return n; |
| } |
| |
| struct dir *chandirstat(struct chan *c) |
| { |
| ERRSTACK(1); |
| struct dir *d; |
| uint8_t *buf; |
| int n, nd, i; |
| |
| nd = DIRSIZE; |
| for (i = 0; i < 2; i++) { /* should work by the second try */ |
| d = kzmalloc(sizeof(struct dir) + nd, 0); |
| buf = (uint8_t *) & d[1]; |
| if (waserror()) { |
| kfree(d); |
| poperror(); |
| return NULL; |
| } |
| n = devtab[c->type].stat(c, buf, nd); |
| poperror(); |
| if (n < BIT16SZ) { |
| kfree(d); |
| return NULL; |
| } |
| nd = GBIT16((uint8_t *) buf) + BIT16SZ; /* size needed to store whole stat buffer including count */ |
| if (nd <= n) { |
| convM2D(buf, n, d, (char *)&d[1]); |
| return d; |
| } |
| /* else sizeof(Dir)+nd is plenty */ |
| kfree(d); |
| } |
| return NULL; |
| |
| } |
| |
| struct dir *sysdirstat(char *name) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| struct dir *d; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| |
| c = namec(name, Aaccess, 0, 0); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| d = chandirstat(c); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return d; |
| } |
| |
| struct dir *sysdirfstat(int fd) |
| { |
| ERRSTACK(2); |
| struct chan *c; |
| struct dir *d; |
| |
| if (waserror()) { |
| poperror(); |
| return NULL; |
| } |
| |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| nexterror(); |
| } |
| d = chandirstat(c); |
| poperror(); |
| cclose(c); |
| |
| poperror(); |
| return d; |
| } |
| |
| int sysdirwstat(char *name, struct dir *dir) |
| { |
| |
| uint8_t *buf; |
| int r; |
| |
| r = sizeD2M(dir); |
| buf = kzmalloc(r, 0); |
| convD2M(dir, buf, r); |
| r = syswstat(name, buf, r); |
| kfree(buf); |
| return r < 0 ? r : 0; |
| } |
| |
| int sysdirfwstat(int fd, struct dir *dir) |
| { |
| |
| uint8_t *buf; |
| int r; |
| |
| r = sizeD2M(dir); |
| buf = kzmalloc(r, 0); |
| convD2M(dir, buf, r); |
| r = sysfwstat(fd, buf, r); |
| kfree(buf); |
| return r < 0 ? r : 0; |
| } |
| |
| static long dirpackage(uint8_t * buf, long ts, struct kdirent **d) |
| { |
| |
| char *s; |
| long ss, i, n, nn, m = 0; |
| |
| *d = NULL; |
| if (ts <= 0) { |
| return ts; |
| } |
| |
| /* |
| * first find number of all stats, check they look like stats, & size all associated strings |
| */ |
| ss = 0; |
| n = 0; |
| for (i = 0; i < ts; i += m) { |
| m = BIT16SZ + GBIT16(&buf[i]); |
| if (statcheck(&buf[i], m) < 0) |
| break; |
| ss += m; |
| n++; |
| } |
| |
| if (i != ts) |
| error("bad directory format"); |
| |
| *d = kzmalloc(n * sizeof(**d) + ss, 0); |
| if (*d == NULL) |
| error(Enomem); |
| |
| /* |
| * then convert all buffers |
| */ |
| s = (char *)*d + n * sizeof(**d); |
| nn = 0; |
| for (i = 0; i < ts; i += m) { |
| m = BIT16SZ + GBIT16((uint8_t *) & buf[i]); |
| if (nn >= n || /*convM2D */ convM2kdirent(&buf[i], m, *d + nn, s) != m) { |
| kfree(*d); |
| *d = NULL; |
| error("bad directory entry"); |
| } |
| nn++; |
| s += m; |
| } |
| |
| return nn; |
| } |
| |
| long sysdirread(int fd, struct kdirent **d) |
| { |
| ERRSTACK(2); |
| uint8_t *buf; |
| long ts; |
| |
| *d = NULL; |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| buf = kzmalloc(DIRREADLIM, 0); |
| if (buf == NULL) |
| error(Enomem); |
| if (waserror()) { |
| kfree(buf); |
| nexterror(); |
| } |
| ts = sysread(fd, buf, DIRREADLIM); |
| if (ts >= 0) |
| ts = dirpackage(buf, ts, d); |
| poperror(); |
| kfree(buf); |
| poperror(); |
| return ts; |
| } |
| |
| int sysiounit(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| int n; |
| |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| if (waserror()) { |
| cclose(c); |
| poperror(); |
| return 0; /* n.b. */ |
| } |
| n = c->iounit; |
| poperror(); |
| cclose(c); |
| return n; |
| } |
| |
| /* Notes on concurrency: |
| * - Can't hold spinlocks while we call cclose, since it might sleep eventually. |
| * - We're called from proc_destroy, so we could have concurrent openers trying |
| * to add to the group (other syscalls), hence the "closed" flag. |
| * - dot and slash chans are dealt with in proc_free. its difficult to close |
| * and zero those with concurrent syscalls, since those are a source of krefs. |
| * - the memory is freed in proc_free(). need to wait to do it, since we can |
| * have concurrent accesses to fgrp before free. |
| * - Once we lock and set closed, no further additions can happen. To simplify |
| * our closes, we also allow multiple calls to this func (though that should |
| * never happen with the current code). */ |
| void close_9ns_files(struct proc *p, bool only_cloexec) |
| { |
| |
| struct fgrp *f = p->fgrp; |
| |
| spin_lock(&f->lock); |
| if (f->closed) { |
| spin_unlock(&f->lock); |
| warn("Unexpected double-close"); |
| return; |
| } |
| if (!only_cloexec) |
| f->closed = TRUE; |
| spin_unlock(&f->lock); |
| |
| /* maxfd is a legit val, not a +1 */ |
| for (int i = 0; i <= f->maxfd; i++) { |
| if (!f->fd[i]) |
| continue; |
| if (only_cloexec && !(f->fd[i]->flag & CCEXEC)) |
| continue; |
| cclose(f->fd[i]); |
| f->fd[i] = 0; |
| } |
| } |
| |
| void print_chaninfo(struct chan *c) |
| { |
| |
| char buf[64] = { 0 }; |
| bool has_dev = c->type != -1; |
| if (has_dev && !devtab[c->type].chaninfo) { |
| printk("Chan type %d has no chaninfo!\n", c->type); |
| has_dev = FALSE; |
| } |
| printk("Chan pathname: %s ref %d, Dev: %s, Devinfo: %s", |
| c->name ? c->name->s : "no cname", |
| kref_refcnt(&c->ref), |
| has_dev ? devtab[c->type].name : "no dev", |
| has_dev ? devtab[c->type].chaninfo(c, buf, sizeof(buf)) : ""); |
| if (!has_dev) |
| printk("qid.path: %p\n", c->qid.path); |
| printk("\n"); |
| } |
| |
| void print_9ns_files(struct proc *p) |
| { |
| |
| struct fgrp *f = p->fgrp; |
| spin_lock(&f->lock); |
| printk("9ns files for proc %d:\n", p->pid); |
| /* maxfd is a legit val, not a +1 */ |
| for (int i = 0; i <= f->maxfd; i++) { |
| if (!f->fd[i]) |
| continue; |
| printk("\t9fs %4d, ", i); |
| print_chaninfo(f->fd[i]); |
| } |
| spin_unlock(&f->lock); |
| } |
| |
| /* TODO: 9ns ns inheritance flags: Shared, copied, or empty. Looks like we're |
| * copying the fgrp, and sharing the pgrp. */ |
| int plan9setup(struct proc *new_proc, struct proc *parent, int flags) |
| { |
| |
| struct proc *old_current; |
| struct kref *new_dot_ref; |
| ERRSTACK(1); |
| if (waserror()) { |
| printk("plan9setup failed, %s\n", current_errstr()); |
| poperror(); |
| return -1; |
| } |
| if (!parent) { |
| /* We are probably spawned by the kernel directly, and have no parent to |
| * inherit from. Be sure to set up fgrp/pgrp before calling namec(). |
| * |
| * TODO: One problem is namec wants a current set for things like |
| * genbuf. So we'll use new_proc for this bootstrapping. Note |
| * switch_to() also loads the cr3. */ |
| new_proc->fgrp = newfgrp(); |
| new_proc->pgrp = newpgrp(); |
| old_current = switch_to(new_proc); |
| new_proc->slash = namec("#r", Atodir, 0, 0); |
| if (!new_proc->slash) |
| panic("no root device"); |
| switch_back(new_proc, old_current); |
| /* Want the name to be "/" instead of "#r" */ |
| cnameclose(new_proc->slash->name); |
| new_proc->slash->name = newcname("/"); |
| new_proc->dot = cclone(new_proc->slash); |
| poperror(); |
| return 0; |
| } |
| /* When we use the old fgrp, we have copy semantics: do not change this |
| * without revisiting proc_destroy, close_9ns_files, and closefgrp. */ |
| if (flags & PROC_DUP_FGRP) |
| new_proc->fgrp = dupfgrp(new_proc, parent->fgrp); |
| else |
| new_proc->fgrp = newfgrp(); |
| /* Shared semantics */ |
| kref_get(&parent->pgrp->ref, 1); |
| new_proc->pgrp = parent->pgrp; |
| /* copy semantics on / and . (doesn't make a lot of sense in akaros o/w) */ |
| /* / should never disappear while we hold a ref to parent */ |
| chan_incref(parent->slash); |
| new_proc->slash = parent->slash; |
| /* dot could change concurrently, and we could fail to gain a ref if whoever |
| * decref'd dot triggered the release. if that did happen, new_proc->dot |
| * should update and we can try again. */ |
| while (!(new_dot_ref = kref_get_not_zero(&parent->dot->ref, 1))) |
| cpu_relax(); |
| /* And now, we can't trust parent->dot, and need to determine our dot from |
| * the ref we obtained. */ |
| new_proc->dot = container_of(new_dot_ref, struct chan, ref); |
| poperror(); |
| return 0; |
| } |
| |
| /* Open flags, create modes, access types, file flags, and all that... |
| * |
| * there are a bunch of things here: |
| * 1) file creation flags (e.g. O_TRUNC) |
| * 2) file status flags (e.g. O_APPEND) |
| * 3) file open modes (e.g. O_RDWR) |
| * 4) file descriptor flags (e.g. CLOEXEC) |
| * 5) file creation mode (e.g. S_IRWXU) |
| * the 1-4 are passed in via open's vfs_flags, and the 5 via mode only when |
| * O_CREATE is set. |
| * |
| * file creation flags (1) only matter when creating, but aren't permanent. |
| * O_EXCL, O_DIRECTORY, O_TRUNC, etc. |
| * |
| * file status flags (2) are per struct file/chan. stuff like O_APPEND, |
| * O_ASYNC, etc. we convert those to an internal flag bit and store in c->flag |
| * |
| * the open mode (3) matters for a given FD/chan (chan->mode), and should be |
| * stored in the chan. (c->mode) stuff like O_RDONLY. |
| * |
| * the file descriptor flags (4) clearly are in the FD. note that the same |
| * file/chan can be opened by two different FDs, with different flags. the only |
| * one anyone uses is CLOEXEC. while exec may not last long in akaros, i can |
| * imagine similar "never pass to children" flags/meanings. |
| * |
| * the file creation mode (5) matters for the device's permissions; given this, |
| * it should be stored in the device/inode. ACLs fall under this category. |
| * |
| * finally, only certain categories can be edited afterwards: file status flags |
| * (2), FD flags (4), and file permissions (5). */ |
| int fd_getfl(int fd) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| int ret; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| |
| ret = c->mode; |
| if (c->flag & CAPPEND) |
| ret |= O_APPEND; |
| |
| cclose(c); |
| poperror(); |
| return ret; |
| } |
| |
| int fd_setfl(int fd, int flags) |
| { |
| ERRSTACK(1); |
| struct chan *c; |
| |
| if (waserror()) { |
| poperror(); |
| return -1; |
| } |
| c = fdtochan(current->fgrp, fd, -1, 0, 1); |
| |
| if (flags & O_APPEND) |
| c->flag |= CAPPEND; |
| |
| cclose(c); |
| poperror(); |
| return 0; |
| } |