blob: 205cee723b4b107c717bc3bc2be13c4c63c8ed46 [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
* Copyright (c) 1989-2003 by Lucent Technologies, Bell Laboratories.
*/
//#define DEBUG
#include <setjmp.h>
#include <vfs.h>
#include <kfs.h>
#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <fcall.h>
#include <ros/fs.h>
/*
 * Sanity-check a marshalled stat buffer of n bytes at s.
 * Raises Ebadstat when statcheck() rejects the buffer; otherwise hands the
 * first string (the name) to validname(), unless that name is exactly "/".
 */
void
validstat(uint8_t *s, unsigned long n)
{
	char name[64];
	unsigned long namelen;
	uint8_t *cursor;

	if (statcheck(s, n) < 0)
		error(Ebadstat);

	/* step over the fixed part to the count of the first string */
	cursor = s + (STATFIXLEN - 4 * BIT16SZ);
	namelen = GBIT16(cursor);
	cursor += BIT16SZ;

	/*
	 * A name that will not fit in the local buffer (with its terminator) is
	 * left for the server to judge; checking it here would mean allocating
	 * and setting up a waserror.
	 */
	if (namelen + 1 > sizeof(name))
		return;

	memmove(name, cursor, namelen);
	name[namelen] = '\0';

	/* the root name "/" is accepted without further checks */
	if (strcmp(name, "/") == 0)
		return;
	validname(name, 0);
}
/* Copy part of an in-memory object to a reader.
 *
 * Treats mem as a len-byte object and copies at most n bytes of it, starting
 * at byte offset, into buf.  Returns the number of bytes copied, which is 0
 * when offset is at or beyond the end of the object. */
int readmem(unsigned long offset, char *buf, unsigned long n,
            void *mem, size_t len)
{
	size_t avail;

	if (offset >= len)
		return 0;
	/* Clamp against the bytes remaining instead of testing offset + n: that
	 * sum can wrap for a large n, which would skip the clamp and let the copy
	 * run off the end of mem. */
	avail = len - offset;
	if (n > avail)
		n = avail;
	memmove(buf, (char *)mem + offset, n);
	return n;
}
/* Simple helper for a common use: read a number, formatted as decimal text,
 * accounting for the reader's offset.
 *
 * val is formatted with "%lu" into a local buffer, limited to 'size' bytes
 * (including the terminator) and never more than the local buffer holds.  The
 * formatted text plus its terminating \0 is then served through readmem().
 * Returns whatever readmem() returns.  (The old plan9 users just picked
 * NUMSIZE (12) for 'size', though they seem to want to limit it.) */
int readnum(unsigned long off, char *buf, unsigned long n, unsigned long val,
            int size)
{
	char tmp[64];
	int len;

	/* keep tmp well-defined even if the formatter is told to write nothing */
	tmp[0] = '\0';
	size = MIN(sizeof(tmp), size);
	/* we really need the %* format. */
	len = snprintf(tmp, size, "%lu", val);
	/* A C-conforming snprintf reports the length the output would have had,
	 * which on truncation is larger than what is actually in tmp.  Clamp to
	 * what was written so readmem never serves bytes past the terminator. */
	if (len < 0)
		len = 0;
	if (len >= size)
		len = size > 0 ? size - 1 : 0;
	/* always include the \0 */
	return readmem(off, buf, n, tmp, len + 1);
}
/* Serve the string str (terminator included) to a reader at the given offset;
 * the copy itself and the return value come from readmem(). */
long readstr(long offset, char *buf, long n, char *str)
{
	/* the terminating \0 counts as readable data */
	size_t total = strlen(str) + 1;

	return readmem(offset, buf, n, str, total);
}
/* Remove fd from the current process's fgrp and close its chan.
 *
 * Does nothing if the fgrp is already closed, if fd is outside the table, or
 * if the slot is empty.  When 'flag' is non-zero the chan is only closed if
 * it has at least one of those flag bits set.  The chan is closed after the
 * fgrp lock is dropped. */
void fdclose(int fd, int flag)
{
	int i;
	struct chan *c;
	struct fgrp *f;

	f = current->fgrp;
	spin_lock(&f->lock);
	if (f->closed) {
		spin_unlock(&f->lock);
		return;
	}
	/* never index the table with an fd it does not cover */
	if (fd < 0 || fd >= f->nfd) {
		spin_unlock(&f->lock);
		return;
	}
	c = f->fd[fd];
	if (c == NULL) {
		/* can happen for users with shared fd tables */
		spin_unlock(&f->lock);
		return;
	}
	if (flag && !(c->flag & flag)) {
		spin_unlock(&f->lock);
		return;
	}
	f->fd[fd] = NULL;
	/* if we just emptied the highest slot, walk maxfd down over the empty
	 * slots directly below it */
	if (fd == f->maxfd)
		for (i = fd; --i >= 0 && f->fd[i] == 0;)
			f->maxfd = i;
	/* hack: give the FD back to VFS */
	put_fd(&current->open_files, fd);
	spin_unlock(&f->lock);
	cclose(c);
}
/* Reduce an open mode to its basic access mode (RD/WR/etc).  A mode that is
 * exactly O_EXEC is reported as O_RDONLY.  No error checking is done. */
int openmode(int omode)
{
#if 0
	/* this is the old plan9 style.  i think they want to turn exec into read,
	 * and strip off anything higher, and just return the RD/WR style bits.  not
	 * stuff like ORCLOSE.  the lack of OEXCL might be a bug on their part (it's
	 * the only one of their non-RW-related flags that isn't masked out) */
	omode &= ~(OTRUNC | OCEXEC | ORCLOSE);
	if (omode > OEXEC)
		error(Ebadarg);
	if (omode == OEXEC)
		return OREAD;
	return omode;
#endif
	/* there are far too many flags to validate here; just hand back the
	 * access-mode bits */
	return (omode == O_EXEC) ? O_RDONLY : (omode & O_ACCMODE);
}
/* Drop the fgrp lock.  Any pending "exceed" mark is consumed while the lock is
 * still held and reported only after the lock has been released. */
static void unlockfgrp(struct fgrp *f)
{
	int exceeded = f->exceed;

	f->exceed = 0;
	spin_unlock(&f->lock);
	if (exceeded != 0)
		printd("warning: process exceeds %d file descriptors\n", exceeded);
}
/* Make sure the fgrp's table has a slot for fd (fd is always >= 0).
 *
 * Returns 0 if the table already covers fd, 1 if it was grown by DELTAFD
 * slots, and -1 if fd is more than one growth step away, the table has
 * reached its size cap, or the allocation failed. */
int growfd(struct fgrp *f, int fd)
{
	struct chan **grown, **prev;

	if (fd < f->nfd)
		return 0;
	if (fd >= f->nfd + DELTAFD)
		return -1;	/* out of range */

	/* Unbounded allocation is unwise */
	if (f->nfd >= 5000) {
		printd("no free file descriptors\n");
		return -1;
	}

	grown = kzmalloc((f->nfd + DELTAFD) * sizeof(struct chan *), KMALLOC_WAIT);
	if (grown == 0) {
		printd("no free file descriptors\n");
		return -1;
	}

	/* carry the existing entries over, then swap tables */
	prev = f->fd;
	memmove(grown, prev, f->nfd * sizeof(struct chan *));
	f->fd = grown;
	kfree(prev);
	f->nfd += DELTAFD;

	if (fd > f->maxfd) {
		/* crossing into a new hundred leaves a mark for unlockfgrp */
		if (fd / 100 > f->maxfd / 100)
			f->exceed = (fd / 100) * 100;
		f->maxfd = fd;
	}
	printd("GROW \n");
	return 1;
}
/*
 * Find a free descriptor number at or above 'start'.
 * This assumes that the fgrp is locked.
 *
 * The number comes from the VFS (hack: the normal plan9 way would be to scan
 * f->fd[] from 'start' for the first empty slot).  The table is grown if the
 * number does not fit yet.  Returns the descriptor, or -1 if none could be
 * obtained or the table could not be grown.
 */
int findfreefd(struct fgrp *f, int start)
{
	int fd;

	fd = get_fd(&current->open_files, start);
	/* do not index the table with a failed allocation */
	if (fd < 0)
		return -1;
	if (fd >= f->nfd && growfd(f, fd) < 0) {
		/* we cannot use the number; hand it back the same way fdclose does
		 * so it is not lost */
		put_fd(&current->open_files, fd);
		return -1;
	}
	/* only now is fd known to be inside the table */
	assert(f->fd[fd] == 0);
	return fd;
}
/* Install chan c in a free descriptor slot of the current process's fgrp.
 * Returns the descriptor, or -1 if the fgrp is closed or no slot could be
 * found. */
int newfd(struct chan *c)
{
	struct fgrp *grp = current->fgrp;
	int slot;

	spin_lock(&grp->lock);
	if (grp->closed) {
		spin_unlock(&grp->lock);
		return -1;
	}

	slot = findfreefd(grp, 0);
	if (slot >= 0) {
		if (slot > grp->maxfd)
			grp->maxfd = slot;
		grp->fd[slot] = c;
	}
	unlockfgrp(grp);

	return slot < 0 ? -1 : slot;
}
/* Install the two chans c[0] and c[1] in two free descriptor slots of the
 * current process's fgrp, storing the descriptors in fd[0] and fd[1].
 * Returns 0 on success; -1 if the fgrp is closed or either slot could not be
 * found, in which case no chan has been installed. */
static int newfd2(int fd[2], struct chan *c[2])
{
	struct fgrp *f;

	f = current->fgrp;
	spin_lock(&f->lock);
	if (f->closed) {
		spin_unlock(&f->lock);
		return -1;
	}
	fd[0] = findfreefd(f, 0);
	if (fd[0] < 0) {
		unlockfgrp(f);
		return -1;
	}
	fd[1] = findfreefd(f, fd[0] + 1);
	if (fd[1] < 0) {
		/* fd[0] was already handed out to us; give it back the same way
		 * fdclose does, or it is lost for good */
		put_fd(&current->open_files, fd[0]);
		unlockfgrp(f);
		return -1;
	}
	if (fd[1] > f->maxfd)
		f->maxfd = fd[1];
	f->fd[fd[0]] = c[0];
	f->fd[fd[1]] = c[1];
	unlockfgrp(f);
	return 0;
}
struct chan *fdtochan(int fd, int mode, int chkmnt, int iref)
{
struct chan *c;
struct fgrp *f;
c = NULL;
f = current->fgrp;
spin_lock(&f->lock);
if (f->closed) {
spin_unlock(&f->lock);
error("File group closed");
}
if (fd < 0) {
spin_unlock(&f->lock);
error("%s: fd < 0", Ebadfd);
}
if (f->nfd <= fd) {
spin_unlock(&f->lock);
error("%s: f->nfd %d, fd %d", Ebadfd, f->nfd, fd);
}
if ((c = f->fd[fd]) == 0) {
spin_unlock(&f->lock);
error("%s: f->fd[%d] is 0", Ebadfd, fd);
}
if (iref)
kref_get(&c->ref, 1);
spin_unlock(&f->lock);
if (chkmnt && (c->flag & CMSG)) {
if (iref)
cclose(c);
error("MSG channel&chkmnt set");
}
if (mode < 0 || c->mode == ORDWR)
return c;
if ((mode & OTRUNC) && c->mode == OREAD) {
if (iref)
cclose(c);
error("mode&OTRUNC and mode==READ");
}
#if 0
/* seems not to match akaros.
if ((mode & ~OTRUNC) != c->mode) {
if (iref)
cclose(c);
error(Ebadusefd);
}
*/
#endif
return c;
}
/*
 * Read from a union directory: try each mounted component in turn, starting
 * at the one recorded in c->uri, until one of them returns data.
 * Returns the byte count of the first read that yields more than zero bytes;
 * otherwise the value left in nr after the last component was tried.
 * c->umqlock is held for the whole call and mh->lock is held for reading.
 */
static long unionread(struct chan *c, void *va, long n)
{
ERRSTACK(1);
int i;
long nr;
struct mhead *mh;
struct mount *mount;
qlock(&c->umqlock);
mh = c->umh;
rlock(&mh->lock);
mount = mh->mount;
/* bring mount in sync with c->uri and c->umc */
for (i = 0; mount != NULL && i < c->uri; i++)
mount = mount->next;
nr = 0;
while (mount != NULL) {
/* Error causes component of union to be skipped */
if (mount->to) {
/* Careful, this is a NOT waserror(), we're discarding any errors */
if (!waserror()) {
/* first visit to this component: clone and open it for reading */
if (c->umc == NULL) {
c->umc = cclone(mount->to);
c->umc = c->umc->dev->open(c->umc, OREAD);
}
nr = c->umc->dev->read(c->umc, va, n, c->umc->offset);
c->umc->offset += nr;
}
/* reached both after a clean read and after a caught error */
poperror();
}
if (nr > 0)
break;
/* Advance to next element */
c->uri++;
if (c->umc) {
cclose(c->umc);
c->umc = NULL;
}
mount = mount->next;
}
runlock(&mh->lock);
qunlock(&c->umqlock);
return nr;
}
/* Restart a union read from the first component: reset the component index
 * and close the chan of the component currently being read, if any. */
static void unionrewind(struct chan *c)
{
	qlock(&c->umqlock);
	c->uri = 0;
	if (c->umc != NULL) {
		cclose(c->umc);
		c->umc = NULL;
	}
	qunlock(&c->umqlock);
}
/* Return the last element of a path: the text after the final '/', or the
 * whole string when it contains no '/'.  Returns NULL for a NULL path or one
 * whose len is 0. */
static char *pathlast(struct path *p)
{
	char *slash;

	if (p == NULL || p->len == 0)
		return NULL;

	slash = strrchr(p->s, '/');
	return slash ? slash + 1 : p->s;
}
/*
 * Decode the fixed-size leading fields of the marshalled directory entry at p
 * into d.  e marks the end of the buffer holding the entry.
 * Returns the entry's total length (its size field plus the size field
 * itself), or 0 if that length would run past e.
 */
static unsigned long
dirfixed(uint8_t * p, unsigned char *e, struct dir *d)
{
	int len;
	struct dev *dev;
	uint8_t *cur;

	len = GBIT16(p) + BIT16SZ;
	if (p + len > e)
		return 0;

	/* step over the size field */
	cur = p + BIT16SZ;

	/* device type: looked up in the device table; -1 when the lookup fails */
	dev = devtabget(GBIT16(cur), 1);	//XDYNX
	if (dev == NULL) {
		d->type = -1;
	} else {
		d->type = dev->dc;
		//devtabdecr(dev);
	}
	cur += BIT16SZ;

	d->dev = GBIT32(cur);
	cur += BIT32SZ;

	d->qid.type = GBIT8(cur);
	cur += BIT8SZ;
	d->qid.vers = GBIT32(cur);
	cur += BIT32SZ;
	d->qid.path = GBIT64(cur);
	cur += BIT64SZ;

	d->mode = GBIT32(cur);
	cur += BIT32SZ;
	d->atime = GBIT32(cur);
	cur += BIT32SZ;
	d->mtime = GBIT32(cur);
	cur += BIT32SZ;

	d->length = GBIT64(cur);
	return len;
}
/* Locate the name string inside the marshalled directory entry at p: skips
 * the fixed-size fields, stores the string's 16-bit length in *n and returns
 * a pointer to its first byte (the string is not NUL-terminated here). */
static char *dirname(uint8_t * p, unsigned long *n)
{
	uint8_t *namefield;

	namefield = p + (BIT16SZ + BIT16SZ + BIT32SZ + BIT8SZ + BIT32SZ + BIT64SZ
	                 + BIT32SZ + BIT32SZ + BIT32SZ + BIT64SZ);
	*n = GBIT16(namefield);
	return (char *)namefield + BIT16SZ;
}
/*
 * Replace the name inside the n-byte marshalled directory entry at p with the
 * len bytes at name, sliding the data that follows the old name as needed.
 * maxn is the room available at p.
 * Returns the entry's new total length, or BIT16SZ if the entry is only a
 * size field or the result would not fit in maxn.  Note that the entry's size
 * field is rewritten before the fit check is made.
 */
static unsigned long
dirsetname(char *name, unsigned long len, uint8_t * p, unsigned long n,
           unsigned long maxn)
{
	char *oldname, *oldend;
	unsigned long newtotal, oldlen;

	if (n == BIT16SZ)
		return BIT16SZ;

	oldname = dirname(p, &oldlen);
	newtotal = n + len - oldlen;
	PBIT16(p, newtotal - BIT16SZ);
	if (newtotal > maxn)
		return BIT16SZ;

	/* move whatever follows the old name to where it sits after the new one */
	oldend = oldname + oldlen;
	if (len != oldlen)
		memmove(oldname + len, oldend, p + n - (uint8_t *) oldend);

	/* the name's own length field sits just in front of it */
	PBIT16((uint8_t *) (oldname - 2), len);
	memmove(oldname, name, len);
	return newtotal;
}
/*
 * mountfix might have caused the fixed results of the directory read
 * to overflow the buffer. Catch the overflow in c->dirrock.
 *
 * p points at the first of a run of directory entries and *pe at the end of
 * that run.  The last entry of the run is appended to c->dirrock (growing the
 * rock in 1024-byte steps when needed) and *pe is moved back to the start of
 * that entry, which drops it from the caller's buffer.
 */
static void mountrock(struct chan *c, uint8_t * p, unsigned char **pe)
{
uint8_t *e, *r;
int len, n;
e = *pe;
/* find last directory entry */
for (;;) {
len = BIT16SZ + GBIT16(p);
if (p + len >= e)
break;
p += len;
}
/* save it away */
qlock(&c->rockqlock);
if (c->nrock + len > c->mrock) {
/* not enough room: allocate a larger rock and carry the old contents over */
n = ROUNDUP(c->nrock + len, 1024);
r = kzmalloc(n, KMALLOC_WAIT);
memmove(r, c->dirrock, c->nrock);
kfree(c->dirrock);
c->dirrock = r;
c->mrock = n;
}
memmove(c->dirrock + c->nrock, p, len);
c->nrock += len;
qunlock(&c->rockqlock);
/* drop it */
*pe = p;
}
/*
 * Satisfy a directory read with the results saved in c->dirrock.
 *
 * Copies as many whole saved entries as fit into the n bytes at op.  Returns
 * 1 and stores the byte count in *nn if at least one entry was copied;
 * returns 0, leaving *nn alone, if the rock is empty or nothing fit.
 */
static int mountrockread(struct chan *c, uint8_t * op, long n, long *nn)
{
	long entlen;
	uint8_t *src, *srcend, *dst, *dstend;

	/* common case */
	if (c->nrock == 0)
		return 0;

	qlock(&c->rockqlock);
	src = c->dirrock;
	srcend = src + c->nrock;
	dst = op;
	dstend = op + n;

	/* copy out what we can, one whole entry at a time */
	for (; src + BIT16SZ <= srcend; src += entlen, dst += entlen) {
		entlen = BIT16SZ + GBIT16(src);
		if (dst + entlen > dstend)
			break;
		memmove(dst, src, entlen);
	}

	if (dst != op) {
		/* shift the rest down to the front of the rock */
		if (src != srcend)
			memmove(c->dirrock, src, srcend - src);
		c->nrock = srcend - src;
		*nn = dst - op;
		qunlock(&c->rockqlock);
		return 1;
	}

	qunlock(&c->rockqlock);
	return 0;
}
/* Discard any directory entries saved in c->dirrock by marking the rock empty.
 * The rock's storage is kept. */
static void mountrewind(struct chan *c)
{
c->nrock = 0;
}
/*
 * Rewrite the results of a directory read to reflect current
 * name space bindings and mounts. Specifically, replace
 * directory entries for bind and mount points with the results
 * of statting what is mounted there. Except leave the old names.
 *
 * op holds n bytes of marshalled directory entries and has room for maxn
 * bytes.  Returns the number of bytes in op after rewriting; entries that no
 * longer fit are pushed into c->dirrock by mountrock().
 */
static long
mountfix(struct chan *c, uint8_t * op, long n, long maxn)
{
ERRSTACK(1);
char *name;
int nbuf;
struct chan *nc;
struct mhead *mh;
struct mount *mount;
unsigned long dirlen, nname, r, rest;
long l;
uint8_t *buf, *e, *p;
struct dir d;
p = op;
buf = NULL;
nbuf = 0;
/* walk the entries; p is the current entry and e the end of valid data */
for (e = &p[n]; p + BIT16SZ < e; p += dirlen) {
dirlen = dirfixed(p, e, &d);
if (dirlen == 0)
break;
nc = NULL;
mh = NULL;
if (findmount(&nc, &mh, d.type, d.dev, d.qid)) {
/*
 * If it's a union directory and the original is
 * in the union, don't rewrite anything.
 */
for (mount = mh->mount; mount; mount = mount->next)
if (eqchanddq(mount->to, d.type, d.dev, d.qid, 1))
goto Norewrite;
name = dirname(p, &nname);
/*
 * Do the stat but fix the name. If it fails,
 * leave old entry.
 * BUG: If it fails because there isn't room for
 * the entry, what can we do? Nothing, really.
 * Might as well skip it.
 */
/* the scratch stat buffer is allocated once, on first need */
if (buf == NULL) {
buf = kzmalloc(4096, KMALLOC_WAIT);
nbuf = 4096;
}
/* NOTE(review): other handlers in this file call poperror() before leaving;
 * this path jumps to Norewrite without one -- confirm the error stack stays
 * balanced when the loop goes round again. */
if (waserror())
goto Norewrite;
l = nc->dev->stat(nc, buf, nbuf);
r = dirsetname(name, nname, buf, l, nbuf);
if (r == BIT16SZ)
error("dirsetname");
poperror();
/*
 * Shift data in buffer to accommodate new entry,
 * possibly overflowing into rock.
 */
rest = e - (p + dirlen);
if (r > dirlen) {
/* push trailing entries into the rock until the larger entry fits */
while (p + r + rest > op + maxn) {
mountrock(c, p, &e);
if (e == p) {
/* the current entry itself was rocked; nothing is left to rewrite */
dirlen = 0;
goto Norewrite;
}
rest = e - (p + dirlen);
}
}
if (r != dirlen) {
memmove(p + r, p + dirlen, rest);
dirlen = r;
e = p + dirlen + rest;
}
/*
 * Rewrite directory entry.
 */
memmove(p, buf, r);
Norewrite:
cclose(nc);
putmhead(mh);
}
}
if (buf)
kfree(buf);
/* the walk must end exactly at the end of the data */
if (p != e)
error("oops in mountfix");
return e - op;
}
extern struct dev procdevtab, rootdevtab;
/* Read up to n bytes from fd into p.
 *
 * off == ~0LL means "use and maintain the chan's own offset"; any other value
 * is an explicit (pread-style) offset.  Directories are read in the 9p wire
 * format into a temporary kernel buffer, rewritten by mountfix(), and then
 * converted entry by entry into struct kdirent records in p.
 *
 * Returns the number of bytes placed in p, or -1 on error (errno is set to
 * EBADF when fd cannot be resolved; later failures return -1 without
 * touching errno). */
long sysread(int fd, void *p, size_t n, off_t off)
{
	ERRSTACK(1);
	enum { DIRBUFSZ = 8192, DIRREADSZ = 2048 };
	uint8_t *ep = p;
	/* temporary buffer for directory reads; released on every exit path */
	uint8_t *dirbuf = NULL;
	long nn, nnn;
	struct chan *c;
	int ispread = 1;

	printd("%p: ", current->pid);
	printd("sysread %d %p %d %lld\n", fd, p, n, off);
	if (waserror()) {
		set_errno(EBADF);
		poperror();
		return -1;
	}
	c = fdtochan(fd, OREAD, 1, 1);
	poperror();
	if (waserror()) {
		if (dirbuf)
			kfree(dirbuf);
		cclose(c);
		poperror();
		return -1;
	}
	/*
	 * The offset is passed through on directories, normally.
	 * Sysseek complains, but pread is used by servers like exportfs,
	 * that shouldn't need to worry about this issue.
	 *
	 * Notice that c->devoffset is the offset that c's dev is seeing.
	 * The number of bytes read on this fd (c->offset) may be different
	 * due to rewritings in mountfix.
	 */
	if (off == ~0LL) {	/* use and maintain channel's offset */
		off = c->offset;
		ispread = 0;
	}
	if (c->qid.type & QTDIR) {
		unsigned char *ents;
		int total, amt = 0;

		/*
		 * Directory read:
		 * rewind to the beginning of the file if necessary;
		 * try to fill the buffer via mountrockread;
		 * clear ispread to always maintain the chan offset.
		 */
		/* this is a bit of a hack until we resolve akaros direntry format. */
		dirbuf = kzmalloc(DIRBUFSZ, KMALLOC_WAIT);
		if (!dirbuf)
			error(Enomem);
		ents = dirbuf;
		if (off == 0LL) {
			if (!ispread) {
				c->offset = 0;
				c->devoffset = 0;
			}
			mountrewind(c);
			unionrewind(c);
		}
		printd("sysread: dir: ispread %d @ %lld\n", ispread, off);
		/* tell it we have less than we have to make sure it will
		 * fit in the large akaros dirents.
		 */
		if (!mountrockread(c, ents, DIRREADSZ, &nn)) {
			printd("Rock read failed, going to the source\n");
			if (c->umh)
				nn = unionread(c, ents, DIRREADSZ);
			else {
				if (off != c->offset)
					error(Edirseek);
				nn = c->dev->read(c, ents, DIRREADSZ, c->devoffset);
			}
		} else
			printd("rock read ok\n");
		/* mountfix rewrites in place inside dirbuf, so its limit must be the
		 * size of dirbuf even when the caller's buffer is larger */
		nnn = mountfix(c, ents, nn, n < DIRBUFSZ ? n : DIRBUFSZ);
		/* now convert to akaros kdents. This whole thing needs fixin' */
		/* NOTE(review): nothing here stops ep from running past p + n when
		 * the converted entries are larger than the caller's buffer. */
		for (total = 0; total < nnn; total += amt) {
			amt = convM2kdirent(ents, nnn - total, (struct kdirent *)ep);
			/* no progress would spin forever; presumably this signals a bad
			 * or truncated entry -- TODO confirm against convM2kdirent */
			if (amt <= 0)
				break;
			ents += amt;
			ep = (uint8_t *) ep + sizeof(struct kdirent);
		}
		kfree(dirbuf);
		dirbuf = NULL;
		ispread = 0;
		nnn = ep - (uint8_t *) p;
	} else
		nnn = nn = c->dev->read(c, p, n, off);
	if (!ispread) {
		/* nn is what the device handed over, nnn what the caller received */
		spin_lock(&c->lock);
		c->devoffset += nn;
		c->offset += nnn;
		spin_unlock(&c->lock);
	}
	poperror();
	cclose(c);
	return nnn;
}
/* Write n bytes from p to fd.
 *
 * off == ~0LL means "use and maintain the chan's own offset": the offset is
 * advanced by n before the write and pulled back by whatever was not written,
 * or by all of n if the write raises an error.  Any other value of off is an
 * explicit (pwrite-style) offset and leaves the chan's offset alone.
 *
 * Returns the device's write result, or -1 on error with errno set to EBADF
 * (fd cannot be resolved) or EIO (any later failure, including writing to a
 * directory). */
long syswrite(int fd, void *p, size_t n, off_t off)
{
	ERRSTACK(1);
	int ispwrite = 1;
	long r = n;
	struct chan *c;

	printd("%p: ", current->pid);
	printd("syswrite %d %p %d %d\n", fd, p, n, off);
	/* n doubles as "how far the offset was advanced" for the error handler,
	 * so it stays 0 until the offset is actually touched */
	n = 0;
	if (waserror()) {
		printk("%p: ", current->pid);
		printk("BADFD: syswrite fd %d: '%s'\n", fd, current_errstr());
		set_errno(EBADF);
		poperror();
		return -1;
	}
	c = fdtochan(fd, OWRITE, 1, 1);
	poperror();
	if (waserror()) {
		printk("%p: ", current->pid);
		printk("IO ERROR:syswrite fd %d: '%s'\n", fd, current_errstr());
		set_errno(EIO);
		if (!ispwrite) {
			spin_lock(&c->lock);
			c->offset -= n;
			spin_unlock(&c->lock);
		}
		cclose(c);
		poperror();
		return -1;
	}
	if (c->qid.type & QTDIR)
		error(Eisdir);
	n = r;
	if (off == ~0LL) {	/* use and maintain channel's offset */
		/* record that the offset is ours to maintain; without this neither
		 * rollback below nor the one in the handler could ever run */
		ispwrite = 0;
		spin_lock(&c->lock);
		off = c->offset;
		c->offset += n;
		spin_unlock(&c->lock);
	}
	r = c->dev->write(c, p, n, off);
	if (!ispwrite && r < n) {
		/* short write: take back the part that was not written */
		spin_lock(&c->lock);
		c->offset -= n - r;
		spin_unlock(&c->lock);
	}
	poperror();
	cclose(c);
	return r;
}
/* Create (or truncate) the file 'name' and return a new descriptor for it.
 * Returns -1 on failure: errno is EINVAL if the mode check raises an error,
 * and EEXIST for any failure of namec() or of the descriptor allocation. */
int syscreate(char *name, int omode)
{
ERRSTACK(1);
struct chan *c = NULL;
int fd;
/* if it exists, it is truncated.
 * if it does not exist, it's created.
 * so we don't need these flags.
 */
omode &= ~(O_CREAT | O_TRUNC);
if (waserror()){
printd("syscreate: bad mode %x\n", omode);
set_errno(EINVAL);
poperror();
return -1;
}
openmode(omode); /* error check only */
poperror();
if (waserror()) {
set_errno(EEXIST);
printd("syscreate fails:%s:\n", current_errstr());
/* c is still NULL if namec() itself failed */
if (c)
cclose(c);
poperror();
return -1;
}
c = namec(name, Acreate, omode, 0);
fd = newfd(c);
if (fd < 0)
error(Enofd);
poperror();
printd("syscreate: return %d\n", fd);
return fd;
}
/* Open 'name' with mode 'omode' and return a new descriptor.
 *
 * O_NONBLOCK and O_DIRECTORY are stripped before the open.  A request with
 * both O_CREATE and O_EXCL goes straight to syscreate(); a request with
 * O_CREAT falls back to syscreate() only when the open itself failed.
 * Returns -1 with errno set to ENOENT on any other failure. */
int sysopen(char *name, int omode)
{
	ERRSTACK(1);
	struct chan *c = NULL;
	int fd;
	/* TODO: recorded but not enforced yet */
	int mustdir = 0;

	printd("%p: ", current->pid);
	printd("sysopen %s mode %o\n", name, omode);
	if (omode & O_NONBLOCK)	/* what to do? */
		omode &= ~O_NONBLOCK;
	if (omode & O_DIRECTORY) {
		omode &= ~O_DIRECTORY;
		mustdir = 1;
	}
	if ((omode & (O_CREATE | O_EXCL)) == (O_CREATE | O_EXCL))
		return syscreate(name, omode);
	/* TODO: plan9 used to check for both CREATE and TRUNC here, calling create
	 * regardless.  This will need work as we add in other devices. */
	if (waserror()) {
		/* Create only if the open itself failed (c still NULL).  If the file
		 * was opened and we merely ran out of descriptors, creating would
		 * leak the chan we hold and truncate a file that exists. */
		if (c == NULL && (omode & O_CREAT)) {
			poperror();
			return syscreate(name, omode);
		}
		set_errno(ENOENT);
		printd("error\n");
		if (c)
			cclose(c);
		poperror();
		return -1;
	}
	openmode(omode);	/* error check only */
	c = namec(name, Aopen, omode, 0);
	fd = newfd(c);
	if (fd < 0)
		error(Enofd);
	poperror();
	printd("%p: ", current->pid);
	printd("sysopen %s returns %d %x\n", name, fd, fd);
	return fd;
}
int sysclose(int fd)
{
fdtochan(fd, -1, 0, 0);
fdclose(fd, 0);
printd("%p: ", current->pid);
printd("sysclose %d\n", fd);
return 0;
}
/* Stat the file 'name': ask its device for the marshalled stat data and
 * convert that into the struct kstat at statbuf.
 * Returns 0 on success, or -1 with errno set to ENOENT if the lookup or the
 * device stat raises an error.  'len' is not consulted here. */
int sysstat(char *name, uint8_t * statbuf, int len)
{
ERRSTACK(1);
int r;
struct chan *c = NULL;
char *aname;
int fd;
uint8_t data[sizeof(struct dir)];
if (waserror()) {
set_errno(ENOENT);
/* c is still NULL if namec() itself failed */
if (c)
cclose(c);
poperror();
return -1;
}
c = namec(name, Aaccess, 0, 0);
r = c->dev->stat(c, data, sizeof(data));
#if 0
/* we don't currently set the path in stat. Plan9/NIX do. */
aname = pathlast(c->path);
if (aname)
r = dirsetname(aname, strlen(aname), data, r, sizeof(data));
#endif
poperror();
cclose(c);
/* now convert for akaros. */
convM2kstat(data, sizeof(data), (struct kstat *)statbuf);
return 0;
}
int sysfstat(int fd, uint8_t * statbuf, int len)
{
ERRSTACK(1);
int r;
struct chan *c = NULL;
uint8_t data[sizeof(struct dir)];
if (waserror()) {
poperror();
return -1;
}
c = fdtochan(fd, -1, 0, 1);
poperror();
if (waserror()) {
cclose(c);
nexterror();
}
r = c->dev->stat(c, data, sizeof(data));
poperror();
cclose(c);
/* now convert for akaros. */
convM2kstat(data, sizeof(data), (struct kstat *)statbuf);
printd("sysfstat fd %d ok\n", fd);
return 0;
}
/*
 * int dup(int oldfd, int newfd);
 * if newfd is -1, pick anything.
 *
 * Returns the new descriptor, or -1 on failure (errno is EBADF when ofd
 * cannot be resolved or nfd is unusable).  Duplicating onto a specific nfd
 * is not supported yet and panics.
 */
int sysdup(int ofd, int nfd)
{
	ERRSTACK(1);
	struct chan *nc, *oc;
	struct fgrp *f;

	/*
	 * Close after dup'ing, so date > #d/1 works
	 */
	if (waserror()) {
		set_errno(EBADF);
		poperror();
		return -1;
	}
	oc = fdtochan(ofd, -1, 0, 1);
	poperror();
	if (nfd != -1) {
		panic("Need to sync with VFS");
		f = current->fgrp;
		spin_lock(&f->lock);
		if (f->closed) {
			spin_unlock(&f->lock);
			/* drop the reference fdtochan took for us */
			cclose(oc);
			return -1;
		}
		if (nfd < 0 || growfd(f, nfd) < 0) {
			unlockfgrp(f);
			cclose(oc);
			/* no handler is installed at this point, so report the failure
			 * by return value rather than raising an error */
			set_errno(EBADF);
			return -1;
		}
		if (nfd > f->maxfd)
			f->maxfd = nfd;
		/* swap oc into the slot; whatever was there is closed afterwards */
		nc = f->fd[nfd];
		f->fd[nfd] = oc;
		unlockfgrp(f);
		if (nc != NULL)
			cclose(nc);
	} else {
		if (waserror()) {
			/* The handler above has already been popped, so there is nothing
			 * of ours left to re-raise to: clean up and fail here. */
			cclose(oc);
			poperror();
			return -1;
		}
		nfd = newfd(oc);
		if (nfd < 0)
			error(Enofd);
		poperror();
	}
	printd("sysdup %d -> %d\n", ofd, nfd);
	return nfd;
}
/* Set up the 9ns state (fgrp, pgrp, slash, dot) of new_proc.
 *
 * With no parent, fresh groups are created and "/" is bound to "#r".  With a
 * parent, the fgrp is copied, the pgrp is shared, and references are taken on
 * the parent's slash and dot.  Returns 0 on success, -1 if an error was
 * raised along the way.
 *
 * TODO: 9ns ns inheritance flags: Shared, copied, or empty. Looks like we're
 * copying the fgrp, and sharing the pgrp. */
int plan9setup(struct proc *new_proc, struct proc *parent)
{
struct proc *old_current;
struct kref *new_dot_ref;
ERRSTACK(1);
if (waserror()) {
printd("plan9setup failed\n");
poperror();
return -1;
}
if (!parent) {
/* We are probably spawned by the kernel directly, and have no parent to
 * inherit from. Be sure to set up fgrp/pgrp before calling namec().
 *
 * TODO: One problem is namec wants a current set for things like
 * genbuf. So we'll use new_proc for this bootstrapping. Note
 * switch_to() also loads the cr3. */
new_proc->fgrp = dupfgrp(NULL);
new_proc->pgrp = newpgrp();
/* NOTE(review): if namec() raises an error here, the handler above returns
 * without a matching switch_back() -- confirm whether that matters. */
old_current = switch_to(new_proc);
new_proc->slash = namec("#r", Atodir, 0, 0);
switch_back(new_proc, old_current);
/* Want the name to be "/" instead of "#r" */
pathclose(new_proc->slash->path);
new_proc->slash->path = newpath("/");
new_proc->dot = cclone(new_proc->slash);
poperror();
return 0;
}
/* Copy semantics: do not change this without revisiting proc_destroy,
 * close_9ns_files, and closefgrp. */
new_proc->fgrp = dupfgrp(parent->fgrp);
/* Shared semantics */
kref_get(&parent->pgrp->ref, 1);
new_proc->pgrp = parent->pgrp;
/* copy semantics on / and . (doesn't make a lot of sense in akaros o/w) */
/* / should never disappear while we hold a ref to parent */
kref_get(&parent->slash->ref, 1);
new_proc->slash = parent->slash;
/* dot could change concurrently, and we could fail to gain a ref if whoever
 * decref'd dot triggered the release. if that did happen, new_proc->dot
 * should update and we can try again. */
while (!(new_dot_ref = kref_get_not_zero(&parent->dot->ref, 1)))
cpu_relax();
/* And now, we can't trust parent->dot, and need to determine our dot from
 * the ref we obtained. */
new_proc->dot = container_of(new_dot_ref, struct chan, ref);
poperror();
return 0;
}
/* bindmount -- common to bind and mount, since they're almost the same.
 * ismount -- non-zero for mount, zero for bind
 * fd -- for mount: the descriptor handed to the 'M' device's attach
 * afd -- auth fd, used to authenticate to a server if needed
 *
 * arg0 -- *source* of what we are mounting (old_dir / device in linux);
 * only used for bind
 * arg1 -- where we are mounting *onto* (new_dir / dir in linux)
 * flag -- options, i.e. MAFTER, MBEFORE, etc.
 * spec -- additional third argument used in special cases like dosfs, etc.
 *
 * Returns the result of cmount(), or -1 if an error was raised.  After a
 * successful mount, fd is closed.
 *
 * We're not renaming arg0 and arg1 for ease of diffing with the original 9ns
 * source code (for now). */
int
bindmount(int ismount,
int fd,
int afd,
char* arg0,
char* arg1,
int flag,
char* spec)
{
ERRSTACK(4); /* it's still complicated. */
int i;
struct dev *dev;
struct chan *c0, *c1, *ac, *bc;
/* argument bundle passed to the 'M' device's attach, disguised as a char* */
struct{
struct chan *chan;
struct chan *authchan;
char *spec;
int flags;
}bogus;
/* outermost handler: every nested handler below re-raises into this one */
if (waserror()){
printk("bindmount: %s\n", current_errstr());
poperror();
return -1;
}
/* reject unknown flag bits and asking for both MBEFORE and MAFTER */
if((flag&~MMASK) || (flag&MORDER)==(MBEFORE|MAFTER))
error(Ebadarg);
bogus.flags = flag & MCACHE;
if(ismount){
if(current->pgrp->noattach)
error(Enoattach);
ac = NULL;
bc = fdtochan(fd, ORDWR, 0, 1);
if(waserror()) {
if(ac)
cclose(ac);
cclose(bc);
nexterror();
}
if(afd >= 0)
ac = fdtochan(afd, ORDWR, 0, 1);
bogus.chan = bc;
bogus.authchan = ac;
/* NOTE(review): bogus.spec keeps the caller's pointer, not the validated
 * copy made just below -- confirm that is intended. */
bogus.spec = spec;
/* turn any error from validating the spec into Ebadspec */
if(waserror())
error(Ebadspec);
spec = validnamedup(spec, 1);
poperror();
if(waserror()){
kfree(spec);
nexterror();
}
dev = devtabget('M', 0); //XDYNX
if(waserror()){
//devtabdecr(dev);
nexterror();
}
c0 = dev->attach((char*)&bogus);
poperror();
//devtabdecr(dev);
poperror(); /* spec */
kfree(spec);
poperror(); /* ac bc */
if(ac)
cclose(ac);
cclose(bc);
}else{
bogus.spec = NULL;
/* this is the thing you will bind onto the mount. old_dir (or device)
 * in linux terms. *src* in akaros. */
c0 = namec(arg0, Abind, 0, 0);
}
if(waserror()){
cclose(c0);
nexterror();
}
/* c1 is the target, where we will mount onto. new_dir, (or just dir) in
 * linux terms. *onto* in Akaros */
c1 = namec(arg1, Amount, 0, 0);
if(waserror()){
cclose(c1);
nexterror();
}
i = cmount(&c0, c1, flag, bogus.spec);
/* unwind: each poperror() pairs with the handler guarding the chan closed
 * right after it */
poperror();
cclose(c1);
poperror();
cclose(c0);
if(ismount)
fdclose(fd, 0);
poperror();
return i;
}
/*
 * int unmount(char* name, char* old);
 *
 * Undo a mount or bind on 'old'.  'name' selects what to remove and may be
 * NULL, in which case cunmount() is called with a NULL second argument.
 * Returns 0 on success, -1 if an error was raised.
 */
int
sysunmount(char *name, char *old)
{
ERRSTACK(2);
int ret;
struct chan *cmount, *cmounted;
if (waserror()){
printd("unmount went poorly\n");
poperror();
return -1;
}
cmount = namec(old, Amount, 0, 0);
cmounted = NULL;
if(name != NULL) {
/* guards cmount only while 'name' is being resolved */
if(waserror()) {
cclose(cmount);
nexterror();
}
/*
 * This has to be namec(..., Aopen, ...) because
 * if arg[0] is something like /srv/cs or /fd/0,
 * opening it is the only way to get at the real
 * struct chan underneath.
 */
cmounted = namec(name, Aopen, OREAD, 0);
poperror();
}
/* from here on both chans are released if cunmount() raises an error */
if(waserror()) {
cclose(cmount);
if(cmounted != NULL)
cclose(cmounted);
nexterror();
}
cunmount(cmount, cmounted);
cclose(cmount);
if(cmounted != NULL)
cclose(cmounted);
poperror();
poperror();
return 0;
}
/* Close the 9ns chans of process p: all of them, or with only_cloexec just
 * the ones flagged CCEXEC.
 *
 * Notes on concurrency:
 * - Spinlocks can't be held across cclose, since it might sleep eventually.
 * - We're called from proc_destroy, so other syscalls could be trying to add
 *   to the group concurrently; that is what the "closed" flag is for.
 * - dot and slash chans are dealt with in proc_free.  It is difficult to
 *   close and zero those with concurrent syscalls, since those are a source
 *   of krefs.
 * - The memory is freed in proc_free().  That has to wait, since there can be
 *   concurrent accesses to fgrp before the free.
 * - Once we lock and set closed, no further additions can happen.  To
 *   simplify our closes, multiple calls to this func are tolerated (though
 *   that should never happen with the current code). */
void close_9ns_files(struct proc *p, bool only_cloexec)
{
	struct fgrp *f = p->fgrp;
	int i;

	spin_lock(&f->lock);
	if (f->closed) {
		spin_unlock(&f->lock);
		warn("Unexpected double-close");
		return;
	}
	/* a cloexec-only pass leaves the group open for business */
	if (!only_cloexec)
		f->closed = TRUE;
	spin_unlock(&f->lock);

	/* maxfd is a legit val, not a +1 */
	i = 0;
	while (i <= f->maxfd) {
		if (f->fd[i] && (!only_cloexec || (f->fd[i]->flag & CCEXEC))) {
			cclose(f->fd[i]);
			f->fd[i] = 0;
		}
		i++;
	}
}
/* Print a one-line description of chan ch: its path, its device's name and
 * whatever the device's chaninfo hook reports.  Placeholder text is printed
 * for a missing path or device, and a chan with no device gets a second line
 * showing its qid.path. */
void print_chaninfo(struct chan *ch)
{
	char buf[64] = {0};
	const char *pathname = "no path";
	const char *devname = "no dev";
	const char *devinfo = "no info";

	if (ch->path)
		pathname = ch->path->s;
	if (ch->dev) {
		devname = ch->dev->name;
		devinfo = ch->dev->chaninfo(ch, buf, sizeof(buf));
	}
	printk("Chan pathname: %s, Dev: %s, Devinfo: %s\n",
	       pathname, devname, devinfo);
	if (!ch->dev)
		printk("No dev: intermediate chan? qid.path: %p\n", ch->qid.path);
}
/* Print every occupied 9ns descriptor slot of process p, one chan per line,
 * while holding the fgrp lock. */
void print_9ns_files(struct proc *p)
{
	struct fgrp *grp = p->fgrp;
	int slot = 0;

	spin_lock(&grp->lock);
	printk("9ns files for proc %d:\n", p->pid);
	/* maxfd is a legit val, not a +1 */
	while (slot <= grp->maxfd) {
		if (grp->fd[slot]) {
			printk("\t9fs %d, ", slot);
			print_chaninfo(grp->fd[slot]);
		}
		slot++;
	}
	spin_unlock(&grp->lock);
}
/*
 * int pipe(int fd[2]);
 *
 * Create a pipe from the "#P" device: its "data" and "data1" files are opened
 * read/write and their descriptors stored in pfd[0] and pfd[1].
 * Returns 0 on success, or -1 if any step raised an error, in which case both
 * chans have been closed and pfd is left untouched.
 */
int syspipe(int *pfd)
{
	ERRSTACK(1);
	struct chan *c[2];
	int fd[2];
	static char *datastr[] = {"data", "data1"};

	c[0] = NULL;
	c[1] = NULL;
	fd[0] = -1;
	fd[1] = -1;
	/* install the handler before anything that can raise an error, and pop
	 * it before returning, like every other handler in this file */
	if(waserror()){
		if(c[0])
			cclose(c[0]);
		if(c[1])
			cclose(c[1]);
		poperror();
		return -1;
	}
	c[0] = namec("#P", Atodir, 0, 0);
	c[1] = cclone(c[0]);
	/* walk each chan from the pipe directory to its own data file */
	if(walk(&c[0], datastr+0, 1, 1, NULL) < 0)
		error(Egreg);
	if(walk(&c[1], datastr+1, 1, 1, NULL) < 0)
		error(Egreg);
	c[0] = c[0]->dev->open(c[0], ORDWR);
	c[1] = c[1]->dev->open(c[1], ORDWR);
	if(newfd2(fd, c) < 0)
		error(Enofd);
	poperror();
	pfd[0] = fd[0];
	pfd[1] = fd[1];
	printd("syspipe returns [%d,%d]\n",
	       fd[0], fd[1]);
	return 0;
}