blob: d1f79b2acc978b3c5b1c873a94f3381feb3be069 [file] [log] [blame]
/* Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
* Portions Copyright © 1997-1999 Vita Nuova Limited
* Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
* (www.vitanuova.com)
* Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
*
* Modified for the Akaros operating system:
* Copyright (c) 2013-2014 The Regents of the University of California
* Copyright (c) 2013-2015 Google Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE. */
#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>
/* The struct dev instance for this device; devname() reads its name field. */
struct dev ipdevtab;

/* Returns the name stored in ipdevtab. */
static char *devname(void)
{
	char *name = ipdevtab.name;

	return name;
}
/* File-type numbers and qid path layout for the device's file tree.
 *
 * The Q* values identify a file type.  They are grouped by directory level
 * (top, protocol, conversation); each Q*base constant aliases the first file
 * of its level so the gen functions can turn a directory index into a type by
 * adding the base.
 *
 * A qid path packs three fields, low bits first:
 *   type  - Logtype (5) bits
 *   conv  - Logconv (12) bits, starting at Shiftconv
 *   proto - Logproto (8) bits, starting at Shiftproto */
enum {
	Qtopdir = 1,	/* top level directory */
	Qtopbase,
	Qarp = Qtopbase,	/* first file in the top level directory */
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,
	Qprotodir,	/* directory for a protocol */
	Qprotobase,
	Qclone = Qprotobase,	/* first file in a protocol directory */
	Qstats,
	Qconvdir,	/* directory for a conversation */
	Qconvbase,
	Qctl = Qconvbase,	/* first file in a conversation directory */
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,
	Logtype = 5,	/* width of the type field */
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,	/* width of the conversation index field */
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,	/* width of the protocol index field */
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,
	Nfs = 32,	/* number of slots in ipfs[] */
	BYPASS_QMAX = 64 * MiB,	/* limit passed to qopen() for the bypass rq */
	IPROUTE_LEN = 2 * PGSIZE,	/* size of the Qiproute synth_buf */
};
/* Field extractors for a struct qid (x) and the matching constructor.  Only
 * the low 32 bits of the path are examined. */
#define TYPE(x) ( ((uint32_t)(x).path) & Masktype )
#define CONV(x) ( (((uint32_t)(x).path) >> Shiftconv) & Maskconv )
#define PROTO(x) ( (((uint32_t)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y))
/* Owner name used for entries that do not belong to a conversation, and the
 * owner a conversation is reset to in closeconv(). */
static char network[] = "network";
/* Held by ipgetfs() while it checks for and creates an ipfs[] entry. */
qlock_t fslock;
struct Fs *ipfs[Nfs]; /* attached fs's */
struct queue *qlog;
/* Medium link routines, called once from ipinit(). */
extern void nullmediumlink(void);
extern void pktmediumlink(void);
/* eve.name is the owner given to ownerless conversations in ip3gen(). */
extern struct username eve;
/* Forward declarations for helpers defined later in this file. */
static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
static void closeconv(struct conv *);
static void setup_proto_qio_bypass(struct conv *cv);
static void undo_proto_qio_bypass(struct conv *cv);
static int connected(void *a);
static struct conv *chan2conv(struct chan *chan)
{
/* That's a lot of pointers to get to the conv! */
return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
}
/* Wrapper around devdir() for the gen functions: fills in db and always
 * reports 1, the value the gen functions in this file return for "entry
 * produced". */
static inline int founddevdir(struct chan *c, struct qid q, char *n,
			      int64_t length, char *user, long perm,
			      struct dir *db)
{
	const int found = 1;

	devdir(c, q, n, length, user, perm, db);
	return found;
}
static int topdirgen(struct chan *c, struct dir *dp)
{
struct qid q;
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
}
/* Builds the perm value reported by stat for a conversation's Qdata file.
 *
 * select() polls a socket's 'actionability' through the Qdata FD, so besides
 * the stored permissions we also fold in readiness bits for listenable and
 * connected conversations.  It is a small hack. */
static int qdata_stat_perm(struct conv *cv)
{
	int perm = cv->perm;

	/* A pending incoming call makes the file readable.  Qlisten reports
	 * this too; socket code should never use a listening socket for data,
	 * so a readable Qdata here should not confuse anyone. */
	if (cv->incall)
		perm |= DMREADABLE;

	/* Protocols with a connect method are not considered readable or
	 * writable until the conv is connected.  This is what lets select()
	 * see a non-blocking connect() complete: the conv shows up as writable
	 * once it is connected and the qio is writable.
	 *
	 * 'Connected' may also be reached when the connect attempt failed (the
	 * 9ns TCP code does this, e.g. SYN answered by RST): it only means the
	 * protocol has finished what connectctlmsg was waiting for. */
	if (cv->p->connect && !connected(cv))
		return perm;

	if (qreadable(cv->rq))
		perm |= DMREADABLE;
	if (qwritable(cv->wq))
		perm |= DMWRITABLE;
	return perm;
}
/* Generates the dir entry for file type i inside a conversation directory.
 *
 * Returns 1 when dp was filled in, and -1 when i names no file here (snoop is
 * only offered when the protocol is named "ipifc").  As a side effect, a conv
 * without an owner is given eve's name. */
static int ip3gen(struct chan *c, int i, struct dir *dp)
{
	struct conv *cv = chan2conv(c);
	struct qid q;
	int perm;

	if (!cv->owner)
		kstrdup(&cv->owner, eve.name);
	mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);

	switch (i) {
	case Qctl:
		return founddevdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
	case Qdata:
		/* length is the number of bytes waiting in the read queue */
		perm = qdata_stat_perm(cv);
		return founddevdir(c, q, "data", qlen(cv->rq), cv->owner, perm,
				   dp);
	case Qerr:
		perm = cv->perm;
		if (qreadable(cv->eq))
			perm |= DMREADABLE;
		return founddevdir(c, q, "err", qlen(cv->eq), cv->owner, perm,
				   dp);
	case Qlisten:
		perm = cv->perm;
		if (cv->incall)
			perm |= DMREADABLE;
		return founddevdir(c, q, "listen", 0, cv->owner, perm, dp);
	case Qlocal:
		return founddevdir(c, q, "local", 0, cv->owner, 0444, dp);
	case Qremote:
		return founddevdir(c, q, "remote", 0, cv->owner, 0444, dp);
	case Qstatus:
		return founddevdir(c, q, "status", 0, cv->owner, 0444, dp);
	case Qsnoop:
		if (strcmp(cv->p->name, "ipifc") != 0)
			return -1;
		perm = 0400;
		if (qreadable(cv->sq))
			perm |= DMREADABLE;
		return founddevdir(c, q, "snoop", qlen(cv->sq), cv->owner, perm,
				   dp);
	default:
		return -1;
	}
}
/* Generates the dir entry for a file that lives directly in a protocol
 * directory: "clone" (0666) or "stats" (0444).  Returns 1 on success and -1
 * for any other type. */
static int ip2gen(struct chan *c, int i, struct dir *dp)
{
	struct qid q;
	char *name;
	long perm;

	if (i == Qclone) {
		name = "clone";
		perm = 0666;
	} else if (i == Qstats) {
		name = "stats";
		perm = 0444;
	} else {
		return -1;
	}
	mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
	return founddevdir(c, q, name, 0, network, perm, dp);
}
static int ip1gen(struct chan *c, int i, struct dir *dp)
{
struct qid q;
char *p;
int prot;
int len = 0;
struct Fs *f;
extern uint32_t kerndate;
f = ipfs[c->dev];
prot = 0666;
mkqid(&q, QID(0, 0, i), 0, QTFILE);
switch (i) {
default:
return -1;
case Qarp:
p = "arp";
break;
case Qndb:
p = "ndb";
len = strlen(f->ndb);
q.vers = f->ndbvers;
break;
case Qiproute:
p = "iproute";
break;
case Qipselftab:
p = "ipselftab";
prot = 0444;
break;
case Qiprouter:
p = "iprouter";
break;
case Qlog:
p = "log";
break;
}
devdir(c, q, p, len, network, prot, dp);
if (i == Qndb && f->ndbmtime > kerndate)
dp->mtime.tv_sec = f->ndbmtime;
return 1;
}
static int ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d,
int unused_int, int s, struct dir *dp)
{
struct qid q;
struct conv *cv;
struct Fs *f;
f = ipfs[c->dev];
switch (TYPE(c->qid)) {
case Qtopdir:
if (s == DEVDOTDOT)
return topdirgen(c, dp);
if (s < f->np) {
/* protocol with no user interface */
if (f->p[s]->connect == NULL)
return 0;
mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
return founddevdir(c, q, f->p[s]->name, 0, network,
0555, dp);
}
s -= f->np;
return ip1gen(c, s + Qtopbase, dp);
case Qarp:
case Qndb:
case Qlog:
case Qiproute:
case Qiprouter:
case Qipselftab:
return ip1gen(c, TYPE(c->qid), dp);
case Qprotodir:
if (s == DEVDOTDOT)
return topdirgen(c, dp);
else if (s < f->p[PROTO(c->qid)]->ac) {
cv = f->p[PROTO(c->qid)]->conv[s];
snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
return founddevdir(c, q, get_cur_genbuf(), 0, cv->owner,
0555, dp);
}
s -= f->p[PROTO(c->qid)]->ac;
return ip2gen(c, s + Qprotobase, dp);
case Qclone:
case Qstats:
return ip2gen(c, TYPE(c->qid), dp);
case Qconvdir:
if (s == DEVDOTDOT) {
s = PROTO(c->qid);
mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
return 1;
}
return ip3gen(c, s + Qconvbase, dp);
case Qctl:
case Qdata:
case Qerr:
case Qlisten:
case Qlocal:
case Qremote:
case Qstatus:
case Qsnoop:
return ip3gen(c, TYPE(c->qid), dp);
}
return -1;
}
/* Device init hook: prepares fslock and calls the null and pkt medium link
 * routines. */
static void ipinit(void)
{
	qlock_init(&fslock);

	nullmediumlink();
	pktmediumlink();

	/* The format verbs below are not installed here ("if only"):
	 *	fmtinstall('i', eipfmt);
	 *	fmtinstall('I', eipfmt);
	 *	fmtinstall('E', eipfmt);
	 *	fmtinstall('V', eipfmt);
	 *	fmtinstall('M', eipfmt);
	 */
}
/* Device reset hook; intentionally does nothing. */
static void ipreset(void)
{
	return;
}
/* Returns the struct Fs for instance number dev, creating and initializing it
 * on first use.
 *
 * Creation is serialized by fslock.  A new Fs is zero-allocated, has its
 * rwlock and iprouter qlock initialized, goes through ip_init(), arpinit() and
 * netloginit(), and is then handed to every entry of the NULL-terminated
 * ipprotoinit[] table before it is published in ipfs[].
 *
 * Returns NULL when dev is outside [0, Nfs).  Negative values are rejected as
 * well, since dev is used directly as an index into ipfs[]. */
static struct Fs *ipgetfs(int dev)
{
	extern void (*ipprotoinit[]) (struct Fs *);
	struct Fs *f;
	int i;

	if (dev < 0 || dev >= Nfs)
		return NULL;

	qlock(&fslock);
	f = ipfs[dev];
	if (f == NULL) {
		f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
		rwinit(&f->rwlock);
		qlock_init(&f->iprouter.qlock);
		ip_init(f);
		arpinit(f);
		netloginit(f);
		for (i = 0; ipprotoinit[i]; i++)
			ipprotoinit[i] (f);
		f->dev = dev;
		ipfs[dev] = f;
	}
	qunlock(&fslock);
	return f;
}
/* Allocates a struct IPaux carrying a copy of owner and the given tag.
 *
 * The tag field is first filled with spaces and then overwritten with at most
 * sizeof(tag) bytes of the tag string, so a short tag ends up space padded and
 * a long one is truncated; no NUL terminator is written.
 *
 * NOTE(review): the kzmalloc() result is used without a NULL check - confirm
 * that a flags value of 0 cannot fail here. */
struct IPaux *newipaux(char *owner, char *tag)
{
	struct IPaux *aux = kzmalloc(sizeof(*aux), 0);
	int taglen;

	kstrdup(&aux->owner, owner);
	memset(aux->tag, ' ', sizeof(aux->tag));
	taglen = strlen(tag);
	if (taglen > sizeof(aux->tag))
		taglen = sizeof(aux->tag);
	memmove(aux->tag, tag, taglen);
	return aux;
}
/* Owner string recorded in a chan's struct IPaux (set up by newipaux()). */
#define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)

/* Attach handler.  spec is the decimal instance number of the IP stack to
 * attach to.
 *
 * Raises EFAIL when the number is outside [0, Nfs); the value indexes ipfs[],
 * so negative numbers (atoi() accepts a leading '-') must be refused too.
 * Otherwise makes sure the instance exists and returns a chan for its top
 * directory, with an IPaux owned by commonuser() and tagged "none". */
static struct chan *ipattach(char *spec)
{
	struct chan *c;
	int dev;

	dev = atoi(spec);
	if (dev < 0 || dev >= Nfs)
		error(EFAIL, "bad specification");
	ipgetfs(dev);
	c = devattach(devname(), spec);
	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
	c->dev = dev;
	c->aux = newipaux(commonuser(), "none");
	return c;
}
/* Walk handler: defers to devwalk() with ipgen.  When the walk produced a
 * clone chan, the clone gets its own IPaux copied from the source chan's owner
 * and tag. */
static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
			      unsigned int nname)
{
	struct IPaux *src_aux = c->aux;
	struct walkqid *wq = devwalk(c, nc, name, nname, NULL, 0, ipgen);

	if (wq == NULL || wq->clone == NULL)
		return wq;
	wq->clone->aux = newipaux(src_aux->owner, src_aux->tag);
	return wq;
}
/* Stat handler: defers to devstat() with ipgen as the generator. */
static size_t ipstat(struct chan *c, uint8_t *db, size_t n)
{
	size_t ret = devstat(c, db, n, NULL, 0, ipgen);

	return ret;
}
/* Rendez condition for a listener (arg is the struct conv): true once the
 * conv's read queue is closed, or when an incoming call is queued. */
static int should_wake(void *arg)
{
	struct conv *cv = arg;

	/* a closed rq signals that the conv is closed */
	return qisclosed(cv->rq) || cv->incall != NULL;
}
/* Open handler.  The work depends on the file type encoded in c->qid:
 *   - Qndb: writing or truncating requires eve; O_WRITE|O_TRUNC together
 *     clears f->ndb.
 *   - Qlog, Qiprouter: call netlogopen() / iprouteropen().
 *   - Qiproute: allocates c->synth_buf (IPROUTE_LEN bytes) and fills it via
 *     routeread(); ipclose() frees it.
 *   - directories and the read-only text files: refuse O_WRITE.
 *   - Qsnoop: read-only, limited to the conv's owner or eve; increments
 *     cv->snoopers.
 *   - Qclone: gets a conversation from Fsprotoclone() under the proto qlock
 *     and retargets the chan's qid at that conversation's ctl file.
 *   - Qdata/Qctl/Qerr: checks the requested access against cv->perm and takes
 *     an inuse reference; the first opener becomes owner with perm 0660.
 *   - Qlisten: waits for an incoming call and retargets the chan at the new
 *     conversation's ctl file (O_PATH opens only take a reference).
 * On success the chan's mode is set, COPEN is flagged and the offset is
 * reset.  Failures are raised with error(). */
static struct chan *ipopen(struct chan *c, int omode)
{
	ERRSTACK(2);
	struct conv *cv, *nc;
	struct Proto *p;
	int perm;
	struct Fs *f;
	/* perm is a lone rwx, not the rwx------ from the conversion */
	perm = omode_to_rwx(omode) >> 6;
	f = ipfs[c->dev];
	switch (TYPE(c->qid)) {
	default:
		break;
	case Qndb:
		if (omode & (O_WRITE | O_TRUNC) && !iseve())
			error(EPERM, ERROR_FIXME);
		if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
			f->ndb[0] = 0;
		break;
	case Qlog:
		netlogopen(f);
		break;
	case Qiprouter:
		iprouteropen(f);
		break;
	case Qiproute:
		/* reads are later served from this buffer (see ipread()) */
		c->synth_buf = kpages_zalloc(IPROUTE_LEN, MEM_WAIT);
		routeread(f, c->synth_buf, 0, IPROUTE_LEN);
		break;
	case Qtopdir:
	case Qprotodir:
	case Qconvdir:
	case Qstatus:
	case Qremote:
	case Qlocal:
	case Qstats:
	case Qipselftab:
		if (omode & O_WRITE)
			error(EPERM, ERROR_FIXME);
		break;
	case Qsnoop:
		if (omode & O_WRITE)
			error(EPERM, ERROR_FIXME);
		/* might be racy. note the lack of a proto lock, unlike Qdata */
		p = f->p[PROTO(c->qid)];
		cv = p->conv[CONV(c->qid)];
		if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
			error(EPERM, ERROR_FIXME);
		atomic_inc(&cv->snoopers);
		break;
	case Qclone:
		p = f->p[PROTO(c->qid)];
		qlock(&p->qlock);
		if (waserror()) {
			qunlock(&p->qlock);
			nexterror();
		}
		cv = Fsprotoclone(p, ATTACHER(c));
		qunlock(&p->qlock);
		poperror();
		if (cv == NULL) {
			error(ENODEV, "Null conversation from Fsprotoclone");
			break;
		}
		/* the chan now refers to the new conversation's ctl file */
		mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
		break;
	case Qdata:
	case Qctl:
	case Qerr:
		p = f->p[PROTO(c->qid)];
		qlock(&p->qlock);
		cv = p->conv[CONV(c->qid)];
		qlock(&cv->qlock);
		if (waserror()) {
			qunlock(&cv->qlock);
			qunlock(&p->qlock);
			nexterror();
		}
		/* If the bits in cv->perm >> 6 do not cover the request, the
		 * attacher must be the owner and the low bits of cv->perm
		 * must cover it. */
		if ((perm & (cv->perm >> 6)) != perm) {
			if (strcmp(ATTACHER(c), cv->owner) != 0)
				error(EPERM, ERROR_FIXME);
			if ((perm & cv->perm) != perm)
				error(EPERM, ERROR_FIXME);
		}
		cv->inuse++;
		/* first opener takes ownership and resets the permissions */
		if (cv->inuse == 1) {
			kstrdup(&cv->owner, ATTACHER(c));
			cv->perm = 0660;
		}
		qunlock(&cv->qlock);
		qunlock(&p->qlock);
		poperror();
		break;
	case Qlisten:
		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
		/* No permissions or Announce checks required.  We'll see if
		 * that's a good idea or not. (the perm check would do nothing,
		 * as is, since an O_PATH perm is 0).
		 *
		 * But we probably want to incref to keep the conversation
		 * around until this FD/chan is closed.  #ip is a little weird
		 * in that objects never really go away (high water mark for
		 * convs, you can always find them in the ns).  I think it is
		 * possible to namec/ipgen a chan, then have that conv close,
		 * then have that chan be opened.  You can probably do this with
		 * a data file. */
		if (omode & O_PATH) {
			qlock(&cv->qlock);
			cv->inuse++;
			qunlock(&cv->qlock);
			break;
		}
		if ((perm & (cv->perm >> 6)) != perm) {
			if (strcmp(ATTACHER(c), cv->owner) != 0)
				error(EPERM, ERROR_FIXME);
			if ((perm & cv->perm) != perm)
				error(EPERM, ERROR_FIXME);
		}
		if (cv->state != Announced)
			error(EFAIL, "not announced");
		/* From here on, any error drops the reference taken just
		 * below on the listening conv. */
		if (waserror()) {
			closeconv(cv);
			nexterror();
		}
		qlock(&cv->qlock);
		cv->inuse++;
		qunlock(&cv->qlock);
		nc = NULL;
		while (nc == NULL) {
			/* give up if we got a hangup */
			if (qisclosed(cv->rq))
				error(EFAIL, "listen hungup");
			qlock(&cv->listenq);
			if (waserror()) {
				qunlock(&cv->listenq);
				nexterror();
			}
			/* we can peek at incall without grabbing the cv qlock.
			 * if anything is there, it'll remain there until we
			 * dequeue it.  no one else can, since we hold the
			 * listenq lock */
			if ((c->flag & O_NONBLOCK) && !cv->incall)
				error(EAGAIN, "listen queue empty");
			/* wait for a connect */
			rendez_sleep(&cv->listenr, should_wake, cv);
			/* if there is a concurrent hangup, they will hold the
			 * qlock until the hangup is complete, including closing
			 * the cv->rq */
			qlock(&cv->qlock);
			nc = cv->incall;
			if (nc != NULL) {
				/* dequeue the call and point the chan at its
				 * ctl file */
				cv->incall = nc->next;
				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl),
				      0, QTFILE);
				kstrdup(&cv->owner, ATTACHER(c));
			}
			qunlock(&cv->qlock);
			qunlock(&cv->listenq);
			poperror();
		}
		/* release the reference taken on the listening conv above */
		closeconv(cv);
		poperror();
		break;
	}
	c->mode = openmode(omode);
	c->flag |= COPEN;
	c->offset = 0;
	return c;
}
static size_t ipwstat(struct chan *c, uint8_t *dp, size_t n)
{
ERRSTACK(2);
struct dir *d;
struct conv *cv;
struct Fs *f;
struct Proto *p;
f = ipfs[c->dev];
switch (TYPE(c->qid)) {
default:
error(EPERM, ERROR_FIXME);
break;
case Qctl:
case Qdata:
break;
}
d = kzmalloc(sizeof(*d) + n, 0);
if (waserror()) {
kfree(d);
nexterror();
}
n = convM2D(dp, n, d, (char *)&d[1]);
if (n == 0)
error(ENODATA, ERROR_FIXME);
p = f->p[PROTO(c->qid)];
cv = p->conv[CONV(c->qid)];
if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
error(EPERM, ERROR_FIXME);
if (!emptystr(d->uid))
kstrdup(&cv->owner, d->uid);
if (d->mode != -1)
cv->perm = d->mode & 0777;
poperror();
kfree(d);
return n;
}
/* Describes a chan of this device for debugging output.  Should be able to
 * handle any file type chan; feel free to extend it.
 *
 * For Qdata, Qlisten and Qctl a description is formatted into ret (at most
 * ret_l bytes) and ret is returned.  For the other known types, and for
 * unknown ones, a pointer to a constant string is returned instead and the
 * caller's buffer is left untouched. */
static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
{
	struct conv *conv;
	struct Proto *proto;
	struct Fs *f;

	f = ipfs[ch->dev];
	switch (TYPE(ch->qid)) {
	default:
		ret = "Unknown type";
		break;
	case Qdata:
		proto = f->p[PROTO(ch->qid)];
		conv = proto->conv[CONV(ch->qid)];
		snprintf(ret, ret_l,
			 "Qdata, %s, proto %s, conv idx %d, rq len %d, wq len %d, total read %llu",
			 SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
			 proto->name, conv->x, qlen(conv->rq), qlen(conv->wq),
			 q_bytes_read(conv->rq));
		break;
	case Qarp:
		ret = "Qarp";
		break;
	case Qiproute:
		ret = "Qiproute";
		break;
	case Qlisten:
		proto = f->p[PROTO(ch->qid)];
		conv = proto->conv[CONV(ch->qid)];
		snprintf(ret, ret_l,
			 "Qlisten, %s proto %s, conv idx %d, has %sincalls",
			 SLIST_EMPTY(&conv->listen_taps) ? "untapped"
							 : "tapped",
			 proto->name, conv->x, conv->incall ? "" : "no ");
		break;
	case Qlog:
		ret = "Qlog";
		break;
	case Qndb:
		ret = "Qndb";
		break;
	case Qctl:
		proto = f->p[PROTO(ch->qid)];
		conv = proto->conv[CONV(ch->qid)];
		snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
			 conv->x);
		break;
	}
	return ret;
}
/* Drops one inuse reference on cv.  When the last reference goes away the
 * conversation is reset for reuse: queued incoming calls are closed, the owner
 * goes back to "network" with perm 0660, multicast memberships are removed,
 * the cached route is cleared, a bypass is undone, the protocol's close method
 * runs and the state becomes Idle.  cv->qlock is held throughout and released
 * on both the normal and the error path. */
static void closeconv(struct conv *cv)
{
	ERRSTACK(1);
	struct conv *pending;
	struct Ipmulti *multi;

	qlock(&cv->qlock);
	cv->inuse--;
	if (cv->inuse > 0) {
		qunlock(&cv->qlock);
		return;
	}
	if (waserror()) {
		qunlock(&cv->qlock);
		nexterror();
	}

	/* close all incoming calls since no listen will ever happen */
	while ((pending = cv->incall) != NULL) {
		cv->incall = pending->next;
		closeconv(pending);
	}
	cv->incall = NULL;

	kstrdup(&cv->owner, network);
	cv->perm = 0660;

	/* each removal is expected to unlink the head of cv->multi */
	for (multi = cv->multi; multi != NULL; multi = cv->multi)
		ipifcremmulti(cv, multi->ma, multi->ia);

	cv->r = NULL;
	cv->rgen = 0;
	if (cv->state == Bypass)
		undo_proto_qio_bypass(cv);
	cv->p->close(cv);
	cv->state = Idle;

	qunlock(&cv->qlock);
	poperror();
}
static void ipclose(struct chan *c)
{
struct Fs *f;
f = ipfs[c->dev];
switch (TYPE(c->qid)) {
default:
break;
case Qlog:
if (c->flag & COPEN)
netlogclose(f);
break;
case Qiprouter:
if (c->flag & COPEN)
iprouterclose(f);
break;
case Qdata:
case Qctl:
case Qerr:
case Qlisten:
if (c->flag & COPEN)
closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
break;
case Qsnoop:
if (c->flag & COPEN)
atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
break;
case Qiproute:
if (c->flag & COPEN) {
kpages_free(c->synth_buf, IPROUTE_LEN);
c->synth_buf = NULL;
}
break;
}
kfree(((struct IPaux *)c->aux)->owner);
kfree(c->aux);
}
enum {
	/* size of the scratch buffer used to format the text files */
	Statelen = 32 * 1024,
};

/* Read handler for every file of the device.
 *
 * ch:  the chan being read; its qid selects the file
 * a:   destination buffer
 * n:   maximum number of bytes to return
 * off: file offset (truncated to 32 bits for the text files)
 *
 * Directories are served by devdirread() with ipgen.  The top-level files are
 * delegated to their subsystems.  ctl yields the conversation index as
 * decimal text.  remote, local, status and stats format their text into a
 * temporary Statelen buffer and return the requested window of it.  data, err
 * and snoop read from the conversation's rq, eq and sq queues; data honors
 * O_NONBLOCK on the chan.
 *
 * Returns the number of bytes read.  Raises EPERM for unknown file types and
 * EFAIL when the protocol has no stats method. */
static size_t ipread(struct chan *ch, void *a, size_t n, off64_t off)
{
	struct conv *c;
	struct Proto *x;
	char *buf, *p;
	long rv;
	struct Fs *f;
	uint32_t offset = off;

	f = ipfs[ch->dev];
	p = a;
	switch (TYPE(ch->qid)) {
	default:
		error(EPERM, ERROR_FIXME);
	case Qtopdir:
	case Qprotodir:
	case Qconvdir:
		return devdirread(ch, a, n, 0, 0, ipgen);
	case Qarp:
		return arpread(f->arp, a, offset, n);
	case Qndb:
		return readstr(offset, a, n, f->ndb);
	case Qiproute:
		/* served from the snapshot taken in ipopen() */
		return readmem(offset, a, n, ch->synth_buf, IPROUTE_LEN);
	case Qiprouter:
		return iprouterread(f, a, n);
	case Qipselftab:
		return ipselftabread(f, a, offset, n);
	case Qlog:
		return netlogread(f, a, offset, n);
	case Qctl:
		/* CONV() is derived from a uint32_t, so it must be printed as
		 * an unsigned int; "%lu" did not match the argument type. */
		snprintf(get_cur_genbuf(), GENBUF_SZ, "%u",
			 (unsigned int)CONV(ch->qid));
		return readstr(offset, p, n, get_cur_genbuf());
	case Qremote:
		buf = kzmalloc(Statelen, 0);
		x = f->p[PROTO(ch->qid)];
		c = x->conv[CONV(ch->qid)];
		/* default format unless the protocol supplies its own */
		if (x->remote == NULL) {
			snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
		} else {
			(*x->remote) (c, buf, Statelen - 2);
		}
		rv = readstr(offset, p, n, buf);
		kfree(buf);
		return rv;
	case Qlocal:
		buf = kzmalloc(Statelen, 0);
		x = f->p[PROTO(ch->qid)];
		c = x->conv[CONV(ch->qid)];
		/* default format unless the protocol supplies its own */
		if (x->local == NULL) {
			snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
		} else {
			(*x->local) (c, buf, Statelen - 2);
		}
		rv = readstr(offset, p, n, buf);
		kfree(buf);
		return rv;
	case Qstatus:
		/* this all is a bit screwed up since the size of some state's
		 * buffers will change from one invocation to another.  a reader
		 * will come in and read the entire buffer.  then it will come
		 * again and read from the next offset, expecting EOF.  if the
		 * buffer changed sizes, it'll reprint the end of the buffer
		 * slightly. */
		buf = kzmalloc(Statelen, 0);
		x = f->p[PROTO(ch->qid)];
		c = x->conv[CONV(ch->qid)];
		if (c->state == Bypass)
			snprintf(buf, Statelen, "Bypassed\n");
		else
			(*x->state)(c, buf, Statelen - 2);
		rv = readstr(offset, p, n, buf);
		kfree(buf);
		return rv;
	case Qdata:
		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
		if (ch->flag & O_NONBLOCK)
			return qread_nonblock(c->rq, a, n);
		else
			return qread(c->rq, a, n);
	case Qerr:
		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
		return qread(c->eq, a, n);
	case Qsnoop:
		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
		return qread(c->sq, a, n);
	case Qstats:
		x = f->p[PROTO(ch->qid)];
		if (x->stats == NULL)
			error(EFAIL, "stats not implemented");
		buf = kzmalloc(Statelen, 0);
		(*x->stats) (x, buf, Statelen);
		rv = readstr(offset, p, n, buf);
		kfree(buf);
		return rv;
	}
}
/* Block read handler.  Only Qdata is handled here: a block is taken from the
 * conversation's read queue, without blocking if the chan has O_NONBLOCK.
 * Every other file type goes through devbread(). */
static struct block *ipbread(struct chan *ch, size_t n, off64_t offset)
{
	struct conv *cv;

	if (TYPE(ch->qid) != Qdata)
		return devbread(ch, n, offset);

	cv = chan2conv(ch);
	if (ch->flag & O_NONBLOCK)
		return qbread_nonblock(cv->rq, n);
	return qbread(cv->rq, n);
}
/*
 * choose the conversation's local address: findlocalip() fills c->laddr with
 * the address of the ifc closest to c->raddr
 */
static void setladdr(struct conv *c)
{
	struct Fs *fs = c->p->f;

	findlocalip(fs, c->laddr, c->raddr);
}
/*
 * set the local port to lport, after checking that no other Connected,
 * Announced or Bypass conversation of the protocol already uses the same
 * raddr,rport,laddr,lport quad.  raises "address in use" otherwise.  the
 * proto qlock is held during the scan and dropped before raising.
 */
static void setluniqueport(struct conv *c, int lport)
{
	struct Proto *proto = c->p;
	struct conv *other;
	int i;

	qlock(&proto->qlock);
	for (i = 0; i < proto->nc; i++) {
		other = proto->conv[i];
		/* the first empty slot ends the scan */
		if (other == NULL)
			break;
		if (other == c)
			continue;
		if (other->state != Connected && other->state != Announced
		    && other->state != Bypass)
			continue;
		if (other->lport != lport)
			continue;
		if (other->rport != c->rport)
			continue;
		if (ipcmp(other->raddr, c->raddr) != 0)
			continue;
		if (ipcmp(other->laddr, c->laddr) != 0)
			continue;
		qunlock(&proto->qlock);
		error(EFAIL, "address in use");
	}
	c->lport = lport;
	qunlock(&proto->qlock);
}
/*
 * pick an unused local port and set it.  the candidate comes from the
 * protocol's running counter (nextrport for restricted conversations,
 * nextport otherwise), which is advanced past the port that gets chosen.
 */
static void setlport(struct conv *c)
{
	struct Proto *proto = c->p;
	uint16_t *next;
	int i, in_use;

	next = c->restricted ? &proto->nextrport : &proto->nextport;

	qlock(&proto->qlock);
	while (1) {
		/*
		 * Fsproto initialises p->nextport to 0 and the restricted
		 * ports (p->nextrport) to 600.
		 * Restricted ports must lie between 600 and 1024.  For the
		 * initial condition or if the unrestricted port number has
		 * wrapped round, select a random port between 5000 and 1<<15
		 * to start at.
		 */
		if (c->restricted) {
			if (*next >= 1024)
				*next = 600;
		} else {
			while (*next < 5000)
				urandom_read(next, sizeof(*next));
		}

		/* is the candidate taken by any conversation? */
		in_use = 0;
		for (i = 0; i < proto->nc && proto->conv[i] != NULL; i++) {
			if (proto->conv[i]->lport == *next) {
				in_use = 1;
				break;
			}
		}
		if (!in_use)
			break;
		(*next)++;
	}
	c->lport = (*next)++;
	qunlock(&proto->qlock);
}
/*
 * set a local address and port from a string of the form
 *	[address!]port[!r]
 *
 * str is modified in place (the first '!' is overwritten with a NUL).
 *
 * address: "*" or absent-while-announcing leaves the local address
 * unspecified (IPnoaddr); absent otherwise picks the address closest to the
 * remote side; anything else must be one of our addresses or "not a local IP
 * address" is raised.
 *
 * port: "*" while announcing claims the wildcard port 0 (eve only); a positive
 * number is claimed with setluniqueport(); otherwise a free port is picked by
 * setlport().
 */
static void setladdrport(struct conv *c, char *str, int announcing)
{
	char *p;
	uint16_t lport;
	uint8_t addr[IPaddrlen];

	/*
	 * ignore restricted part if it exists.  it's
	 * meaningless on local ports.
	 */
	p = strchr(str, '!');
	if (p != NULL) {
		*p++ = 0;
		if (strcmp(p, "r") == 0)
			p = NULL;
	}

	c->lport = 0;
	if (p == NULL) {
		/* no address part: str is just the port */
		if (announcing)
			ipmove(c->laddr, IPnoaddr);
		else
			setladdr(c);
		p = str;
	} else {
		if (strcmp(str, "*") == 0)
			ipmove(c->laddr, IPnoaddr);
		else {
			parseip(addr, str);
			if (ipforme(c->p->f, addr))
				ipmove(c->laddr, addr);
			else
				error(EFAIL, "not a local IP address");
		}
	}

	/* one process can get all connections */
	if (announcing && strcmp(p, "*") == 0) {
		if (!iseve())
			error(EPERM, ERROR_FIXME);
		setluniqueport(c, 0);
		/* Stop here: atoi("*") is 0, so falling through would call
		 * setlport() and replace the wildcard port with a random
		 * one. */
		return;
	}

	lport = atoi(p);
	if (lport <= 0)
		setlport(c);
	else
		setluniqueport(c, lport);
}
/* Sets the remote address and port from a string of the form
 * address!port[!r].  str is modified in place.  A trailing "!r" marks the
 * conversation as restricted.  Raises "malformed address" when there is no
 * '!' at all. */
static void setraddrport(struct conv *c, char *str)
{
	char *port;
	char *opts;

	port = strchr(str, '!');
	if (port == NULL)
		error(EFAIL, "malformed address");
	/* split the string into address and port */
	*port++ = 0;
	parseip(c->raddr, str);
	c->rport = atoi(port);

	opts = strchr(port, '!');
	if (opts != NULL && strstr(opts, "!r") != NULL)
		c->restricted = 1;
}
/*
 * called by protocol connect routine to set addresses
 *
 * argv[1] is the remote address!port.  with a third argument, argv[2] gives
 * the local [address!]port; without it the local address and port are chosen
 * automatically.  also decides the conversation's IP version.
 */
void Fsstdconnect(struct conv *c, char *argv[], int argc)
{
	int is_v4;

	if (argc == 3) {
		setraddrport(c, argv[1]);
		setladdrport(c, argv[2], 0);
	} else {
		if (argc != 2)
			error(EINVAL, "bad args to %s", __func__);
		setraddrport(c, argv[1]);
		setladdr(c);
		setlport(c);
	}

	/* TODO: why is an IPnoaddr (in v6 format, equivalent to v6Unspecified),
	 * a v4 format? */
	is_v4 = (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
		 memcmp(c->laddr, v4prefix, IPv4off) == 0)
		|| ipcmp(c->raddr, IPnoaddr) == 0;
	c->ipversion = is_v4 ? V4 : V6;

	/* Linux has taught people to use zeros for local interfaces.  TODO: We
	 * might need this for v6 in the future. */
	if (ipcmp(c->raddr, IPv4_zeroes) == 0)
		ipmove(c->raddr, IPv4_loopback);
}
/*
 * rendez condition used while waiting for a connect to finish: true once
 * the conversation (passed as a void pointer) is in the Connected state
 */
static int connected(void *a)
{
	struct conv *cv = a;

	return cv->state == Connected;
}
/* Handles the "connect" ctl message: starts the connection and sleeps until
 * it is set up.
 *
 * Called with c->qlock held; the lock is dropped around the sleep and is held
 * again on return, and also when an error is raised from the sleep.
 *
 * Raises EBUSY if the conv's state is not 0, EFAIL if the protocol has no
 * connect method, EINPROGRESS for a non-blocking chan whose connection is not
 * ready yet, and EFAIL with the text in c->cerr if that is non-empty after
 * the wait. */
static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb,
			  struct chan *chan)
{
	ERRSTACK(1);

	if (c->state != 0)
		error(EBUSY, ERROR_FIXME);
	c->state = Connecting;
	c->cerr[0] = '\0';
	if (x->connect == NULL)
		error(EFAIL, "connect not supported");
	/* It's up to the proto connect method to not block the kthread.  This
	 * is currently the case for e.g. TCP. */
	x->connect(c, cb->f, cb->nf);
	/* This is notionally right before the rendez_sleep: either we block or
	 * we kick back to userspace.  We do this before the unlock to avoid
	 * races with c->state (rendez's internal lock deals with its race with
	 * the waker) and to avoid the excessive unlock and relock.
	 *
	 * Also, it's important that we don't do anything important for the
	 * functionality of the conv after the rendez sleep.  The non-blocking
	 * style won't call back into the kernel - it just wants the event.  I
	 * considered allowing multiple connect calls, where we just return if
	 * it was already connected, but that would break UDP, which allows
	 * multiple different connect calls. */
	if ((chan->flag & O_NONBLOCK) && !connected(c))
		error(EINPROGRESS, "connection not ready yet");
	qunlock(&c->qlock);
	if (waserror()) {
		/* callers expect the qlock to be held on the error path */
		qlock(&c->qlock);
		nexterror();
	}
	rendez_sleep(&c->cr, connected, c);
	qlock(&c->qlock);
	poperror();
	if (c->cerr[0] != '\0')
		error(EFAIL, c->cerr);
}
/*
 * called by protocol announce routine to set addresses
 *
 * clears the remote address and port, then takes the local [address!]port
 * from argv[1].  exactly two arguments are expected.
 */
void Fsstdannounce(struct conv *c, char *argv[], int argc)
{
	memset(c->raddr, 0, sizeof(c->raddr));
	c->rport = 0;

	if (argc != 2)
		error(EINVAL, "bad args to announce");
	setladdrport(c, argv[1], 1);
}
/*
 * rendez condition used while waiting for an announce to finish: true once
 * the conversation (passed as a void pointer) is in the Announced state
 */
static int announced(void *a)
{
	struct conv *cv = a;

	return cv->state == Announced;
}
/* Handles the "announce" ctl message: starts the announcement and sleeps
 * until it is set up.
 *
 * Called with c->qlock held; the lock is dropped around the sleep and is held
 * again on return, and also when an error is raised from the sleep.
 *
 * Raises EBUSY if the conv's state is not 0, EFAIL if the protocol has no
 * announce method, and EFAIL with the text in c->cerr if that is non-empty
 * after the wait. */
static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
{
	ERRSTACK(1);

	if (c->state != 0)
		error(EBUSY, ERROR_FIXME);
	c->state = Announcing;
	c->cerr[0] = '\0';
	if (x->announce == NULL)
		error(EFAIL, "announce not supported");
	x->announce(c, cb->f, cb->nf);
	qunlock(&c->qlock);
	if (waserror()) {
		/* callers expect the qlock to be held on the error path */
		qlock(&c->qlock);
		nexterror();
	}
	rendez_sleep(&c->cr, announced, c);
	qlock(&c->qlock);
	poperror();
	if (c->cerr[0] != '\0')
		error(EFAIL, c->cerr);
}
/*
 * called by protocol bind routine to set addresses
 *
 * takes the local [address!]port from argv[1]; exactly two arguments are
 * expected.
 */
void Fsstdbind(struct conv *c, char *argv[], int argc)
{
	if (argc != 2)
		error(EINVAL, "bad args to bind");
	setladdrport(c, argv[1], 0);
}
/* Handles the "bind" ctl message: uses the protocol's own bind method when it
 * has one, and Fsstdbind() otherwise. */
static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
{
	if (x->bind != NULL) {
		x->bind(c, cb->f, cb->nf);
		return;
	}
	Fsstdbind(c, cb->f, cb->nf);
}
/* Helper for protocols that support the bypass: hands bp to the conv's read
 * queue if the conv is in Bypass state, and frees it otherwise.
 *
 * The state check and the qpass() both happen under cv->qlock.  That is
 * needed because a concurrent closeconv() could tear down the bypass and free
 * the bypass queues; the underlying problem is that conversation lifetimes in
 * #ip are not managed well.
 *
 * Either way the block(s) are consumed: on failure it is our job to free
 * them. */
void bypass_or_drop(struct conv *cv, struct block *bp)
{
	qlock(&cv->qlock);
	if (cv->state != Bypass)
		freeblist(bp);
	else
		qpass(cv->rq, bp);
	qunlock(&cv->qlock);
}
/* Pushes the block directly to the appropriate ipoput function, chosen by the
 * version nibble in the first byte of the packet.  arg is the struct conv.
 *
 * It is the protocol's responsibility (and therefore ours here) to make sure
 * enough of the IP header is contiguous in the block: ipoput{4,6} assume so,
 * and the other protocols account for the IP header in their own header.
 *
 * TTL and TOS are the defaults.  We could look into the block to see what the
 * user asked for, but we are bypassing the protocol layer, not the IP layer.
 *
 * NOTE(review): in the unknown-version case bp is not freed before error() is
 * raised - confirm who owns the block on that path. */
static void proto_bypass_kick(void *arg, struct block *bp)
{
	struct conv *cv = (struct conv*)arg;
	struct Fs *fs = cv->p->f;
	uint8_t vers_nibble;

	bp = pullupblock(bp, 1);
	if (!bp)
		error(EINVAL, "Proto bypass unable to pullup a byte!");
	vers_nibble = *(uint8_t*)bp->rp & 0xf0;

	if (vers_nibble == IP_VER4) {
		bp = pullupblock(bp, IPV4HDR_LEN);
		if (!bp)
			error(EINVAL,
			      "Proto bypass unable to pullup v4 header");
		ipoput4(fs, bp, FALSE, MAXTTL, DFLTTOS, NULL);
	} else if (vers_nibble == IP_VER6) {
		bp = pullupblock(bp, IPV6HDR_LEN);
		if (!bp)
			error(EINVAL,
			      "Proto bypass unable to pullup v6 header");
		ipoput6(fs, bp, FALSE, MAXTTL, DFLTTOS, NULL);
	} else {
		error(EINVAL, "Proto bypass block had unknown IP version 0x%x",
		      vers_nibble);
	}
}
/* Switches cv over to the bypass queues, remembering the protocol's own
 * queues in rq_save/wq_save.  Separate queues are used for two reasons:
 * 1) Protocol independence.  TCP and UDP, for instance, could use very
 * different QIO styles.
 * 2) We need our own kick/bypass method.  udpcreate() and this function use
 * qbypass() (just blast it out), while TCP uses qopen() with a kick and still
 * follows queuing discipline.
 *
 * As far as qio is concerned we act as our own protocol, the bypass protocol,
 * even though the conv keeps its real protocol type (e.g. TCP).
 *
 * The old queues cannot be freed.  In #ip the queues are created together
 * with the conv, and the conv is never freed - much like a slab allocator
 * that never frees objects and only reinitializes them a little.
 *
 * Queue behavior is basically that of UDP:
 * - rq accepts packets and drops on overflow.
 * - rq is Qmsg, plus Qcoalesce so that zero-length blocks are ignored.
 * - outbound (wq) messages are kicked straight out.
 *
 * Qmsg can drop parts of packets: whatever the reader does not consume from a
 * message in one read is discarded, similar to SOCK_DGRAM and recvfrom().
 * Short of major changes there is no nice way to get individual messages with
 * read().  Userspace using the bypass has to learn the MTU of the NIC the IP
 * stack is attached to and read at least that much each time. */
static void setup_proto_qio_bypass(struct conv *cv)
{
	/* receive side */
	cv->rq_save = cv->rq;
	cv->rq = qopen(BYPASS_QMAX, Qmsg | Qcoalesce, 0, 0);

	/* transmit side */
	cv->wq_save = cv->wq;
	cv->wq = qbypass(proto_bypass_kick, cv);
}
/* Reverses setup_proto_qio_bypass(): frees the bypass queues and puts the
 * saved protocol queues back in place, clearing the save slots. */
static void undo_proto_qio_bypass(struct conv *cv)
{
	struct queue *bypass_rq = cv->rq;
	struct queue *bypass_wq = cv->wq;

	qfree(bypass_rq);
	qfree(bypass_wq);

	cv->rq = cv->rq_save;
	cv->rq_save = NULL;
	cv->wq = cv->wq_save;
	cv->wq_save = NULL;
}
/* Standard address setup for a bypass: clears the remote address and port,
 * then takes the local [address!]port from argv[1], parsed the same way as
 * for an announce.  Exactly two arguments are expected. */
void Fsstdbypass(struct conv *cv, char *argv[], int argc)
{
	memset(cv->raddr, 0, sizeof(cv->raddr));
	cv->rport = 0;

	if (argc == 2)
		setladdrport(cv, argv[1], 1);
	else
		error(EINVAL, "Bad args (was %d, need 2) to bypass", argc);
}
/* Handles the "bypass" ctl message.  Raises EFAIL if the protocol has no
 * bypass method; otherwise runs it, swaps in the bypass queues and marks the
 * conv as Bypass. */
static void bypassctlmsg(struct Proto *x, struct conv *cv, struct cmdbuf *cb)
{
	if (x->bypass == NULL)
		error(EFAIL, "Protocol %s does not support bypass", x->name);

	/* The protocol has to set the port (usually via Fsstdbypass) and then
	 * do whatever is needed so it can find the conv again on receive
	 * (usually by adding it to a hash table). */
	x->bypass(cv, cb->f, cb->nf);

	setup_proto_qio_bypass(cv);
	cv->state = Bypass;
}
static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
{
if (cb->nf < 2)
goto err;
if (!strcmp(cb->f[1], "rd")) {
qhangup(cv->rq, "shutdown");
if (cv->p->shutdown)
cv->p->shutdown(cv, SHUT_RD);
} else if (!strcmp(cb->f[1], "wr")) {
qhangup(cv->wq, "shutdown");
if (cv->p->shutdown)
cv->p->shutdown(cv, SHUT_WR);
} else if (!strcmp(cb->f[1], "rdwr")) {
qhangup(cv->rq, "shutdown");
qhangup(cv->wq, "shutdown");
if (cv->p->shutdown)
cv->p->shutdown(cv, SHUT_RDWR);
} else {
goto err;
}
return;
err:
error(EINVAL, "shutdown [rx|tx|rxtx]");
}
/* Handles the "tos" ctl message: sets the conv's type of service from the
 * argument, or to 0 when no argument is given. */
static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
{
	c->tos = (cb->nf < 2) ? 0 : atoi(cb->f[1]);
}
/* Handles the "ttl" ctl message: sets the conv's TTL from the argument, or to
 * MAXTTL when no argument is given. */
static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
{
	c->ttl = (cb->nf < 2) ? MAXTTL : atoi(cb->f[1]);
}
/* Binds a conversation exactly as if the user had written "bind *" to its ctl
 * file.  The temporary command buffer is freed on both the normal and the
 * error path. */
static void autobind(struct conv *cv)
{
	ERRSTACK(1);
	struct cmdbuf *bind_cmd = parsecmd("bind *", 7);

	if (waserror()) {
		kfree(bind_cmd);
		nexterror();
	}
	bindctlmsg(cv->p, cv, bind_cmd);
	poperror();
	kfree(bind_cmd);
}
/* Write handler of the ip device (installed as .write in ipdevtab).
 *
 * Dispatches on the file type encoded in ch->qid:
 *  - Qdata: queues the n bytes at v on the conversation's write queue,
 *    auto-binding first if the conversation has no local port.
 *  - Qarp, Qiproute, Qndb: forwarded to arpwrite / routewrite / ndbwrite,
 *    whose return value is returned directly.
 *  - Qlog: forwarded to netlogctl; returns n.
 *  - Qctl: parses the buffer as a control command and runs it with the
 *    conversation's qlock held.
 *  - anything else: raises EPERM.
 *
 * Returns n for Qdata, Qlog and Qctl.  Failures are reported through error()
 * rather than through the return value. */
static size_t ipwrite(struct chan *ch, void *v, size_t n, off64_t off)
{
ERRSTACK(1);
struct conv *c;
struct Proto *x;
char *p;
struct cmdbuf *cb;
uint8_t ia[IPaddrlen], ma[IPaddrlen];
struct Fs *f;
char *a;
a = v;
f = ipfs[ch->dev];
switch (TYPE(ch->qid)) {
default:
error(EPERM, ERROR_FIXME);
case Qdata:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
/* connection-less protocols (UDP) can write without manually
* binding. */
if (c->lport == 0)
autobind(c);
/* Honor the chan's O_NONBLOCK flag when queueing the data. */
if (ch->flag & O_NONBLOCK)
qwrite_nonblock(c->wq, a, n);
else
qwrite(c->wq, a, n);
break;
case Qarp:
return arpwrite(f, a, n);
case Qiproute:
return routewrite(f, ch, a, n);
case Qlog:
netlogctl(f, a, n);
return n;
case Qndb:
return ndbwrite(f, a, off, n);
case Qctl:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
/* The command is parsed before the lock is taken; from here on, every
* error path must both unlock the conv and free cb, which is what the
* waserror() block below does. */
cb = parsecmd(a, n);
qlock(&c->qlock);
if (waserror()) {
qunlock(&c->qlock);
kfree(cb);
nexterror();
}
if (cb->nf < 1)
error(EFAIL, "short control request");
/* Generic control verbs are handled here; anything unrecognized is
* offered to the protocol's own ctl hook, if it has one. */
if (strcmp(cb->f[0], "connect") == 0)
connectctlmsg(x, c, cb, ch);
else if (strcmp(cb->f[0], "announce") == 0)
announcectlmsg(x, c, cb);
else if (strcmp(cb->f[0], "bind") == 0)
bindctlmsg(x, c, cb);
else if (strcmp(cb->f[0], "bypass") == 0)
bypassctlmsg(x, c, cb);
else if (strcmp(cb->f[0], "shutdown") == 0)
shutdownctlmsg(c, cb);
else if (strcmp(cb->f[0], "ttl") == 0)
ttlctlmsg(c, cb);
else if (strcmp(cb->f[0], "tos") == 0)
tosctlmsg(c, cb);
else if (strcmp(cb->f[0], "ignoreadvice") == 0)
c->ignoreadvice = 1;
else if (strcmp(cb->f[0], "addmulti") == 0) {
/* "addmulti ifc-addr": join the conv's remote address as the
* multicast group.  "addmulti ifc-addr group-addr": join the
* explicitly given group instead. */
if (cb->nf < 2)
error(EFAIL,
"addmulti needs interface address");
if (cb->nf == 2) {
if (!ipismulticast(c->raddr))
error(EFAIL, "addmulti for a non multicast address");
parseip(ia, cb->f[1]);
ipifcaddmulti(c, c->raddr, ia);
} else {
parseip(ma, cb->f[2]);
if (!ipismulticast(ma))
error(EFAIL, "addmulti for a non multicast address");
parseip(ia, cb->f[1]);
ipifcaddmulti(c, ma, ia);
}
} else if (strcmp(cb->f[0], "remmulti") == 0) {
/* "remmulti ifc-addr": always uses the conv's remote address as
* the multicast group. */
if (cb->nf < 2)
error(EFAIL,
"remmulti needs interface address");
if (!ipismulticast(c->raddr))
error(EFAIL,
"remmulti for a non multicast address");
parseip(ia, cb->f[1]);
ipifcremmulti(c, c->raddr, ia);
} else if (x->ctl != NULL) {
x->ctl(c, cb->f, cb->nf);
} else
error(EFAIL, "unknown control request");
/* Success path: same cleanup as the waserror() block, then pop it. */
qunlock(&c->qlock);
kfree(cb);
poperror();
}
return n;
}
/* Block-write handler of the ip device (installed as .bwrite in ipdevtab).
 *
 * For a Qdata file the block is queued on the conversation's write queue and
 * the block's length is returned; a block chain is first collapsed into a
 * single block.  Every other file type is handed to devbwrite(). */
static size_t ipbwrite(struct chan *ch, struct block *bp, off64_t offset)
{
	struct conv *conv;
	size_t len;

	if (TYPE(ch->qid) != Qdata)
		return devbwrite(ch, bp, offset);

	conv = chan2conv(ch);
	if (bp->next)
		bp = concatblock(bp);
	/* Capture the length now: the block is handed over to the queue
	 * below and is not looked at again here. */
	len = BLEN(bp);
	if (ch->flag & O_NONBLOCK)
		qbwrite_nonblock(conv->wq, bp);
	else
		qbwrite(conv->wq, bp);
	return len;
}
static void fire_data_taps(struct conv *conv, int filter)
{
struct fd_tap *tap_i;
/* At this point, we have an event we want to send to our taps (if any).
* The lock protects list integrity and the existence of the tap.
*
* Previously, I thought of using the conv qlock. That actually breaks,
* due to weird usages of the qlock (someone holds it for a long time,
* blocking the inbound wakeup from etherread4).
*
* I opted for a spinlock for a couple reasons:
* - fire_tap should not block. ideally it'll be fast too (it's mostly
* a send_event).
* - our callers might not want to block. A lot of network wakeups will
* come network processes (etherread4) or otherwise unrelated to this
* particular conversation. I'd rather do something like fire off a
* KMSG than block those.
* - if fire_tap takes a while, holding the lock only slows down other
* events on this *same* conversation, or other tap registration. not a
* huge deal. */
spin_lock(&conv->tap_lock);
SLIST_FOREACH(tap_i, &conv->data_taps, link)
fire_tap(tap_i, filter);
spin_unlock(&conv->tap_lock);
}
/* Queue wake callback registered on a conversation's rq and wq while data
 * taps exist.  data is the conversation; filter is the event reported by
 * the queue q. */
static void ip_wake_cb(struct queue *q, void *data, int filter)
{
	struct conv *cv = data;

	/* Events from the "far" end of each queue are ignored.  E.g. we care
	 * about the WQ becoming writable, but our own writes also make it
	 * readable, and that must not trigger a tap (likewise, writability of
	 * the RQ is uninteresting).  qio has no idea how or why we use a
	 * queue, or who sits at either end - hence this callback. */
	if (((filter & FDTAP_FILT_READABLE) && q == cv->wq) ||
	    ((filter & FDTAP_FILT_WRITABLE) && q == cv->rq))
		return;

	fire_data_taps(cv, filter);
}
/* FD tap handler of the ip device (installed as .tapfd in ipdevtab).
 *
 * Adds (FDTAP_CMD_ADD) or removes (FDTAP_CMD_REM) tap on the conversation
 * behind chan.  Only Qdata and Qlisten files can be tapped, each with its own
 * set of permitted filter bits.
 *
 * Returns 0 on success.  On failure returns -1 with errno set to ENOSYS and
 * an errstr describing the unsupported filter, command or file type. */
int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
{
struct conv *conv = chan2conv(chan);
int ret;
/* Filter bits a tap on a data file may request. */
#define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY | \
FDTAP_FILT_ERROR)
/* Filter bits a tap on a listen file may request. */
#define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
switch (TYPE(chan->qid)) {
case Qdata:
if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
set_errno(ENOSYS);
set_errstr("Unsupported #%s data tap %p, must be %p",
devname(), tap->filter,
DEVIP_LEGAL_DATA_TAPS);
return -1;
}
/* tap_lock guards the tap list; it is the same lock fire_data_taps()
* takes while walking the list. */
spin_lock(&conv->tap_lock);
switch (cmd) {
case (FDTAP_CMD_ADD):
/* First data tap: start receiving wake callbacks from both
* queues. */
if (SLIST_EMPTY(&conv->data_taps)) {
qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
}
SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
ret = 0;
break;
case (FDTAP_CMD_REM):
SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
/* Last data tap gone: clear the queues' wake callbacks. */
if (SLIST_EMPTY(&conv->data_taps)) {
qio_set_wake_cb(conv->rq, 0, conv);
qio_set_wake_cb(conv->wq, 0, conv);
}
ret = 0;
break;
default:
set_errno(ENOSYS);
set_errstr("Unsupported #%s data tap command %p",
devname(), cmd);
ret = -1;
}
spin_unlock(&conv->tap_lock);
return ret;
case Qlisten:
if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
set_errno(ENOSYS);
set_errstr("Unsupported #%s listen tap %p, must be %p",
devname(), tap->filter,
DEVIP_LEGAL_LISTEN_TAPS);
return -1;
}
/* Listen taps only maintain the list; no queue callbacks are
* installed or cleared here. */
spin_lock(&conv->tap_lock);
switch (cmd) {
case (FDTAP_CMD_ADD):
SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
ret = 0;
break;
case (FDTAP_CMD_REM):
SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
ret = 0;
break;
default:
set_errno(ENOSYS);
set_errstr("Unsupported #%s listen tap command %p",
devname(), cmd);
ret = -1;
}
spin_unlock(&conv->tap_lock);
return ret;
default:
set_errno(ENOSYS);
set_errstr("Can't tap #%s file type %d", devname(),
TYPE(chan->qid));
return -1;
}
}
/* chan_ctl handler of the ip device (installed as .chan_ctl in ipdevtab).
 *
 * CCTL_SET_FL is accepted and returns 0 without doing any work here; every
 * other operation raises EINVAL.  a1..a4 are unused. */
static unsigned long ip_chan_ctl(struct chan *c, int op, unsigned long a1,
				 unsigned long a2, unsigned long a3,
				 unsigned long a4)
{
	if (op == CCTL_SET_FL)
		return 0;
	else
		error(EINVAL, "%s does not support %d", __func__, op);
}
/* Operations table for the "ip" device.  Its .name is what devname() at the
 * top of this file returns.
 *
 * Operations the ip device implements itself are the ip* functions; the
 * dev*-named entries (devshutdown, devcreate, devremove, devpower) are not
 * defined in this file - presumably the generic device-layer defaults.
 * NOTE(review): __devtab is assumed to be the attribute that registers this
 * table with the device layer - confirm against its definition. */
struct dev ipdevtab __devtab = {
.name = "ip",
.reset = ipreset,
.init = ipinit,
.shutdown = devshutdown,
.attach = ipattach,
.walk = ipwalk,
.stat = ipstat,
.open = ipopen,
.create = devcreate,
.close = ipclose,
.read = ipread,
.bread = ipbread,
.write = ipwrite,
.bwrite = ipbwrite,
.remove = devremove,
.wstat = ipwstat,
.power = devpower,
.chaninfo = ipchaninfo,
.tapfd = iptapfd,
.chan_ctl = ip_chan_ctl,
};
/* Registers protocol p with IP stack instance f.
 *
 * Returns 0 on success, or -1 if f already holds Maxproto protocols or if
 * another protocol has already claimed p->ipproto.  Panics if the
 * conversation table cannot be allocated. */
int Fsproto(struct Fs *f, struct Proto *p)
{
	if (f->np >= Maxproto)
		return -1;

	qlock_init(&p->qlock);
	p->f = f;

	/* Protocols with an IP protocol number get an entry in the
	 * number-to-protocol table; each number may be claimed only once. */
	if (p->ipproto > 0) {
		if (f->t2p[p->ipproto])
			return -1;
		f->t2p[p->ipproto] = p;
	}

	p->qid.type = QTDIR;
	p->qid.path = QID(f->np, 0, Qprotodir);

	/* One pointer slot per possible conversation, plus one spare. */
	p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
	if (!p->conv)
		panic("Fsproto");

	p->x = f->np;
	p->nextport = 0;
	p->nextrport = 600;

	/* Publish the protocol in the next free slot of f. */
	f->p[f->np] = p;
	f->np++;
	return 0;
}
/*
 * Reports whether IP protocol number proto is built in, i.e. whether a
 * protocol is registered for it in f's number-to-protocol table.
 * Returns 1 if so, 0 otherwise.
 */
int Fsbuiltinproto(struct Fs *f, uint8_t proto)
{
	if (f->t2p[proto] != NULL)
		return 1;
	return 0;
}
/*
 * called with protocol locked
 *
 * Finds or creates a free conversation of protocol p, marks it in use and
 * owned by user, resets its addressing state and reopens its queues.
 *
 * The slots p->conv[0 .. p->nc) are scanned in order.  An empty slot gets a
 * freshly allocated conversation; an occupied slot is reused if its qlock can
 * be taken without blocking and neither c->inuse nor the protocol's inuse
 * hook reports it busy.  If no slot qualifies, the protocol's gc hook (if
 * any) is called and the scan restarts when it returns nonzero.
 *
 * Returns the conversation (with its qlock released), or NULL if none could
 * be obtained.  Allocation failures raise ENOMEM via error().
 */
struct conv *Fsprotoclone(struct Proto *p, char *user)
{
struct conv *c, **pp, **ep;
retry:
c = NULL;
ep = &p->conv[p->nc];
for (pp = p->conv; pp < ep; pp++) {
c = *pp;
if (c == NULL) {
/* Empty slot: build a brand new conversation for it. */
c = kzmalloc(sizeof(struct conv), 0);
if (c == NULL)
error(ENOMEM,
"conv kzmalloc(%d, 0) failed in Fsprotoclone",
sizeof(struct conv));
qlock_init(&c->qlock);
qlock_init(&c->listenq);
rendez_init(&c->cr);
rendez_init(&c->listenr);
/* already = 0; set to be futureproof */
SLIST_INIT(&c->data_taps);
SLIST_INIT(&c->listen_taps);
spinlock_init(&c->tap_lock);
/* Both ways of leaving the loop with a conv (this one and the reuse
* path below) do so with c->qlock held; it is released at the end. */
qlock(&c->qlock);
c->p = p;
/* The conv's index is its slot position in p->conv. */
c->x = pp - p->conv;
if (p->ptclsize != 0) {
c->ptcl = kzmalloc(p->ptclsize, 0);
if (c->ptcl == NULL) {
/* c has not been stored in the slot yet, so freeing it here
* leaves no dangling pointer behind. */
kfree(c);
error(ENOMEM,
"ptcl kzmalloc(%d, 0) failed in Fsprotoclone",
p->ptclsize);
}
}
*pp = c;
p->ac++;
c->eq = qopen(1024, Qmsg, 0, 0);
/* The protocol's create hook must set up rq and wq (asserted below). */
(*p->create) (c);
assert(c->rq && c->wq);
break;
}
/* Occupied slot: only consider it if the lock is free right now. */
if (canqlock(&c->qlock)) {
/*
* make sure both processes and protocol
* are done with this Conv
*/
if (c->inuse == 0 && (p->inuse == NULL ||
(*p->inuse)(c) == 0))
break;
qunlock(&c->qlock);
}
}
/* Ran off the end of the table: nothing free.  Let the protocol try to
* reclaim conversations and rescan if it says it made progress. */
if (pp >= ep) {
if (p->gc != NULL && (*p->gc) (p))
goto retry;
return NULL;
}
/* c is locked here.  Reset it to a pristine, idle state. */
c->inuse = 1;
kstrdup(&c->owner, user);
c->perm = 0660;
c->state = Idle;
ipmove(c->laddr, IPnoaddr);
ipmove(c->raddr, IPnoaddr);
c->r = NULL;
c->rgen = 0;
c->lport = 0;
c->rport = 0;
c->restricted = 0;
c->ttl = MAXTTL;
c->tos = DFLTTOS;
qreopen(c->rq);
qreopen(c->wq);
qreopen(c->eq);
qunlock(&c->qlock);
return c;
}
/* Signals that a pending announce or connect on c has completed.
 *
 * A non-empty msg is recorded in c->cerr.  An Announcing conversation becomes
 * Announced and a Connecting one becomes Connected; other states are left
 * alone.  Sleepers on c->cr are woken and the data taps are fired with
 * FDTAP_FILT_WRITABLE.  Always returns 0. */
int Fsconnected(struct conv *c, char *msg)
{
	int have_msg = (msg != NULL && *msg != '\0');

	if (have_msg)
		strlcpy(c->cerr, msg, sizeof(c->cerr));

	if (c->state == Announcing)
		c->state = Announced;
	else if (c->state == Connecting)
		c->state = Connected;

	rendez_wakeup(&c->cr);
	/* Connection status can be polled or tapped by the user via Qdata. */
	fire_data_taps(c, FDTAP_FILT_WRITABLE);
	return 0;
}
/* Returns the protocol for IP protocol number proto.  If f has an ipmux
 * protocol set, that one is returned for every protocol number; otherwise
 * the lookup goes through the number-to-protocol table (and may yield NULL). */
struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
{
	return f->ipmux ? f->ipmux : f->t2p[proto];
}
/* Like Fsrcvpcol(), but never consults f->ipmux: returns the entry for proto
 * in the number-to-protocol table (possibly NULL). */
struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
{
	struct Proto *registered = f->t2p[proto];

	return registered;
}
static void fire_listener_taps(struct conv *conv)
{
struct fd_tap *tap_i;
if (SLIST_EMPTY(&conv->listen_taps))
return;
spin_lock(&conv->tap_lock);
SLIST_FOREACH(tap_i, &conv->listen_taps, link)
fire_tap(tap_i, FDTAP_FILT_READABLE);
spin_unlock(&conv->tap_lock);
}
/*
 * called with protocol locked
 *
 * Creates a conversation for an incoming call on listening conversation c
 * and appends it to c's incall list.  The new conversation gets the given
 * remote/local address and port, the given IP version, and state Connected.
 * Listeners sleeping on c->listenr are woken and listen taps are fired.
 *
 * Returns the new conversation, or NULL if c already has Maxincall pending
 * calls or no conversation could be obtained from the protocol.
 */
struct conv *Fsnewcall(struct conv *c, uint8_t *raddr, uint16_t rport,
		       uint8_t *laddr, uint16_t lport, uint8_t version)
{
	struct conv *call = NULL;
	struct conv **tail;
	int pending = 0;

	qlock(&c->qlock);

	/* Walk to the end of the incall list, counting entries on the way. */
	tail = &c->incall;
	while (*tail) {
		pending++;
		tail = &(*tail)->next;
	}

	/* find a free conversation, unless the backlog is already full */
	if (pending < Maxincall)
		call = Fsprotoclone(c->p, network);
	if (call == NULL) {
		qunlock(&c->qlock);
		return NULL;
	}

	ipmove(call->raddr, raddr);
	call->rport = rport;
	ipmove(call->laddr, laddr);
	call->lport = lport;
	call->next = NULL;
	*tail = call;
	call->state = Connected;
	call->ipversion = version;
	qunlock(&c->qlock);

	/* Notify whoever is waiting for a call, after dropping the lock. */
	rendez_wakeup(&c->listenr);
	fire_listener_taps(c);
	return call;
}
/* Writes n bytes from a into f's ndb buffer at offset off, NUL-terminates
 * the result, bumps the ndb version and records the modification time.
 *
 * The write must start at or before the end of the current string (no holes)
 * and must leave room for the terminator; violations raise EIO.  A negative
 * length raises EINVAL: n is a signed int that is mixed into unsigned
 * arithmetic below, so a negative value could otherwise wrap around, slip
 * past the bounds check and reach memmove() as a huge size.
 *
 * Returns n. */
static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
{
	if (n < 0)
		error(EINVAL, "ndb write length %d is negative", n);
	if (off > strlen(f->ndb))
		error(EIO, ERROR_FIXME);
	/* Same limit as "off + n >= sizeof(f->ndb) - 1", written so the
	 * arithmetic cannot wrap: off is at most strlen(f->ndb) here, so the
	 * subtraction does not underflow as long as f->ndb is terminated. */
	if ((size_t)n >= sizeof(f->ndb) - 1 - off)
		error(EIO, ERROR_FIXME);
	memmove(f->ndb + off, a, n);
	f->ndb[off + n] = 0;
	f->ndbvers++;
	f->ndbmtime = seconds();
	return n;
}
/* Returns the conversation count to use: four times Nchans.
 *
 * The inherited code picked between Nchans * 4 and plain Nchans depending on
 * whether at least 128MB of memory was available (conf.npage * BY2PG);
 * that test is disabled and the larger value is always used. */
uint32_t scalednconv(void)
{
	uint32_t scaled = Nchans * 4;

	return scaled;
}