/* blob: a50bcef202c6be75e815e84e0579e2134cdbb32c [file] [log] [blame] */
//#define DEBUG
/* Copyright 2014 Google Inc.
* Copyright (c) 2013 The Regents of the University of California
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* devnix/#t: a device for NIX mode
*
* A struct nix is a "visitor" chunk of code. It has a memory image, and can be
* told to run an arbitrary address (in that image or otherwise) in kernel mode
* on various pcores, to which it has exclusive access.
*
* TODO:
*
* - FOR THE MOMENT, this is only intended to run one NIX at a time. Too many
* sharp edges for any other mode.
*
* - memory images: we have one now for all nixs. that'll be a mess.
*
* - what do we want to do for refcnting? decref on chan close? or remove?
* how do we manage the struct nix memory? (MGMT)
* - right now, we aren't decreffing at all. it's easier to work with from
* the shell, but it's definitely a debugging thing. the proper way to do
* these devices is to release on close (i think). the use case for the
* NIX is a "turn it on once and reboot if you don't like it", so this is
* fine for now.
* - we're using c->aux, which needs to be an uncounted ref, in my opinion.
* i messed around with this for a long time with devsrv, and all the
* different ways 9ns interacts with a device make it very tricky.
* - once we start freeing, we'll need to manage the memory better. if we
* have holes in the nixs[], we'll need to handle that in nixgen
*
* - how are we going to stop a nix?
* - graceful vs immediate? with some sort of immediate power-cord style
* halting, the entire nix is garbage once we pull the plug. a more
* graceful style would require the nix to poll or something - probably
* overkill.
* - could send an immediate kmsg (IPI), but we'd need to do some
* bookkeeping to know we're interrupting a NIX and whatnot
* - if we were sure it's a nix core, we might be able to send an immediate
* message telling the core to just smp_idle. doing that from hard IRQ
* would break a little, so we'd need to be careful (adjust various
* flags, etc).
* - another option would be to hack the halted context and have it call
* a cleanup function (which ultimately smp_idles)
* - if we had a process running the core, and "running the NIX" was a
* syscall or something, we'd want to abort the syscall. but since the
* syscall isn't trying to rendez or sleep, we couldn't use the existing
* facilities. so it's the same problem: know it is a nix, somehow
* kill/cleanup. then just smp_idle.
* - we'll also need to unreserve a core first, so we don't have any
* concurrent startups. careful of various races with cores coming and
* going. we can lock the nix before sending the message, but stale RKMs
* could exist for a while.
* - maybe we use a ktask, named nixID or something, to help detect if a
* nix is running. might also need to track the number of messages sent
* and completed (track completed via the wrapper)
*/
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <sys/queue.h>
#include <smp.h>
#include <kref.h>
#include <atomic.h>
#include <alarm.h>
#include <event.h>
#include <umem.h>
#include <devalarm.h>
#include <arch/types.h>
#include <arch/emulate.h>
#include <arch/vmdebug.h>
#include <kdebug.h>
#include <bitmap.h>
/* File types stored in the low bits of a qid path.  Values are consecutive,
 * starting at 1.  nixgen() steps from Qctl to Qimage by adding to Qctl, so
 * those two must stay adjacent and in this order. */
enum {
	Qtopdir = 1,
	Qclone = 2,
	Qstat = 3,
	Qnixdir = 4,
	Qctl = 5,
	Qimage = 6,
};
/* A qid path packs two fields: the low INDEX_SHIFT bits hold the file TYPE
 * (one of the Q* values above) and the remaining high bits hold the index
 * into the nixs[] array.
 * We reserve the right to make the high bits an id later. */
#define INDEX_SHIFT 5
/* One NIX instance.  Every nix has an image.
 * Note that the image can be read even as it is running. */
struct nix {
	/* Refcount; set to 1 with nix_release() as its release function when the
	 * nix is created in nixopen().  Nothing takes or drops references yet. */
	struct kref kref;
	/* should this be an array of pages? Hmm. */
	/* Kernel address of the image, and its size in bytes. */
	void *image;
	unsigned long imagesize;
	/* Id handed out by newnixid(); used to name the "nix%d" directory. */
	int id;
	/* Bitmap of the cores reserved for this nix (set by the "reserve" ctl
	 * command, checked by "run" and "test"). */
	/* we could dynamically alloc one of these with num_cpus */
	DECLARE_BITMAP(cpus, MAX_NUM_CPUS);
};
/* Taken around accesses to nixs[] and nnix in nix_release(), nixgen() and
 * nixopen(). */
static spinlock_t nixlock = SPINLOCK_INITIALIZER_IRQSAVE;
/* array, not linked list. We expect few, might as well be cache friendly. */
static struct nix *nixs = NULL;
/* Number of valid entries in nixs[]. */
static int nnix = 0;
/* Set to 1 by nixinit() once the image memory was obtained; nixattach()
 * refuses to attach while it is 0. */
static int nixok = 0;
/* Physical address and size of the single image shared by all nixs, taken
 * from the kernel config.  nixinit() may round img_size up. */
/* TODO: make this per-nix, somehow. */
static physaddr_t img_paddr = CONFIG_NIX_IMG_PADDR;
static size_t img_size = CONFIG_NIX_IMG_SIZE;
/* Source of nix ids; see newnixid(). */
static atomic_t nixid = 0;
/* Maps a qid to its slot in nixs[].  The bits of the qid path above
 * INDEX_SHIFT are the array index, which is not the same thing as the nix's
 * id: ids keep increasing even when a struct nix slot gets reused.  No bounds
 * check is done here. */
static inline struct nix *QID2NIX(struct qid q)
{
	return nixs + (q.path >> INDEX_SHIFT);
}
/* Extracts the file type (a Q* value) from the low INDEX_SHIFT bits of the
 * qid path. */
static inline int TYPE(struct qid q)
{
	const int type_mask = (1 << INDEX_SHIFT) - 1;

	return q.path & type_mask;
}
/* Builds a qid path from an index into nixs[] and a file type: the index goes
 * above INDEX_SHIFT, the type into the low bits. */
static inline int QID(int index, int type)
{
	int path = index << INDEX_SHIFT;

	path |= type;
	return path;
}
/* Returns the part of the qid path above the type bits, i.e. the value that
 * was passed as 'index' to QID(). */
static inline int QID2ID(struct qid q)
{
	int upper = q.path >> INDEX_SHIFT;

	return upper;
}
/* TODO: (MGMT) not called yet. -- we have to unlink the nix
 *
 * kref release function for a struct nix (installed by kref_init() in
 * nixopen()).  Removes the nix from nixs[] by copying the last element over
 * it and shrinking nnix.  The image and the kref are not cleaned up yet. */
static void nix_release(struct kref *kref)
{
	struct nix *v = container_of(kref, struct nix, kref);

	spin_lock_irqsave(&nixlock);
	/* cute trick. Save the last element of the array in place of the
	 * one we're deleting. Reduce nnix. Don't realloc; that way, next
	 * time we add a nix the allocator will just return.
	 * Well, this is stupid, because when we do this, we break
	 * the QIDs, which have pointers embedded in them.
	 * darn it, may have to use a linked list. Nope, will probably
	 * just walk the array until we find a matching id. Still ... yuck.
	 *
	 * If we have lots, we can track the lowest free, similar to FDs and
	 * low_fd.  honestly, we need an integer allocator (vmem and magazine
	 * paper) */
	if (v != &nixs[nnix - 1]) {
		/* free the image ... oops */
		/* get rid of the kref. */
		*v = nixs[nnix - 1];
	}
	nnix--;
	/* Must be the irqsave unlock to match the irqsave lock above; a plain
	 * spin_unlock() would not restore the saved IRQ state. */
	spin_unlock_irqsave(&nixlock);
}
/* NIX ids run in the range 0..infinity. */
static int newnixid(void)
{
return atomic_fetch_and_add(&nixid, 1);
}
/* Directory generator for devnix, handed to devwalk/devstat/devdirread.
 *
 * c is the chan being walked/read, s selects the s-th entry of c's directory
 * (or DEVDOTDOT for ".."), and dp receives the generated entry.  entry_name
 * and the dirtab arguments are unused.
 *
 * Returns 1 when dp was filled in and -1 when there is no such entry. */
static int nixgen(struct chan *c, char *entry_name,
                  struct dirtab *unused, int unused_nr_dirtab,
                  int s, struct dir *dp)
{
	struct qid q;
	struct nix *nix_i;

	/* Whether we're in one dir or at the top, .. still takes us to the top.
	 * The entry must carry the top-level qid we just built (not c's own
	 * qid), and is named after this device's character, 't'. */
	if (s == DEVDOTDOT) {
		mkqid(&q, Qtopdir, 0, QTDIR);
		devdir(c, q, "#t", 0, eve, 0555, dp);
		return 1;
	}
	switch (TYPE(c->qid)) {
	case Qtopdir:
		/* Generate elements for the top level dir. We support clone, stat,
		 * nix dirs at the top level */
		if (s == 0) {
			mkqid(&q, Qclone, 0, QTFILE);
			devdir(c, q, "clone", 0, eve, 0666, dp);
			return 1;
		}
		s--;
		if (s == 0) {
			mkqid(&q, Qstat, 0, QTFILE);
			/* stat is read-only (nixwrite rejects it); keep the mode in
			 * sync with the direct-hit Qstat case below. */
			devdir(c, q, "stat", 0, eve, 0444, dp);
			return 1;
		}
		s--;	/* 1 -> 0th element, 2 -> 1st element, etc */
		spin_lock_irqsave(&nixlock);
		if (s >= nnix) {
			spin_unlock_irqsave(&nixlock);
			return -1;
		}
		nix_i = &nixs[s];
		/* TODO (MGMT): if no nix_i, advance (in case of holes) */
		snprintf(get_cur_genbuf(), GENBUF_SZ, "nix%d", nix_i->id);
		/* irqsave unlock to match the irqsave lock, so the saved IRQ state
		 * is restored. */
		spin_unlock_irqsave(&nixlock);
		mkqid(&q, QID(s, Qnixdir), 0, QTDIR);
		devdir(c, q, get_cur_genbuf(), 0, eve, 0555, dp);
		return 1;
	case Qnixdir:
		/* Gen the contents of the nix dirs */
		s += Qctl;	/* first time through, start on Qctl */
		switch (s) {
		case Qctl:
			mkqid(&q, QID(QID2ID(c->qid), Qctl), 0, QTFILE);
			devdir(c, q, "ctl", 0, eve, 0666, dp);
			return 1;
		case Qimage:
			mkqid(&q, QID(QID2ID(c->qid), Qimage), 0, QTFILE);
			devdir(c, q, "image", 0, eve, 0666, dp);
			return 1;
		}
		return -1;
	/* Need to also provide a direct hit for Qclone and all other files (at
	 * all levels of the hierarchy). Every file is both
	 * generated (via the s increments in their respective directories) and
	 * directly gen-able. devstat() will call gen with a specific path in
	 * the qid. In these cases, we make a dir for whatever they are asking
	 * for. Note the qid stays the same. I think this is what the old
	 * plan9 comments above devgen were talking about for (ii).
	 *
	 * We don't need to do this for the directories - devstat will look for
	 * a directory by path and fail. Then it will manually build the
	 * stat output (check the -1 case in devstat). */
	case Qclone:
		devdir(c, c->qid, "clone", 0, eve, 0666, dp);
		return 1;
	case Qstat:
		devdir(c, c->qid, "stat", 0, eve, 0444, dp);
		return 1;
	case Qctl:
		devdir(c, c->qid, "ctl", 0, eve, 0666, dp);
		return 1;
	case Qimage:
		devdir(c, c->qid, "image", 0, eve, 0666, dp);
		return 1;
	}
	return -1;
}
/* Test payload for the "test" ctl command: reports which core it ran on. */
void nixtest(void)
{
	int this_core = core_id();

	printk("nixtest ran on core %d\n", this_core);
}
/* Device init: rounds img_size up to a power-of-two number of pages, then
 * asks for that many contiguous physical pages at img_paddr.  nixok is set
 * only if that request succeeds. */
static void nixinit(void)
{
	size_t img_order = LOG2_UP(nr_pages(img_size));
	unsigned long long rounded_size = 1ULL << img_order << PGSHIFT;
	void *img_kaddr;

	if (img_size != rounded_size) {
		printk("nixinit rounding up image size to a power of 2 pgs (was %p)\n",
		       img_size);
		img_size = rounded_size;
	}
	img_kaddr = get_cont_phys_pages_at(img_order, img_paddr, 0);
	if (img_kaddr) {
		nixok = 1;
		printk("nixinit image at KVA %p of size %p\n", img_kaddr, img_size);
		return;
	}
	printk("nixinit failed to get an image!\n");
}
/* Attaches to the device and returns a chan for its top-level directory.
 * Throws if nixinit() did not manage to set up the image (nixok == 0). */
static struct chan *nixattach(char *spec)
{
	struct chan *top;

	if (!nixok)
		error("No NIXs available");
	top = devattach('t', spec);
	mkqid(&top->qid, Qtopdir, 0, QTDIR);
	return top;
}
/* Walk: handled by devwalk() with no dirtab, using nixgen() to generate the
 * entries. */
static struct walkqid *nixwalk(struct chan *c, struct chan *nc, char **name,
                               int nname)
{
	struct walkqid *wq = devwalk(c, nc, name, nname, NULL, 0, nixgen);

	return wq;
}
/* Stat: handled by devstat() with no dirtab, using nixgen() to generate the
 * entry. */
static int nixstat(struct chan *c, uint8_t * db, int n)
{
	int ret = devstat(c, db, n, NULL, 0, nixgen);

	return ret;
}
/* Opens a devnix file.
 *
 * Directories may only be opened read-only and without ORCLOSE.  Opening
 * "clone" creates a new nix (at most one is supported) and turns c into that
 * nix's ctl file.  Opening ctl or image points c->aux at the nix; no reference
 * is taken yet.
 *
 * Returns c with COPEN set; throws on error.
 *
 * It shouldn't matter if p = current is DYING. We'll eventually fail to insert
 * the open chan into p's fd table, then decref the chan. */
static struct chan *nixopen(struct chan *c, int omode)
{
	ERRSTACK(1);
	struct nix *v;
	struct nix *grown;

	/* Nothing to clean up on error here; just pass it on. */
	if (waserror()) {
		nexterror();
	}
	switch (TYPE(c->qid)) {
	case Qtopdir:
	case Qnixdir:
		if (omode & ORCLOSE)
			error(Eperm);
		if (!IS_RDONLY(omode))
			error(Eisdir);
		break;
	case Qclone:
		spin_lock_irqsave(&nixlock);
		if (nnix >= 1) {
			spin_unlock_irqsave(&nixlock);
			set_errno(EBUSY);
			error("Already have 1 nix, we don't support more");
		}
		/* Don't overwrite nixs until we know the allocation worked, or a
		 * failure would lose the array and hand us a NULL-based slot. */
		grown = krealloc(nixs, sizeof(nixs[0]) * (nnix + 1), 0);
		if (!grown) {
			spin_unlock_irqsave(&nixlock);
			set_errno(ENOMEM);
			error("Failed to allocate a new nix");
		}
		nixs = grown;
		v = &nixs[nnix];
		/* Fully initialize the nix before bumping nnix, so nixgen() never
		 * sees a half-built entry. */
		kref_init(&v->kref, nix_release, 1);
		v->id = newnixid();
		v->image = KADDR(img_paddr);
		v->imagesize = img_size;
		bitmap_zero(v->cpus, MAX_NUM_CPUS);
		mkqid(&c->qid, QID(nnix, Qctl), 0, QTFILE);
		c->aux = v;
		nnix++;
		/* irqsave unlock to match the irqsave lock, so the saved IRQ state
		 * is restored. */
		spin_unlock_irqsave(&nixlock);
		printk("nix image is %p with %lu bytes\n", v->image, v->imagesize);
		break;
	case Qstat:
		break;
	case Qctl:
	case Qimage:
		/* TODO: (MGMT) refcnting */
		//kref_get(&v->kref, 1);
		c->aux = QID2NIX(c->qid);
		break;
	}
	c->mode = openmode(omode);
	/* Assumes c is unique (can't be closed concurrently) */
	c->flag |= COPEN;
	c->offset = 0;
	poperror();
	return c;
}
/* Creating files is not supported: always throws Eperm. */
static void nixcreate(struct chan *c, char *name, int omode, uint32_t perm)
{
	(void)c;
	(void)name;
	(void)omode;
	(void)perm;
	error(Eperm);
}
/* Removing files is not supported: always throws Eperm. */
static void nixremove(struct chan *c)
{
	(void)c;
	error(Eperm);
}
/* wstat is not supported: always throws.  The trailing return only gives the
 * function a value on paper. */
static int nixwstat(struct chan *c, uint8_t * dp, int n)
{
	(void)c;
	(void)dp;
	(void)n;
	error("No nixwstat");
	return 0;
}
/* Close hook.  Currently does no cleanup at all: nixs stay active and no
 * reference is dropped. */
static void nixclose(struct chan *c)
{
	/* Chans without a nix attached have nothing to clean up.
	 *
	 * There are also more closes than opens. For instance, sysstat doesn't
	 * open, but it will close the chan it got from namec. We only want to
	 * clean up/decref chans that were actually open. */
	if (!c->aux || !(c->flag & COPEN))
		return;
	switch (TYPE(c->qid)) {
	case Qctl:
		/* TODO: (MGMT) the idea of 'stopping' a nix is tricky.
		 * for now, leave the NIX active even when we close ctl */
		break;
	case Qimage:
		/* TODO: (MGMT) this is where the nix's kref would be dropped:
		 * kref_put(&v->kref), with v = c->aux */
		break;
	}
}
/* Read handler.
 *
 * Directories are read via devdirread() and nixgen().  "stat" reads as the
 * current number of nixs, "ctl" as the nix's id, and "image" as the raw image
 * memory.  Any other qid type panics. */
static long nixread(struct chan *c, void *ubuf, long n, int64_t offset)
{
	struct nix *nix = c->aux;
	int type = TYPE(c->qid);

	if (type == Qtopdir || type == Qnixdir)
		return devdirread(c, ubuf, n, 0, 0, nixgen);
	if (type == Qstat)
		return readnum(offset, ubuf, n, nnix, NUMSIZE32);
	if (type == Qctl) {
		assert(nix);
		return readnum(offset, ubuf, n, nix->id, NUMSIZE32);
	}
	if (type == Qimage) {
		assert(nix);
		return readmem(offset, ubuf, n, nix->image, nix->imagesize);
	}
	panic("Bad QID %p in devnix", c->qid.path);
	return 0;
}
/* Kernel message handler sent by the "run" and "test" ctl commands: treats a0
 * as the address of a void(void) function and calls it.  srcid, a1 and a2 are
 * ignored. */
static void nixwrapper(uint32_t srcid, long a0, long a1, long a2)
{
	((void (*)(void))a0)();
	/* TODO: could do some tracking to say this message has been completed */
}
static long nixwrite(struct chan *c, void *ubuf, long n, int64_t off)
{
struct nix *v = c->aux;
ERRSTACK(1);
char buf[32];
struct cmdbuf *cb;
struct nix *nix;
uint64_t hexval;
switch (TYPE(c->qid)) {
case Qtopdir:
case Qnixdir:
case Qstat:
error(Eperm);
case Qctl:
nix = c->aux;
cb = parsecmd(ubuf, n);
/* TODO: lock the nix here, unlock in waserror and before popping */
if (waserror()) {
kfree(cb);
nexterror();
}
if (cb->nf < 1)
error("short control request");
if (!strcmp(cb->f[0], "run")) {
int core;
uintptr_t ip;
if (cb->nf != 3)
error("usage: run core entry");
core = strtoul(cb->f[1], 0, 0);
ip = strtoul(cb->f[2], 0, 0);
if (!test_bit(core, nix->cpus))
error("Bad core %d", core);
send_kernel_message(core, nixwrapper, (long)ip, 0, 0, KMSG_ROUTINE);
} else if (!strcmp(cb->f[0], "test")) {
int core;
if (cb->nf != 2)
error("usage: test core");
core = strtoul(cb->f[1], 0, 0);
if (!test_bit(core, nix->cpus))
error("Bad core %d", core);
send_kernel_message(core, nixwrapper, (long)nixtest, 0, 0,
KMSG_ROUTINE);
} else if (!strcmp(cb->f[0], "reserve")) {
int core;
if (cb->nf != 2)
error("Usage: reserve core (-1 for any)");
core = strtol(cb->f[1], 0, 0);
if (core == -1) {
core = get_any_idle_core();
if (core < 0)
error("No free idle cores!");
} else {
if (get_this_idle_core(core) < 0)
error("Failed to reserve core %d\n", core);
}
set_bit(core, nix->cpus);
} else if (!strcmp(cb->f[0], "check")) {
int i;
for(i = 0; i < MAX_NUM_CPUS; i++) {
if (!test_bit(i, nix->cpus))
continue;
printk("Core %d is available to nix%d\n", i, nix->id);
}
} else if (!strcmp(cb->f[0], "stop")) {
error("can't stop a nix yet");
} else {
error("%s: not implemented", cb->f[0]);
}
kfree(cb);
poperror();
break;
case Qimage:
if (off < 0)
error("offset < 0!");
if (off + n > v->imagesize) {
n = v->imagesize - off;
}
if (memcpy_from_user_errno(current, v->image + off, ubuf, n) < 0)
error("%s: bad user addr %p", __FUNCTION__, ubuf);
break;
default:
panic("Bad QID %p in devnix", c->qid.path);
}
return n;
}
/* Device table entry for devnix.  The initializer is positional, so the order
 * of the entries matters.  Operations without a nix* handler in this file use
 * the dev*-named functions, which are defined elsewhere. */
struct dev nixdevtab __devtab = {
	't',		/* device character; same as in nixattach()'s devattach() */
	"nix",		/* device name */
	devreset,
	nixinit,
	devshutdown,
	nixattach,
	nixwalk,
	nixstat,
	nixopen,
	nixcreate,
	nixclose,
	nixread,
	devbread,
	nixwrite,
	devbwrite,
	nixremove,
	nixwstat,
	devpower,
	// devconfig,
	devchaninfo,
};