//#define DEBUG
/* Copyright 2014 Google Inc.
* Copyright (c) 2013 The Regents of the University of California
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* devvm/#V: a device for VMs
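*
* The device serves this hierarchy (generated by vmgen() below):
*   #V/clone      open to create a vm; the resulting chan is its ctl file
*   #V/stat       read the current number of vms
*   #V/vmN/ctl    per-vm control file (run, stop, mapmem, region)
*   #V/vmN/image  the vm's memory image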
*
*/
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <sys/queue.h>
#include <smp.h>
#include <kref.h>
#include <atomic.h>
#include <alarm.h>
#include <event.h>
#include <umem.h>
#include <devalarm.h>
#include <arch/types.h>
#include <arch/vm.h>
#include <arch/emulate.h>
#include <arch/vmdebug.h>
/* qid path types */
enum {
Qtopdir = 1,
Qclone,
Qstat,
Qvmdir,
Qctl,
Qimage,
};
/* The QID path packs the type into the low ID_SHIFT bits and the index into
* the vms array above them.
* We reserve the right to make the index a real id later.
*/
#define ID_SHIFT 5
/* vms have an image.
* Note that the image can be read even while the vm is running. */
struct vm {
struct kref kref;
/* should this be an array of pages? Hmm. */
void *image;
unsigned long imagesize;
int id; /* 0-based id from newvmid() */
struct litevm *archvm;
};
static spinlock_t vmlock;
/* array, not linked list. We expect few, might as well be cache friendly. */
static struct vm *vms = NULL;
static int nvm = 0;
static int vmok = 0;
static spinlock_t vmidlock[1];
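/* vmid's refcount doubles as an id counter; see newvmid() below. */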
static struct kref vmid[1] = { {(void *)1, fake_release} };
/* It's not clear which header these belong in, so declare them here. */
struct litevm *vmx_open(void);
int vmx_create_vcpu(struct litevm *litevm, int n);
int vmx_init(void);
int vm_set_memory_region(struct litevm *litevm,
struct litevm_memory_region *mem);
int vm_run(struct litevm *litevm, struct litevm_run *litevm_run);
static inline struct vm *
QID2VM(struct qid q)
{
return &vms[((q).path >> ID_SHIFT)];
}
static inline int
TYPE(struct qid q)
{
return ((q).path & ((1 << ID_SHIFT) - 1));
}
static inline int QID(int index, int type)
{
return ((index << ID_SHIFT) | type);
}
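/* Example: QID(3, Qctl) == (3 << ID_SHIFT) | 5 == 101, so TYPE() recovers
* Qctl and QID2VM() returns &vms[3]. */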
/* we'll need this somewhere more generic. */
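/* Read exactly n bytes from c into vp, erroring out on a short read. */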
static void readn(struct chan *c, void *vp, long n)
{
//print_func_entry();
char *p;
long nn;
int total = 0, want = n;
p = vp;
while (n > 0) {
nn = devtab[c->type].read(c, p, n, c->offset);
printk("readn: Got %d@%lld\n", nn, c->offset);
if (nn == 0)
error("%s: wanted %d, got %d", Eshort, want, total);
c->offset += nn;
p += nn;
n -= nn;
total += nn;
}
//print_func_exit();
}
/* Not called yet -- we still need to unlink the vm. */
static void vm_release(struct kref *kref)
{
//print_func_entry();
struct vm *v = container_of(kref, struct vm, kref);
spin_lock_irqsave(&vmlock);
/* Cute trick: copy the last element of the array over the one we're
* deleting, then decrement nvm. Don't krealloc; that way, the next time
* we add a vm the allocator just reuses the slot. This breaks outstanding
* QIDs, though, since they embed the array index. We may eventually need
* a linked list, or to walk the array for a matching id. Still ... yuck.
*/
if (v != &vms[nvm - 1]) {
/* TODO: free the image */
/* TODO: get rid of the kref */
*v = vms[nvm - 1];
}
nvm--;
spin_unlock_irqsave(&vmlock);
//print_func_exit();
}
/* VM ids run in the range 1..infinity. But vmx.c wants them
* 0-based.
*/
static int newvmid(void)
{
//print_func_entry();
int id;
spin_lock_irqsave(vmidlock);
id = kref_refcnt(vmid);
kref_get(vmid, 1);
spin_unlock_irqsave(vmidlock);
//print_func_exit();
return id - 1;
}
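/* Directory-entry generator: devwalk/devstat/devdirread call this with
* successive values of s to enumerate a directory, with DEVDOTDOT for "..",
* or with a specific qid in c for a direct lookup. */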
static int vmgen(struct chan *c, char *entry_name,
struct dirtab *unused, int unused_nr_dirtab,
int s, struct dir *dp)
{
//print_func_entry();
struct qid q;
struct vm *vm_i;
printd("GEN s %d\n", s);
/* Whether we're in one dir or at the top, .. still takes us to the top. */
if (s == DEVDOTDOT) {
mkqid(&q, Qtopdir, 0, QTDIR);
devdir(c, c->qid, "#V", 0, eve, 0555, dp);
//print_func_exit();
return 1;
}
printd("TYPE %d\n", TYPE(c->qid));
switch (TYPE(c->qid)) {
case Qtopdir:
printd("Qtopdir s %d nvm %d\n", s, nvm);
/* Generate elements for the top level dir. We support clone, stat,
* vm dirs at the top level */
if (s == 0) {
mkqid(&q, Qclone, 0, QTFILE);
devdir(c, q, "clone", 0, eve, 0666, dp);
//print_func_exit();
return 1;
}
s--;
if (s == 0) {
mkqid(&q, Qstat, 0, QTFILE);
devdir(c, q, "stat", 0, eve, 0666, dp);
//print_func_exit();
return 1;
}
s--; /* 1 -> 0th element, 2 -> 1st element, etc */
spin_lock_irqsave(&vmlock);
if (s >= nvm) {
printd("DONE qtopdir\n");
spin_unlock_irqsave(&vmlock);
//print_func_exit();
return -1;
}
vm_i = &vms[s];
snprintf(get_cur_genbuf(), GENBUF_SZ, "vm%d", vm_i->id);
spin_unlock_irqsave(&vmlock);
mkqid(&q, QID(s, Qvmdir), 0, QTDIR);
devdir(c, q, get_cur_genbuf(), 0, eve, 0555, dp);
//print_func_exit();
return 1;
case Qvmdir:
/* Gen the contents of the vm dirs */
s += Qctl; /* first time through, start on Qctl */
switch (s) {
case Qctl:
/* use the vm index from the parent dir's qid, not the loop counter */
mkqid(&q, QID(c->qid.path >> ID_SHIFT, Qctl), 0, QTFILE);
devdir(c, q, "ctl", 0, eve, 0666, dp);
//print_func_exit();
return 1;
case Qimage:
mkqid(&q, QID(c->qid.path >> ID_SHIFT, Qimage), 0, QTFILE);
devdir(c, q, "image", 0, eve, 0666, dp);
//print_func_exit();
return 1;
}
//print_func_exit();
return -1;
/* Need to also provide a direct hit for Qclone and all other files
* (at all levels of the hierarchy). Every file is both generated
* (via the s increments in their respective directories) and
* directly gen-able. devstat() will call gen with a specific path
* in the qid. In these cases, we make a dir for whatever they are
* asking for. Note the qid stays the same. I think this is what
* the old plan9 comments above devgen were talking about for (ii).
*
* We don't need to do this for the directories - devstat will look
* for a directory by path and fail. Then it will manually
* build the stat output (check the -1 case in devstat). */
case Qclone:
devdir(c, c->qid, "clone", 0, eve, 0666, dp);
//print_func_exit();
return 1;
case Qstat:
devdir(c, c->qid, "stat", 0, eve, 0444, dp);
//print_func_exit();
return 1;
case Qctl:
devdir(c, c->qid, "ctl", 0, eve, 0666, dp);
//print_func_exit();
return 1;
case Qimage:
devdir(c, c->qid, "image", 0, eve, 0666, dp);
//print_func_exit();
return 1;
}
//print_func_exit();
return -1;
}
static void vminit(void)
{
//print_func_entry();
int i;
spinlock_init_irqsave(&vmlock);
spinlock_init_irqsave(vmidlock);
i = vmx_init();
if (i == 0)
vmok = 1;
printk("vminit: litevm_init returns %d\n", i);
//print_func_exit();
}
static struct chan *vmattach(char *spec)
{
//print_func_entry();
if (!vmok)
error("No VMs available");
struct chan *c = devattach('V', spec);
mkqid(&c->qid, Qtopdir, 0, QTDIR);
//print_func_exit();
return c;
}
static struct walkqid *vmwalk(struct chan *c, struct chan *nc, char **name,
int nname)
{
//print_func_entry();
//print_func_exit();
return devwalk(c, nc, name, nname, 0, 0, vmgen);
}
static int vmstat(struct chan *c, uint8_t * db, int n)
{
//print_func_entry();
//print_func_exit();
return devstat(c, db, n, 0, 0, vmgen);
}
/* It shouldn't matter if p = current is DYING. We'll eventually fail to insert
* the open chan into p's fd table, then decref the chan. */
static struct chan *vmopen(struct chan *c, int omode)
{
//print_func_entry();
ERRSTACK(1);
struct vm *v = QID2VM(c->qid);
printk("vmopen: v is %p\n", v);
if (waserror()) {
nexterror();
}
switch (TYPE(c->qid)) {
case Qtopdir:
case Qvmdir:
if (omode & ORCLOSE)
error(Eperm);
if (!IS_RDONLY(omode))
error(Eisdir);
break;
case Qclone:
spin_lock_irqsave(&vmlock);
vms = krealloc(vms, sizeof(vms[0]) * (nvm + 1), 0);
v = &vms[nvm];
nvm++;
spin_unlock_irqsave(&vmlock);
kref_init(&v->kref, vm_release, 1);
v->id = newvmid();
/* the new vm sits at index nvm - 1 */
mkqid(&c->qid, QID(nvm - 1, Qctl), 0, QTFILE);
c->aux = v;
printd("New VM id %d\n", v->id);
v->archvm = vmx_open();
if (!v->archvm) {
printk("vmx_open failed\n");
error("vmx_open failed");
}
if (vmx_create_vcpu(v->archvm, v->id) < 0) {
printk("vmx_create_vcpu failed\n");
error("vmx_create_vcpu failed");
}
printk("Qclone open: id %d, v is %p, v->archvm is %p\n",
v->id,
v, v->archvm);
break;
case Qstat:
break;
case Qctl:
case Qimage:
c->aux = QID2VM(c->qid);
printk("open qctl: aux (vm) is %p\n", c->aux);
break;
}
c->mode = openmode(omode);
/* Assumes c is unique (can't be closed concurrently) */
c->flag |= COPEN;
c->offset = 0;
poperror();
//print_func_exit();
return c;
}
static void vmcreate(struct chan *c, char *name, int omode, uint32_t perm)
{
//print_func_entry();
error(Eperm);
//print_func_exit();
}
static void vmremove(struct chan *c)
{
//print_func_entry();
error(Eperm);
//print_func_exit();
}
static int vmwstat(struct chan *c, uint8_t * dp, int n)
{
//print_func_entry();
error("No vmwstat");
//print_func_exit();
return 0;
}
static void vmclose(struct chan *c)
{
//print_func_entry();
struct vm *v = c->aux;
if (!v) {
//print_func_exit();
return;
}
/* There are more closes than opens. For instance, sysstat doesn't open,
* but it will close the chan it got from namec. We only want to clean
* up/decref chans that were actually open. */
if (!(c->flag & COPEN)) {
//print_func_exit();
return;
}
switch (TYPE(c->qid)) {
/* for now, leave the VM active even when we close ctl */
case Qctl:
break;
case Qimage:
kref_put(&v->kref);
break;
}
//print_func_exit();
}
static long vmread(struct chan *c, void *ubuf, long n, int64_t offset)
{
//print_func_entry();
struct vm *v = c->aux;
printd("VMREAD\n");
switch (TYPE(c->qid)) {
case Qtopdir:
case Qvmdir:
//print_func_exit();
return devdirread(c, ubuf, n, 0, 0, vmgen);
case Qstat:
//print_func_exit();
return readnum(offset, ubuf, n, nvm, NUMSIZE32);
case Qctl:
assert(v);
//print_func_exit();
return readnum(offset, ubuf, n, v->id, NUMSIZE32);
case Qimage:
assert(v);
//print_func_exit();
return readmem(offset, ubuf, n, v->image, v->imagesize);
default:
panic("Bad QID %p in devvm", c->qid.path);
}
//print_func_exit();
return 0;
}
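/* Control requests written to a vm's ctl file (parsed with parsecmd()):
*	run vcpu emulated mmio_completed
*	stop    (not implemented yet)
*	mapmem file slot flags addr size
*	region slot flags addr size
* A rough usage sketch -- the path and numbers are made up for illustration:
*	echo region 0 0 0 0x1000000 > '#V/vm0/ctl'
*	echo mapmem /path/to/guest.img 0 0 0 0x200000 > '#V/vm0/ctl'
*	echo run 0 0 0 > '#V/vm0/ctl'
*/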
static long vmwrite(struct chan *c, void *ubuf, long n, int64_t unused)
{
//print_func_entry();
ERRSTACK(3);
char buf[32];
struct cmdbuf *cb;
struct vm *vm;
struct litevm *litevm;
uint64_t hexval;
printd("vmwrite(%p, %p, %d)\n", c, ubuf, n);
switch (TYPE(c->qid)) {
case Qtopdir:
case Qvmdir:
case Qstat:
error(Eperm);
case Qctl:
vm = c->aux;
litevm = vm->archvm;
printk("qctl: vm is %p, litevm is %p\n", vm, litevm);
cb = parsecmd(ubuf, n);
if (waserror()) {
kfree(cb);
nexterror();
}
if (cb->nf < 1)
error("short control request");
if (!strcmp(cb->f[0], "run")) {
int ret;
if (cb->nf != 4)
error("usage: run vcpu emulated mmio_completed");
struct litevm_run vmr;
vmr.vcpu = strtoul(cb->f[1], NULL, 0);
vmr.emulated = strtoul(cb->f[2], NULL, 0);
vmr.mmio_completed = strtoul(cb->f[3], NULL, 0);
disable_irq();
ret = vm_run(litevm, &vmr);
enable_irq();
printk("vm_run returns %d\n", ret);
//print_func_exit();
return ret;
} else if (!strcmp(cb->f[0], "stop")) {
error("can't stop a vm yet");
} else if (!strcmp(cb->f[0], "mapmem")) {
struct chan *file;
void *v;
vm = c->aux;
uint64_t filesize;
struct litevm_memory_region vmr;
int got;
if (cb->nf != 6)
error("usage: mapmem file slot flags addr size");
vmr.slot = strtoul(cb->f[2], NULL, 0);
vmr.flags = strtoul(cb->f[3], NULL, 0);
vmr.guest_phys_addr = strtoul(cb->f[4], NULL, 0);
filesize = strtoul(cb->f[5], NULL, 0);
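/* round the region up to a whole number of 4 KiB pages */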
vmr.memory_size = (filesize + 4095) & ~4095ULL;
file = namec(cb->f[1], Aopen, OREAD, 0);
printk("after namec file is %p\n", file);
if (waserror()) {
printk("File open, alloc bad\n");
cclose(file);
nexterror();
}
/* at some point we want to mmap from the kernel
* but we don't have that yet. This all needs
* rethinking but the abstractions of kvm do too.
*/
v = kmalloc(vmr.memory_size, KMALLOC_WAIT);
if (waserror()) {
printk("memory allocated, read bad %s\n",
current_errstr());
kfree(v);
nexterror();
}
readn(file, v, filesize);
vmr.init_data = v;
if (vm_set_memory_region(litevm, &vmr))
error("vm_set_memory_region failed");
void monitor(void *);
monitor(NULL);
poperror();
poperror();
kfree(v);
cclose(file);
} else if (!strcmp(cb->f[0], "region")) {
void *v;
struct litevm_memory_region vmr;
if (cb->nf != 5)
error("usage: mapmem slot flags addr size");
vmr.slot = strtoul(cb->f[1], NULL, 0);
vmr.flags = strtoul(cb->f[2], NULL, 0);
vmr.guest_phys_addr = strtoul(cb->f[3], NULL, 0);
vmr.memory_size = strtoul(cb->f[4], NULL, 0);
vmr.init_data = NULL;
if (vm_set_memory_region(litevm, &vmr))
error("vm_set_memory_region failed");
} else {
error("%s: not implemented", cb->f[0]);
}
kfree(cb);
poperror();
break;
case Qimage:
error("can't write an image yet");
break;
default:
panic("Bad QID %p in devvm", c->qid.path);
}
//print_func_exit();
return n;
}
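/* Device table entry for #V. Fields are positional; the generic dev*
* helpers cover the operations this driver doesn't implement itself. */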
struct dev vmdevtab __devtab = {
'V',
"vm",
devreset,
vminit,
devshutdown,
vmattach,
vmwalk,
vmstat,
vmopen,
vmcreate,
vmclose,
vmread,
devbread,
vmwrite,
devbwrite,
vmremove,
vmwstat,
devpower,
// devconfig,
devchaninfo,
};