user/vmm/vthread.c - upstream - Git at Google

 /* Copyright (c) 2016 Google Inc.
  *
  * See LICENSE for details.
  *
  * Helper functions for virtual machines */

 #include <errno.h>
 #include <stdlib.h>
 #include <parlib/bitmask.h>
 #include <parlib/uthread.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/queue.h>
 #include <vmm/vmm.h>
 #include <vmm/vthread.h>

 static struct vmm_thread_tq parked_vths = TAILQ_HEAD_INITIALIZER(parked_vths);
 static struct spin_pdr_lock park_lock = SPINPDR_INITIALIZER;

 /* Maps `count` pages (count * PGSIZE bytes) of private, anonymous, read/write
  * memory, pre-faulted via MAP_POPULATE.
  *
  * Returns the base address of the mapping, or NULL if mmap() fails, so that
  * callers can use an ordinary NULL check. */
 static void *pages(size_t count)
 {
 	void *base = mmap(0, count * PGSIZE, PROT_READ | PROT_WRITE,
 	                  MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

 	/* mmap() reports failure as MAP_FAILED, not NULL. */
 	return base == MAP_FAILED ? NULL : base;
 }

 /* One-time setup callback, handed to parlib_run_once() by vthread_alloc().
  *
  * @arg is the struct virtual_machine to prepare: paging is set up, the
  * guest-thread bookkeeping (count, array pointer, array capacity) is cleared,
  * and uthread_mcp_init() is called. */
 static void vmsetup(void *arg)
 {
 	struct virtual_machine *machine = arg;

 	setup_paging(machine);

 	/* Start out with no guest threads and no array to hold them. */
 	machine->nr_gpcs = 0;
 	machine->__gths = NULL;
 	machine->gth_array_elem = 0;

 	uthread_mcp_init();
 }

 void gpci_init(struct vmm_gpcore_init *gpci)
 {
 	uint8_t *p;

 	/* Technically, we don't need these pages for the all guests. Currently,
 	 * the kernel requires them. */
 	p = pages(3);
 	if (!p)
 		panic("Can't allocate 3 pages for guest: %r");
 	gpci->posted_irq_desc = &p[0];
 	gpci->vapic_addr = &p[4096];
 	gpci->apic_addr = &p[8192];
 	/* TODO: once we are making these GPCs at the same time as vthreads, we
 	 * should set fsbase == the TLS desc of the vthread (if any). */
 	gpci->fsbase = 0;
 	gpci->gsbase = 0;
 }

 /* Helper, grows the array of guest_threads in vm so that it can hold at least
  * new_nr_gths entries.  Does nothing if the recorded capacity
  * (vm->gth_array_elem) is already large enough.
  *
  * Concurrent readers (gpcid_to_gth()) need to use a seq-lock-style of
  * concurrency.  They could read the old array even after we free it.
  *
  * Unlike in the kernel, concurrent readers in userspace shouldn't even read
  * freed memory.  Electric fence could catch that fault.  Until we have a decent
  * userspace RCU, we can avoid these faults WHP by just sleeping.
  *
  * The new capacity is max(2 * old capacity, new_nr_gths); unused slots are
  * zeroed by calloc().  Allocation failure trips an assert. */
 static void __grow_gth_array(struct virtual_machine *vm,
                              unsigned int new_nr_gths)
 {
 	struct guest_thread **new_array, **old_array;
 	size_t new_nr_elem;

 	if (new_nr_gths <= vm->gth_array_elem)
 		return;
 	/* TODO: (RCU) we could defer the free */
 	old_array = vm->__gths;
 	new_nr_elem = MAX(vm->gth_array_elem * 2, new_nr_gths);
 	new_array = calloc(new_nr_elem, sizeof(*new_array));
 	assert(new_array);
 	/* The first growth starts from a NULL array, and passing NULL to
 	 * memcpy() is undefined even when the length is zero. */
 	if (old_array)
 		memcpy(new_array, old_array,
 		       sizeof(*new_array) * vm->nr_gpcs);
 	wmb();	/* all elements written before changing pointer */
 	vm->__gths = new_array;
 	wmb();	/* ptr written before potentially clobbering freed memory. */
 	/* Remember the capacity.  Without this the early return above never
 	 * triggers and every add reallocates (and sleeps) again. */
 	vm->gth_array_elem = new_nr_elem;
 	uthread_usleep(1000);	/* hack for electric fence */
 	free(old_array);
 }

 /* Appends gth to vm's guest-thread array, growing the array first if needed.
  *
  * The new slot is filled in before nr_gpcs is incremented, with a write
  * barrier in between, so a reader that checks nr_gpcs first does not see the
  * new count before the slot contents.
  *
  * Takes no lock itself; vthread_alloc() calls it with vm->mtx held. */
 void __add_gth_to_vm(struct virtual_machine *vm, struct guest_thread *gth)
 {
 	/* Make room for one more entry than we currently hold. */
 	__grow_gth_array(vm, vm->nr_gpcs + 1);
 	vm->__gths[vm->nr_gpcs] = gth;
 	wmb();	/* concurrent readers will check nr_gpcs first */
 	vm->nr_gpcs++;
 }

 /* Parks an exited vthread at the head of the global parked list, under
  * park_lock, so that get_parked_vth() can hand it out again.
  *
  * If we fully destroy these uthreads, we'll need to call uthread_cleanup() */
 void __vthread_exited(struct vthread *vth)
 {
 	spin_pdr_lock(&park_lock);
 	TAILQ_INSERT_HEAD(&parked_vths, (struct vmm_thread*)vth, tq_next);
 	spin_pdr_unlock(&park_lock);
 }

 /* The tricky part is that we need to reinit the threads */
 static struct vthread *get_parked_vth(struct virtual_machine *vm)
 {
 	struct vmm_thread *vmth;
 	struct guest_thread *gth;
 	struct ctlr_thread *cth;
 	/* These are from create_guest_thread() */
 	struct uth_thread_attr gth_attr = {.want_tls = FALSE};
 	struct uth_thread_attr cth_attr = {.want_tls = TRUE};

 	spin_pdr_lock(&park_lock);
 	vmth = TAILQ_FIRST(&parked_vths);
 	if (!vmth) {
 		spin_pdr_unlock(&park_lock);
 		return NULL;
 	}
 	TAILQ_REMOVE(&parked_vths, vmth, tq_next);
 	spin_pdr_unlock(&park_lock);

 	gth = (struct guest_thread*)vmth;
 	cth = gth->buddy;
 	uthread_init((struct uthread*)gth, &gth_attr);
 	uthread_init((struct uthread*)cth, &cth_attr);
 	return (struct vthread*)gth;
 }

 /* Returns a vthread for @vm: a recycled parked one if any is available,
  * otherwise a newly created guest thread on a newly added guest core.
  *
  * vmsetup() runs only on the very first call (the once-control is static),
  * using the @vm passed to that call.  @gpci is only used on the creation
  * path, where it is passed to the SYS_vmm_add_gpcs syscall and to
  * create_guest_thread().  Failures on that path trip asserts. */
 struct vthread *vthread_alloc(struct virtual_machine *vm,
                               struct vmm_gpcore_init *gpci)
 {
 	static parlib_once_t once = PARLIB_ONCE_INIT;
 	struct vthread *recycled;
 	struct guest_thread *fresh;
 	int nr_added;

 	parlib_run_once(&once, vmsetup, vm);

 	recycled = get_parked_vth(vm);
 	if (recycled)
 		return recycled;

 	/* Adding the core and adding the thread to the VM happen under
 	 * vm->mtx. */
 	uth_mutex_lock(&vm->mtx);
 	nr_added = syscall(SYS_vmm_add_gpcs, 1, gpci);
 	assert(nr_added == 1);
 	fresh = create_guest_thread(vm, vm->nr_gpcs, gpci);
 	assert(fresh);
 	__add_gth_to_vm(vm, fresh);
 	uth_mutex_unlock(&vm->mtx);

 	/* TODO: somewhat arch specific */
 	gth_to_vmtf(fresh)->tf_cr3 = (uintptr_t)vm->root;
 	return (struct vthread*)fresh;
 }

 /* TODO: this is arch specific
  *
  * Seeds @vth's VM trapframe: rip = @entry_pt, rdi = @arg, rsp = @stacktop,
  * and rflags = FL_RSVD_1. */
 void vthread_init_ctx(struct vthread *vth, uintptr_t entry_pt, uintptr_t arg,
                       uintptr_t stacktop)
 {
 	struct vm_trapframe *tf = vth_to_vmtf(vth);

 	tf->tf_rflags = FL_RSVD_1;
 	tf->tf_rsp = stacktop;
 	tf->tf_rdi = arg;
 	tf->tf_rip = entry_pt;
 }

 /* Starts @vthread by passing it, viewed as a guest_thread, to
  * start_guest_thread(). */
 void vthread_run(struct vthread *vthread)
 {
 	struct guest_thread *gth = (struct guest_thread*)vthread;

 	start_guest_thread(gth);
 }

 #define DEFAULT_STACK_SIZE 65536
 /* Allocates a PGSIZE-aligned stack of DEFAULT_STACK_SIZE bytes and passes its
  * address range to add_pte_entries() for @vm.
  *
  * Returns the address of the last word of the stack (which is also written
  * once, so it is already touched), or 0 if the allocation fails. */
 static uintptr_t alloc_stacktop(struct virtual_machine *vm)
 {
 	void *mem;
 	uintptr_t *stack, *tos;

 	/* posix_memalign() stores through a void **; give it a real void *
 	 * rather than type-punning the address of a uintptr_t *. */
 	if (posix_memalign(&mem, PGSIZE, DEFAULT_STACK_SIZE))
 		return 0;
 	stack = mem;
 	add_pte_entries(vm, (uintptr_t)stack,
 	                (uintptr_t)stack + DEFAULT_STACK_SIZE);
 	/* touch the top word on the stack so we don't page fault
 	 * on that in the VM.  Index by the element type so this stays the last
 	 * word whatever the width of uintptr_t. */
 	tos = &stack[DEFAULT_STACK_SIZE / sizeof(*stack) - 1];
 	*tos = 0;
 	return (uintptr_t)tos;
 }

 /* Returns the stack top for @vth, allocating a stack on first use.
  *
  * The stack top is cached in a malloc'd struct vthread_info hung off the
  * guest thread's user_data, so later calls for the same thread get the same
  * stack back.  Allocation failures trip asserts. */
 static uintptr_t vth_get_stack(struct vthread *vth)
 {
 	struct guest_thread *gth = (struct guest_thread*)vth;
 	struct vthread_info *cached = (struct vthread_info*)gth->user_data;
 	uintptr_t top;

 	if (!cached) {
 		top = alloc_stacktop(vth_to_vm(vth));
 		assert(top);
 		/* Yes, an evil part of me thought of using the top of the
 		 * stack for this struct's storage. */
 		gth->user_data = malloc(sizeof(struct vthread_info));
 		assert(gth->user_data);
 		cached = (struct vthread_info*)gth->user_data;
 		cached->stacktop = top;
 		return top;
 	}
 	assert(cached->stacktop);
 	return cached->stacktop;
 }

 struct vthread *vthread_create(struct virtual_machine *vm, void *entry,
                                void *arg)
 {
 	struct vthread *vth;
 	struct vmm_gpcore_init gpci[1];

 	gpci_init(gpci);
 	vth = vthread_alloc(vm, gpci);
 	if (!vth)
 		return NULL;
 	vthread_init_ctx(vth, (uintptr_t)entry, (uintptr_t)arg,
 			 vth_get_stack(vth));
 	vthread_run(vth);
 	return vth;
 }

 /* Joins on @vth by calling uthread_join() on the guest thread's buddy
  * controller thread; @retval_loc is passed straight through. */
 void vthread_join(struct vthread *vth, void **retval_loc)
 {
 	struct guest_thread *gth = (struct guest_thread*)vth;
 	struct ctlr_thread *ctlr = gth->buddy;

 	uthread_join((struct uthread*)ctlr, retval_loc);
 }

 /* Variadic front end for raw_vmcall().
  *
  * Always reads five `long` arguments from the variable argument list and
  * forwards them, followed by @vmcall_nr, to raw_vmcall().  Returns whatever
  * raw_vmcall() returns. */
 long vmcall(unsigned int vmcall_nr, ...)
 {
 	long args[5];
 	va_list ap;
 	int i;

 	va_start(ap, vmcall_nr);
 	for (i = 0; i < 5; i++)
 		args[i] = va_arg(ap, long);
 	va_end(ap);

 	return raw_vmcall(args[0], args[1], args[2], args[3], args[4],
 	                  vmcall_nr);
 }

 /* Handles a vmcall made by a vthread.  The call number is read from tf_rax
  * and the first argument from tf_rdi.
  *
  * VTH_VMCALL_NULL does nothing, VTH_VMCALL_PRINTC writes one character to
  * stdout and flushes it, and VTH_VMCALL_EXIT exits the thread with tf_rdi as
  * its return value.
  *
  * Returns TRUE after advancing tf_rip when the call was handled, FALSE (with a
  * message on stderr and tf_rip untouched) for an unknown call number. */
 bool vth_handle_vmcall(struct guest_thread *gth, struct vm_trapframe *vm_tf)
 {
 	switch (vm_tf->tf_rax) {
 	case VTH_VMCALL_NULL:
 		goto out_ok;
 	case VTH_VMCALL_PRINTC:
 		/* %c expects an int argument; cast explicitly rather than
 		 * passing the raw register field. */
 		fprintf(stdout, "%c", (int)vm_tf->tf_rdi);
 		fflush(stdout);
 		goto out_ok;
 	case VTH_VMCALL_EXIT:
 		uth_2ls_thread_exit((void*)vm_tf->tf_rdi);
 		assert(0);
 	default:
 		/* Print the whole value with a matching conversion; %d with a
 		 * register-sized argument is a format/argument mismatch. */
 		fprintf(stderr, "Unknown syscall nr %llu\n",
 		        (unsigned long long)vm_tf->tf_rax);
 		return FALSE;
 	}
 	assert(0);
out_ok:
 	/* NOTE(review): 3 is presumably the length of the vmcall instruction
 	 * being skipped - confirm. */
 	vm_tf->tf_rip += 3;
 	return TRUE;
 }
	/* Copyright (c) 2016 Google Inc.
	*
	* See LICENSE for details.
	*
	* Helper functions for virtual machines */

	#include <errno.h>
	#include <stdlib.h>
	#include <parlib/bitmask.h>
	#include <parlib/uthread.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <sys/queue.h>
	#include <vmm/vmm.h>
	#include <vmm/vthread.h>

	static struct vmm_thread_tq parked_vths = TAILQ_HEAD_INITIALIZER(parked_vths);
	static struct spin_pdr_lock park_lock = SPINPDR_INITIALIZER;

	/* Maps `count` pages (count * PGSIZE bytes) of private, anonymous, read/write
	 * memory, pre-faulted via MAP_POPULATE.
	 *
	 * Returns the base address of the mapping, or NULL if mmap() fails, so that
	 * callers can use an ordinary NULL check. */
	static void *pages(size_t count)
	{
		void *base = mmap(0, count * PGSIZE, PROT_READ | PROT_WRITE,
		                  MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

		/* mmap() reports failure as MAP_FAILED, not NULL. */
		return base == MAP_FAILED ? NULL : base;
	}

	static void vmsetup(void *arg)
	{
	struct virtual_machine vm = (struct virtual_machine )arg;

	setup_paging(vm);
	vm->nr_gpcs = 0;
	vm->__gths = NULL;
	vm->gth_array_elem = 0;
	uthread_mcp_init();
	}

	void gpci_init(struct vmm_gpcore_init *gpci)
	{
	uint8_t *p;

	/* Technically, we don't need these pages for the all guests. Currently,
	* the kernel requires them. */
	p = pages(3);
	if (!p)
	panic("Can't allocate 3 pages for guest: %r");
	gpci->posted_irq_desc = &p[0];
	gpci->vapic_addr = &p[4096];
	gpci->apic_addr = &p[8192];
	/* TODO: once we are making these GPCs at the same time as vthreads, we
	* should set fsbase == the TLS desc of the vthread (if any). */
	gpci->fsbase = 0;
	gpci->gsbase = 0;
	}

	/* Helper, grows the array of guest_threads in vm so that it can hold at least
	 * new_nr_gths entries.  Does nothing if the recorded capacity
	 * (vm->gth_array_elem) is already large enough.
	 *
	 * Concurrent readers (gpcid_to_gth()) need to use a seq-lock-style of
	 * concurrency.  They could read the old array even after we free it.
	 *
	 * Unlike in the kernel, concurrent readers in userspace shouldn't even read
	 * freed memory.  Electric fence could catch that fault.  Until we have a decent
	 * userspace RCU, we can avoid these faults WHP by just sleeping.
	 *
	 * The new capacity is max(2 * old capacity, new_nr_gths); unused slots are
	 * zeroed by calloc().  Allocation failure trips an assert. */
	static void __grow_gth_array(struct virtual_machine *vm,
	                             unsigned int new_nr_gths)
	{
		struct guest_thread **new_array, **old_array;
		size_t new_nr_elem;

		if (new_nr_gths <= vm->gth_array_elem)
			return;
		/* TODO: (RCU) we could defer the free */
		old_array = vm->__gths;
		new_nr_elem = MAX(vm->gth_array_elem * 2, new_nr_gths);
		new_array = calloc(new_nr_elem, sizeof(*new_array));
		assert(new_array);
		/* The first growth starts from a NULL array, and passing NULL to
		 * memcpy() is undefined even when the length is zero. */
		if (old_array)
			memcpy(new_array, old_array,
			       sizeof(*new_array) * vm->nr_gpcs);
		wmb();	/* all elements written before changing pointer */
		vm->__gths = new_array;
		wmb();	/* ptr written before potentially clobbering freed memory. */
		/* Remember the capacity.  Without this the early return above never
		 * triggers and every add reallocates (and sleeps) again. */
		vm->gth_array_elem = new_nr_elem;
		uthread_usleep(1000);	/* hack for electric fence */
		free(old_array);
	}

	void __add_gth_to_vm(struct virtual_machine vm, struct guest_thread gth)
	{
	__grow_gth_array(vm, vm->nr_gpcs + 1);
	vm->__gths[vm->nr_gpcs] = gth;
	wmb(); /* concurrent readers will check nr_gpcs first */
	vm->nr_gpcs++;
	}

	/* Parks an exited vthread at the head of the global parked list, under
	 * park_lock, so that get_parked_vth() can hand it out again.
	 *
	 * If we fully destroy these uthreads, we'll need to call uthread_cleanup() */
	void __vthread_exited(struct vthread *vth)
	{
		spin_pdr_lock(&park_lock);
		TAILQ_INSERT_HEAD(&parked_vths, (struct vmm_thread*)vth, tq_next);
		spin_pdr_unlock(&park_lock);
	}

	/* Pops the most recently parked vthread off the global parked list, or
	 * returns NULL if the list is empty.  @vm is currently unused.
	 *
	 * The tricky part is that we need to reinit the threads: both the guest
	 * thread and its buddy controller thread go through uthread_init() again
	 * before the vthread is handed back. */
	static struct vthread *get_parked_vth(struct virtual_machine *vm)
	{
		struct vmm_thread *vmth;
		struct guest_thread *gth;
		struct ctlr_thread *cth;
		/* These are from create_guest_thread() */
		struct uth_thread_attr gth_attr = {.want_tls = FALSE};
		struct uth_thread_attr cth_attr = {.want_tls = TRUE};

		spin_pdr_lock(&park_lock);
		vmth = TAILQ_FIRST(&parked_vths);
		if (!vmth) {
			spin_pdr_unlock(&park_lock);
			return NULL;
		}
		TAILQ_REMOVE(&parked_vths, vmth, tq_next);
		spin_pdr_unlock(&park_lock);

		gth = (struct guest_thread*)vmth;
		cth = gth->buddy;
		uthread_init((struct uthread*)gth, &gth_attr);
		uthread_init((struct uthread*)cth, &cth_attr);
		return (struct vthread*)gth;
	}

	struct vthread vthread_alloc(struct virtual_machine vm,
	struct vmm_gpcore_init *gpci)
	{
	static parlib_once_t once = PARLIB_ONCE_INIT;
	struct guest_thread *gth;
	struct vthread *vth;
	int ret;

	parlib_run_once(&once, vmsetup, vm);

	vth = get_parked_vth(vm);
	if (vth)
	return vth;
	uth_mutex_lock(&vm->mtx);
	ret = syscall(SYS_vmm_add_gpcs, 1, gpci);
	assert(ret == 1);
	gth = create_guest_thread(vm, vm->nr_gpcs, gpci);
	assert(gth);
	__add_gth_to_vm(vm, gth);
	uth_mutex_unlock(&vm->mtx);
	/* TODO: somewhat arch specific */
	gth_to_vmtf(gth)->tf_cr3 = (uintptr_t)vm->root;
	return (struct vthread*)gth;
	}

	/* TODO: this is arch specific
	 *
	 * Seeds @vth's VM trapframe: rip = @entry_pt, rdi = @arg, rsp = @stacktop,
	 * and rflags = FL_RSVD_1. */
	void vthread_init_ctx(struct vthread *vth, uintptr_t entry_pt, uintptr_t arg,
	                      uintptr_t stacktop)
	{
		struct vm_trapframe *tf = vth_to_vmtf(vth);

		tf->tf_rflags = FL_RSVD_1;
		tf->tf_rsp = stacktop;
		tf->tf_rdi = arg;
		tf->tf_rip = entry_pt;
	}

	/* Starts @vthread by passing it, viewed as a guest_thread, to
	 * start_guest_thread(). */
	void vthread_run(struct vthread *vthread)
	{
		struct guest_thread *gth = (struct guest_thread*)vthread;

		start_guest_thread(gth);
	}

	#define DEFAULT_STACK_SIZE 65536
	/* Allocates a PGSIZE-aligned stack of DEFAULT_STACK_SIZE bytes and passes its
	 * address range to add_pte_entries() for @vm.
	 *
	 * Returns the address of the last word of the stack (which is also written
	 * once, so it is already touched), or 0 if the allocation fails. */
	static uintptr_t alloc_stacktop(struct virtual_machine *vm)
	{
		void *mem;
		uintptr_t *stack, *tos;

		/* posix_memalign() stores through a void **; give it a real void *
		 * rather than type-punning the address of a uintptr_t *. */
		if (posix_memalign(&mem, PGSIZE, DEFAULT_STACK_SIZE))
			return 0;
		stack = mem;
		add_pte_entries(vm, (uintptr_t)stack,
		                (uintptr_t)stack + DEFAULT_STACK_SIZE);
		/* touch the top word on the stack so we don't page fault
		 * on that in the VM.  Index by the element type so this stays the last
		 * word whatever the width of uintptr_t. */
		tos = &stack[DEFAULT_STACK_SIZE / sizeof(*stack) - 1];
		*tos = 0;
		return (uintptr_t)tos;
	}

	static uintptr_t vth_get_stack(struct vthread *vth)
	{
	struct guest_thread gth = (struct guest_thread)vth;
	struct vthread_info info = (struct vthread_info)gth->user_data;
	uintptr_t stacktop;

	if (info) {
	assert(info->stacktop);
	return info->stacktop;
	}
	stacktop = alloc_stacktop(vth_to_vm(vth));
	assert(stacktop);
	/* Yes, an evil part of me thought of using the top of the stack for
	* this struct's storage. */
	gth->user_data = malloc(sizeof(struct vthread_info));
	assert(gth->user_data);
	info = (struct vthread_info*)gth->user_data;
	info->stacktop = stacktop;
	return stacktop;
	}

	/* Creates and starts a vthread in @vm that begins execution at @entry with
	 * @arg as its argument.  Returns the vthread, or NULL if none was allocated.
	 *
	 * NOTE(review): gpci_init() allocates pages on every call, but vthread_alloc()
	 * may hand back a parked vthread without using gpci - those pages look
	 * unreferenced in that case; confirm. */
	struct vthread *vthread_create(struct virtual_machine *vm, void *entry,
	                               void *arg)
	{
		struct vthread *vth;
		struct vmm_gpcore_init gpci[1];

		gpci_init(gpci);
		vth = vthread_alloc(vm, gpci);
		if (!vth)
			return NULL;
		vthread_init_ctx(vth, (uintptr_t)entry, (uintptr_t)arg,
		                 vth_get_stack(vth));
		vthread_run(vth);
		return vth;
	}

	void vthread_join(struct vthread vth, void *retval_loc)
	{
	struct ctlr_thread cth = ((struct guest_thread)vth)->buddy;

	uthread_join((struct uthread*)cth, retval_loc);
	}

	/* Variadic front end for raw_vmcall().
	 *
	 * Always reads five `long` arguments from the variable argument list and
	 * forwards them, followed by @vmcall_nr, to raw_vmcall().  Returns whatever
	 * raw_vmcall() returns. */
	long vmcall(unsigned int vmcall_nr, ...)
	{
		long args[5];
		va_list ap;
		int i;

		va_start(ap, vmcall_nr);
		for (i = 0; i < 5; i++)
			args[i] = va_arg(ap, long);
		va_end(ap);

		return raw_vmcall(args[0], args[1], args[2], args[3], args[4],
		                  vmcall_nr);
	}

	bool vth_handle_vmcall(struct guest_thread gth, struct vm_trapframe vm_tf)
	{
	switch (vm_tf->tf_rax) {
	case VTH_VMCALL_NULL:
	goto out_ok;
	case VTH_VMCALL_PRINTC:
	fprintf(stdout, "%c", vm_tf->tf_rdi);
	fflush(stdout);
	goto out_ok;
	case VTH_VMCALL_EXIT:
	uth_2ls_thread_exit((void*)vm_tf->tf_rdi);
	assert(0);
	default:
	fprintf(stderr, "Unknown syscall nr %d\n", vm_tf->tf_rax);
	return FALSE;
	}
	assert(0);
	out_ok:
	vm_tf->tf_rip += 3;
	return TRUE;
	}