/* Copyright (c) 2016 Google Inc.
 *
 * See LICENSE for details.
 *
 * Helper functions for virtual machines */

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <parlib/bitmask.h>
#include <parlib/uthread.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/queue.h>
#include <vmm/vmm.h>
#include <vmm/vthread.h>

/* Exited vthreads are parked on this list for reuse rather than destroyed;
 * see __vthread_exited() and get_parked_vth(). */
static struct vmm_thread_tq parked_vths = TAILQ_HEAD_INITIALIZER(parked_vths);
static struct spin_pdr_lock park_lock = SPINPDR_INITIALIZER;

static void *pages(size_t count)
{
	unsigned long flags = MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE;
	void *v;

	v = mmap(0, count * PGSIZE, PROT_READ | PROT_WRITE, flags, -1, 0);
	/* Callers check for NULL; don't leak MAP_FAILED out of here. */
	return v == MAP_FAILED ? NULL : v;
}

static void vmsetup(void *arg)
{
	struct virtual_machine *vm = (struct virtual_machine *)arg;

	setup_paging(vm);
	vm->nr_gpcs = 0;
	vm->__gths = NULL;
	vm->gth_array_elem = 0;
	uthread_mcp_init();
}

void gpci_init(struct vmm_gpcore_init *gpci)
{
	uint8_t *p;

	/* Technically, we don't need these pages for all guests.  Currently,
	 * the kernel requires them. */
	p = pages(3);
	if (!p)
		panic("Can't allocate 3 pages for guest: %r");
	gpci->posted_irq_desc = &p[0];
	gpci->vapic_addr = &p[PGSIZE];
	gpci->apic_addr = &p[2 * PGSIZE];
	/* TODO: once we are making these GPCs at the same time as vthreads, we
	 * should set fsbase == the TLS desc of the vthread (if any). */
	gpci->fsbase = 0;
	gpci->gsbase = 0;
}

/* Helper, grows the array of guest_threads in vm.  Concurrent readers
 * (gpcid_to_gth()) need to use a seq-lock-style of concurrency.  They could
 * read the old array even after we free it.
 *
 * Unlike in the kernel, concurrent readers in userspace shouldn't even read
 * freed memory.  Electric fence could catch that fault.  Until we have a
 * decent userspace RCU, we can avoid those faults with high probability (WHP)
 * by just sleeping before the free. */
static void __grow_gth_array(struct virtual_machine *vm,
                             unsigned int new_nr_gths)
{
	struct guest_thread **new_array, **old_array;
	size_t new_nr_elem;

	if (new_nr_gths <= vm->gth_array_elem)
		return;
	/* TODO: (RCU) we could defer the free */
	old_array = vm->__gths;
	new_nr_elem = MAX(vm->gth_array_elem * 2, new_nr_gths);
	new_array = calloc(new_nr_elem, sizeof(void*));
	assert(new_array);
	memcpy(new_array, vm->__gths, sizeof(void*) * vm->nr_gpcs);
	wmb();	/* all elements written before changing pointer */
	vm->__gths = new_array;
	vm->gth_array_elem = new_nr_elem;
	/* Give any reader that grabbed the old pointer a chance to finish
	 * before we free it (the electric-fence hack mentioned above). */
	uthread_usleep(1000);
	free(old_array);
}

/* Caller holds the vm's mtx (hence the __ prefix); readers are lockless. */
void __add_gth_to_vm(struct virtual_machine *vm, struct guest_thread *gth)
{
	__grow_gth_array(vm, vm->nr_gpcs + 1);
	vm->__gths[vm->nr_gpcs] = gth;
	wmb();	/* concurrent readers will check nr_gpcs first */
	vm->nr_gpcs++;
}
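
/* Sketch of the reader side these barriers pair with (hypothetical; the real
 * gpcid_to_gth() lives elsewhere).  Readers check nr_gpcs before touching the
 * array, so they never see an unwritten slot:
 *
 *	struct guest_thread *gpcid_to_gth(struct virtual_machine *vm,
 *	                                  unsigned int gpcid)
 *	{
 *		struct guest_thread **gths;
 *
 *		if (gpcid >= ACCESS_ONCE(vm->nr_gpcs))
 *			return NULL;
 *		rmb();	// read nr_gpcs before the array pointer/element
 *		gths = ACCESS_ONCE(vm->__gths);
 *		return gths[gpcid];
 *	}
 */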

/* If we fully destroy these uthreads, we'll need to call uthread_cleanup() */
void __vthread_exited(struct vthread *vth)
{
	spin_pdr_lock(&park_lock);
	TAILQ_INSERT_HEAD(&parked_vths, (struct vmm_thread*)vth, tq_next);
	spin_pdr_unlock(&park_lock);
}

/* The tricky part is that we need to reinit the threads */
static struct vthread *get_parked_vth(struct virtual_machine *vm)
{
	struct vmm_thread *vmth;
	struct guest_thread *gth;
	struct ctlr_thread *cth;
	/* These are from create_guest_thread() */
	struct uth_thread_attr gth_attr = {.want_tls = FALSE};
	struct uth_thread_attr cth_attr = {.want_tls = TRUE};

	spin_pdr_lock(&park_lock);
	vmth = TAILQ_FIRST(&parked_vths);
	if (!vmth) {
		spin_pdr_unlock(&park_lock);
		return NULL;
	}
	TAILQ_REMOVE(&parked_vths, vmth, tq_next);
	spin_pdr_unlock(&park_lock);

	gth = (struct guest_thread*)vmth;
	cth = gth->buddy;
	uthread_init((struct uthread*)gth, &gth_attr);
	uthread_init((struct uthread*)cth, &cth_attr);
	return (struct vthread*)gth;
}

struct vthread *vthread_alloc(struct virtual_machine *vm,
                              struct vmm_gpcore_init *gpci)
{
	static parlib_once_t once = PARLIB_ONCE_INIT;
	struct guest_thread *gth;
	struct vthread *vth;
	int ret;

	parlib_run_once(&once, vmsetup, vm);

	/* Parked vths already have their GPC in the kernel; gpci only matters
	 * for newly created ones. */
	vth = get_parked_vth(vm);
	if (vth)
		return vth;
	uth_mutex_lock(&vm->mtx);
	ret = syscall(SYS_vmm_add_gpcs, 1, gpci);
	assert(ret == 1);
	gth = create_guest_thread(vm, vm->nr_gpcs, gpci);
	assert(gth);
	__add_gth_to_vm(vm, gth);
	uth_mutex_unlock(&vm->mtx);
	/* TODO: somewhat arch specific */
	gth_to_vmtf(gth)->tf_cr3 = (uintptr_t)vm->root;
	return (struct vthread*)gth;
}

/* TODO: this is arch specific */
void vthread_init_ctx(struct vthread *vth, uintptr_t entry_pt, uintptr_t arg,
                      uintptr_t stacktop)
{
	struct vm_trapframe *vm_tf = vth_to_vmtf(vth);

	vm_tf->tf_rip = entry_pt;
	vm_tf->tf_rdi = arg;
	vm_tf->tf_rsp = stacktop;
}
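
/* Given that ctx layout, a guest entry point is an ordinary C function taking
 * one argument: tf_rdi holds the first SysV argument register.  A sketch
 * (my_guest_entry is illustrative, not part of this API):
 *
 *	void my_guest_entry(void *arg)
 *	{
 *		vmcall(VTH_VMCALL_PRINTC, 'A');
 *		vmcall(VTH_VMCALL_EXIT, (long)arg);	// retval for joiners
 *	}
 */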

void vthread_run(struct vthread *vthread)
{
	start_guest_thread((struct guest_thread*)vthread);
}

#define DEFAULT_STACK_SIZE 65536
static uintptr_t alloc_stacktop(struct virtual_machine *vm)
{
	int ret;
	uintptr_t *stack, *tos;

	ret = posix_memalign((void **)&stack, PGSIZE, DEFAULT_STACK_SIZE);
	if (ret)
		return 0;
	add_pte_entries(vm, (uintptr_t)stack,
	                (uintptr_t)stack + DEFAULT_STACK_SIZE);
	/* Touch the top word on the stack so we don't page fault on it in the
	 * VM. */
	tos = &stack[DEFAULT_STACK_SIZE / sizeof(uintptr_t) - 1];
	*tos = 0;
	return (uintptr_t)tos;
}

static uintptr_t vth_get_stack(struct vthread *vth)
{
	struct guest_thread *gth = (struct guest_thread*)vth;
	struct vthread_info *info = (struct vthread_info*)gth->user_data;
	uintptr_t stacktop;

	if (info) {
		assert(info->stacktop);
		return info->stacktop;
	}
	stacktop = alloc_stacktop(vth_to_vm(vth));
	assert(stacktop);
	/* Yes, an evil part of me thought of using the top of the stack for
	 * this struct's storage. */
	gth->user_data = malloc(sizeof(struct vthread_info));
	assert(gth->user_data);
	info = (struct vthread_info*)gth->user_data;
	info->stacktop = stacktop;
	return stacktop;
}

struct vthread *vthread_create(struct virtual_machine *vm, void *entry,
                               void *arg)
{
	struct vthread *vth;
	struct vmm_gpcore_init gpci[1];

	gpci_init(gpci);
	vth = vthread_alloc(vm, gpci);
	if (!vth)
		return NULL;
	vthread_init_ctx(vth, (uintptr_t)entry, (uintptr_t)arg,
	                 vth_get_stack(vth));
	vthread_run(vth);
	return vth;
}

void vthread_join(struct vthread *vth, void **retval_loc)
{
	struct ctlr_thread *cth = ((struct guest_thread*)vth)->buddy;

	uthread_join((struct uthread*)cth, retval_loc);
}
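
/* Host-side usage sketch (illustrative; assumes a zero-initialized vm is
 * acceptable, since the first vthread_alloc() runs vmsetup() to fill in the
 * rest; check your vm's mtx initialization requirements):
 *
 *	static struct virtual_machine vm;	// static => zeroed
 *	struct vthread *vth;
 *	void *retval;
 *
 *	vth = vthread_create(&vm, my_guest_entry, some_arg);
 *	assert(vth);
 *	vthread_join(vth, &retval);
 *	// retval is whatever the guest passed to VTH_VMCALL_EXIT
 */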

long vmcall(unsigned int vmcall_nr, ...)
{
	va_list vl;
	long a0, a1, a2, a3, a4;

	va_start(vl, vmcall_nr);
	a0 = va_arg(vl, long);
	a1 = va_arg(vl, long);
	a2 = va_arg(vl, long);
	a3 = va_arg(vl, long);
	a4 = va_arg(vl, long);
	va_end(vl);
	return raw_vmcall(a0, a1, a2, a3, a4, vmcall_nr);
}
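
/* Note: vmcall() always pulls five longs off the va_list, since raw_vmcall()
 * takes five fixed args.  Callers passing fewer work in practice on SysV
 * x86-64 (the extra slots are read as garbage and ignored by the handler),
 * though it's not strictly portable C.  E.g., from guest code:
 *
 *	vmcall(VTH_VMCALL_PRINTC, 'A');	// a1-a4 are don't-cares
 */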

bool vth_handle_vmcall(struct guest_thread *gth, struct vm_trapframe *vm_tf)
{
	switch (vm_tf->tf_rax) {
	case VTH_VMCALL_NULL:
		goto out_ok;
	case VTH_VMCALL_PRINTC:
		fputc((int)vm_tf->tf_rdi, stdout);
		fflush(stdout);
		goto out_ok;
	case VTH_VMCALL_EXIT:
		uth_2ls_thread_exit((void*)vm_tf->tf_rdi);
		assert(0);	/* never returns */
	default:
		fprintf(stderr, "Unknown vmcall nr %lu\n",
		        (unsigned long)vm_tf->tf_rax);
		return FALSE;
	}
	assert(0);
out_ok:
	vm_tf->tf_rip += 3;	/* skip the 3-byte vmcall instruction */
	return TRUE;
}
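
/* A VM that wants extra hypercalls can layer its own handler in front of this
 * one and fall back for the common cases (a sketch; MY_VMCALL_FOO and
 * do_foo() are hypothetical):
 *
 *	bool my_handle_vmcall(struct guest_thread *gth,
 *	                      struct vm_trapframe *vm_tf)
 *	{
 *		switch (vm_tf->tf_rax) {
 *		case MY_VMCALL_FOO:
 *			vm_tf->tf_rax = do_foo(vm_tf->tf_rdi);
 *			vm_tf->tf_rip += 3;	// skip the vmcall insn
 *			return TRUE;
 *		}
 *		return vth_handle_vmcall(gth, vm_tf);
 *	}
 */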