/* Copyright (c) 2015-2016 Google Inc.
 * See LICENSE for details. */

#include <parlib/common.h>
#include <vmm/virtio.h>
#include <vmm/virtio_mmio.h>
#include <vmm/virtio_ids.h>
#include <vmm/virtio_config.h>
#include <vmm/vmm.h>
#include <parlib/arch/trap.h>
#include <parlib/bitmask.h>
#include <stdio.h>

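/* Returns true if the outstanding-notification bit is set in the guest
 * pcore's posted interrupt descriptor, i.e. an IRQ was posted but the
 * notification is still outstanding. */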
static bool pir_notif_is_set(struct vmm_gpcore_init *gpci)
{
        return GET_BITMASK_BIT(gpci->posted_irq_desc, VMX_POSTED_OUTSTANDING_NOTIF);
}

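/* RVI is the low byte of the VMCS guest interrupt status.  A nonzero RVI
 * means the CPU recognized a virtual IRQ that it has not delivered yet. */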
static bool rvi_is_set(struct guest_thread *gth)
{
        uint8_t rvi = gth_to_vmtf(gth)->tf_guest_intr_status & 0xff;

        return rvi != 0;
}

/* Blocks a guest pcore / thread until it has an IRQ pending. Syncs with
 * vmm_interrupt_guest(). */
static void sleep_til_irq(struct guest_thread *gth)
{
        struct vmm_gpcore_init *gpci = gth_to_gpci(gth);

        /* The invariant is that if an IRQ is posted, but not delivered, we will not
         * sleep. Anyone who posts an IRQ must signal after setting it.
         * vmm_interrupt_guest() does this. If we use alternate sources of IRQ
         * posting, we'll need to revisit this.
         *
         * Although vmm_interrupt_guest() only writes OUTSTANDING_NOTIF, it's
         * possible that the hardware attempted to post the interrupt. In SDM
         * parlance, the processor could have "recognized" the virtual IRQ, but not
         * delivered it yet. This could happen if the guest had executed "sti", but
         * not "hlt" yet. The IRQ was posted and recognized, but not delivered
         * ("sti blocking"). Then the guest executes "hlt", and vmexits.
         * OUTSTANDING_NOTIF will be clear in this case. RVI should be set - at
         * least to the vector we just sent, but possibly to a greater vector if
         * multiple were sent. RVI should only be cleared after virtual IRQs were
         * actually delivered. So checking OUTSTANDING_NOTIF and RVI should
         * suffice.
         *
         * Generally, we should also check GUEST_INTERRUPTIBILITY_INFO to see if
         * there's some reason to not deliver the interrupt and check things like
         * the VPPR (priority register). But since we're emulating a halt, mwait,
         * or something else that needs to be woken by an IRQ, we can ignore that
         * and just wake them up. Note that we won't actually deliver the IRQ,
         * we'll just restart the guest and the hardware will deliver the virtual
         * IRQ at the appropriate time. So in the event that something weird
         * happens, the halt/mwait just returns spuriously.
         *
         * The more traditional race here is if the halt starts concurrently with
         * the post; that's why we sync with the mutex to make sure there is an
         * ordering between the actual halt (this function) and the posting. */
        uth_mutex_lock(gth->halt_mtx);
        while (!(pir_notif_is_set(gpci) || rvi_is_set(gth)))
                uth_cond_var_wait(gth->halt_cv, gth->halt_mtx);
        uth_mutex_unlock(gth->halt_mtx);
}

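/* Handles an EPT violation by decoding the faulting instruction and emulating
 * the access: virtio-mmio device registers, the IOAPIC page, or guest-physical
 * page 0.  Returns FALSE if we couldn't decode or handle the access. */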
static bool handle_ept_fault(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        struct virtual_machine *vm = gth_to_vm(gth);
        uint64_t gpa, *regp;
        uint8_t regx;
        int store, size;
        int advance;

        int ret = decode(gth, &gpa, &regx, &regp, &store, &size, &advance);

        if (ret < 0)
                return FALSE;
        if (ret == VM_PAGE_FAULT) {
                /* We were unable to translate RIP due to an ept fault */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                        | VM_TRAP_ERROR_CODE
                                        | VM_TRAP_HARDWARE
                                        | HW_TRAP_PAGE_FAULT;
                return TRUE;
        }

        assert(size >= 0);
        /* TODO use helpers for some of these addr checks. the fee/fec ones might
         * be wrong too. */
        for (int i = 0; i < VIRTIO_MMIO_MAX_NUM_DEV; i++) {
                if (vm->virtio_mmio_devices[i] == NULL)
                        continue;
                if (PG_ADDR(gpa) != vm->virtio_mmio_devices[i]->addr)
                        continue;
                /* TODO: can the guest cause us to spawn off infinite threads? */
                if (store)
                        virtio_mmio_wr(vm, vm->virtio_mmio_devices[i], gpa, size,
                                       (uint32_t *)regp);
                else
                        *regp = virtio_mmio_rd(vm, vm->virtio_mmio_devices[i], gpa, size);
                vm_tf->tf_rip += advance;
                return TRUE;
        }
        if (PG_ADDR(gpa) == 0xfec00000) {
                do_ioapic(gth, gpa, regx, regp, store);
        } else if (PG_ADDR(gpa) == 0) {
                memmove(regp, &vm->low4k[gpa], size);
        } else {
                fprintf(stderr, "EPT violation: can't handle %p\n", gpa);
                fprintf(stderr, "RIP %p, exit reason 0x%x\n", vm_tf->tf_rip,
                        vm_tf->tf_exit_reason);
                fprintf(stderr, "Returning 0xffffffff\n");
                showstatus(stderr, gth);
                /* Just fill the whole register for now. */
                *regp = (uint64_t) -1;
                return FALSE;
        }
        vm_tf->tf_rip += advance;
        return TRUE;
}

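/* The guest passes a character in RDI; echo it to stdout. */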
static bool handle_vmcall_printc(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        uint8_t byte;

        byte = vm_tf->tf_rdi;
        printf("%c", byte);
        if (byte == '\n')
                printf("%c", '%');
        fflush(stdout);
        return TRUE;
}

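/* Boots a secondary guest pcore (AP).  Only the BSP (guest pcore 0) may call
 * this; it passes the AP's starting RIP in RDI and starting RSP in RSI.  We
 * reuse the BSP's CR3 and start the AP's guest thread. */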
static bool handle_vmcall_smpboot(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        struct vm_trapframe *vm_tf_ap;
        struct virtual_machine *vm = gth_to_vm(gth);
        int cur_pcores = vm->up_gpcs;

        /* Check if we're guest pcore 0. Only the BSP is allowed to start APs. */
        if (vm_tf->tf_guest_pcoreid != 0) {
                fprintf(stderr,
                        "Only guest pcore 0 is allowed to start APs. core was %ld\n",
                        vm_tf->tf_guest_pcoreid);
                return FALSE;
        }

        /* Check if we've already started the maximum number of gpcs; if so, bail. */
        if (vm->nr_gpcs == cur_pcores) {
                fprintf(stderr,
                        "guest tried to start up too many cores. max was %ld, current up %ld\n",
                        (long)vm->nr_gpcs, (long)cur_pcores);
                return FALSE;
        }

        /* Start up secondary core. */
        vm_tf_ap = gth_to_vmtf(vm->gths[cur_pcores]);
        /* We use the BSP's CR3 for now. This should be fine because they
         * change it later anyway. */
        vm_tf_ap->tf_cr3 = vm_tf->tf_cr3;

        /* Starting RIP is passed in via rdi. */
        vm_tf_ap->tf_rip = vm_tf->tf_rdi;

        /* Starting RSP is passed in via rsi. */
        vm_tf_ap->tf_rsp = vm_tf->tf_rsi;

        vm->up_gpcs++;

        start_guest_thread(vm->gths[cur_pcores]);

        return TRUE;
}

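/* Dispatches a vmcall based on RAX.  On success, we advance RIP past the
 * 3-byte vmcall instruction. */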
static bool handle_vmcall(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        bool retval = FALSE;

        if (gth->vmcall)
                return gth->vmcall(gth, vm_tf);

        switch (vm_tf->tf_rax) {
        case VMCALL_PRINTC:
                retval = handle_vmcall_printc(gth);
                break;
        case VMCALL_SMPBOOT:
                retval = handle_vmcall_smpboot(gth);
                break;
        }

        if (retval)
                vm_tf->tf_rip += 3;

        return retval;
}

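/* Handles a port I/O exit via io().  If we faulted while translating RIP, we
 * inject a page fault into the guest instead. */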
static bool handle_io(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        int ret = io(gth);

        if (ret < 0)
                return FALSE;
        if (ret == VM_PAGE_FAULT) {
                /* We were unable to translate RIP due to an ept fault */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                        | VM_TRAP_ERROR_CODE
                                        | VM_TRAP_HARDWARE
                                        | HW_TRAP_PAGE_FAULT;
        }
        return TRUE;
}

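/* Handles rdmsr/wrmsr exits via msrio().  An unhandled MSR access gets a #GP
 * injected into the guest; otherwise we skip the 2-byte instruction. */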
static bool handle_msr(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        if (msrio(gth, gth_to_gpci(gth), vm_tf->tf_exit_reason)) {
                /* Use event injection through vmctl to send a general protection
                 * fault.  vmctl.interrupt gets written to the VM-Entry
                 * Interruption-Information Field by vmx. */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                        | VM_TRAP_ERROR_CODE
                                        | VM_TRAP_HARDWARE
                                        | HW_TRAP_GP_FAULT;
        } else {
                vm_tf->tf_rip += 2;
        }
        return TRUE;
}

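/* Decodes the faulting instruction and forwards the access to the virtual
 * APIC via __apic_access(). */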
static bool handle_apic_access(struct guest_thread *gth)
{
        uint64_t gpa, *regp;
        uint8_t regx;
        int store, size;
        int advance;
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        if (decode(gth, &gpa, &regx, &regp, &store, &size, &advance))
                return FALSE;
        if (__apic_access(gth, gpa, regx, regp, store))
                return FALSE;
        vm_tf->tf_rip += advance;
        return TRUE;
}

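/* The guest executed hlt: sleep until an IRQ is posted, then advance RIP past
 * the 1-byte hlt.  If halt_exit is set, we leave the exit unhandled for the
 * caller. */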
static bool handle_halt(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        if (gth->halt_exit)
                return FALSE;
        /* It's possible the guest disabled IRQs and halted, perhaps waiting on an
         * NMI or something. If we need to support that, we can change this. */
        sleep_til_irq(gth);
        vm_tf->tf_rip += 1;
        return TRUE;
}

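/* The guest executed mwait: treat it like hlt and sleep until an IRQ is
 * posted, then advance RIP past the 3-byte mwait. */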
static bool handle_mwait(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        /* TODO: we need to handle the actual monitor part of mwait. This just
         * implements the power management / halting. Likewise, it's possible IRQs
         * are disabled (as with halt). */
        sleep_til_irq(gth);
        vm_tf->tf_rip += 3;
        return TRUE;
}

/* Is this a VMM-specific thing, or generic?
 *
 * What do we do when we want to kill the VM?  What are our other options? */
bool handle_vmexit(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        switch (vm_tf->tf_exit_reason) {
        case EXIT_REASON_EPT_VIOLATION:
                return handle_ept_fault(gth);
        case EXIT_REASON_VMCALL:
                return handle_vmcall(gth);
        case EXIT_REASON_IO_INSTRUCTION:
                return handle_io(gth);
        case EXIT_REASON_MSR_WRITE:
        case EXIT_REASON_MSR_READ:
                return handle_msr(gth);
        case EXIT_REASON_APIC_ACCESS:
                return handle_apic_access(gth);
        case EXIT_REASON_HLT:
                return handle_halt(gth);
        case EXIT_REASON_MWAIT_INSTRUCTION:
                return handle_mwait(gth);
        case EXIT_REASON_EXTERNAL_INTERRUPT:
        case EXIT_REASON_APIC_WRITE:
                /* TODO: just ignore these? */
                return TRUE;
        default:
                fprintf(stderr, "Don't know how to handle exit %d\n",
                        vm_tf->tf_exit_reason);
                fprintf(stderr, "RIP %p, exit reason 0x%x\n", vm_tf->tf_rip,
                        vm_tf->tf_exit_reason);
                return FALSE;
        }
}