/* Copyright (c) 2015-2016 Google Inc.
* See LICENSE for details. */
#include <parlib/common.h>
#include <vmm/virtio.h>
#include <vmm/virtio_mmio.h>
#include <vmm/virtio_ids.h>
#include <vmm/virtio_config.h>
#include <vmm/vmm.h>
#include <parlib/arch/trap.h>
#include <parlib/bitmask.h>
#include <stdio.h>
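
/* Returns TRUE if the 'outstanding notification' bit is set in the guest
 * pcore's posted interrupt descriptor, meaning an IRQ was posted but not yet
 * delivered. */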
static bool pir_notif_is_set(struct vmm_gpcore_init *gpci)
{
return GET_BITMASK_BIT(gpci->posted_irq_desc, VMX_POSTED_OUTSTANDING_NOTIF);
}
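
/* RVI (Requesting Virtual Interrupt) is the low byte of the guest interrupt
 * status field: the vector of the highest-priority virtual IRQ that was
 * recognized, but not yet delivered. */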
static bool rvi_is_set(struct guest_thread *gth)
{
uint8_t rvi = gth_to_vmtf(gth)->tf_guest_intr_status & 0xff;
return rvi != 0;
}

/* Blocks a guest pcore / thread until it has an IRQ pending.  Syncs with
 * vmm_interrupt_guest(). */
static void sleep_til_irq(struct guest_thread *gth)
{
struct vmm_gpcore_init *gpci = gth_to_gpci(gth);
/* The invariant is that if an IRQ is posted, but not delivered, we will not
* sleep. Anyone who posts an IRQ must signal after setting it.
* vmm_interrupt_guest() does this. If we use alternate sources of IRQ
	 * posting, we'll need to revisit this.
*
* Although vmm_interrupt_guest() only writes OUTSTANDING_NOTIF, it's
* possible that the hardware attempted to post the interrupt. In SDM
* parlance, the processor could have "recognized" the virtual IRQ, but not
* delivered it yet. This could happen if the guest had executed "sti", but
* not "hlt" yet. The IRQ was posted and recognized, but not delivered
* ("sti blocking"). Then the guest executes "hlt", and vmexits.
* OUTSTANDING_NOTIF will be clear in this case. RVI should be set - at
* least to the vector we just sent, but possibly to a greater vector if
* multiple were sent. RVI should only be cleared after virtual IRQs were
* actually delivered. So checking OUTSTANDING_NOTIF and RVI should
* suffice.
*
* Generally, we should also check GUEST_INTERRUPTIBILITY_INFO to see if
* there's some reason to not deliver the interrupt and check things like
* the VPPR (priority register). But since we're emulating a halt, mwait,
* or something else that needs to be woken by an IRQ, we can ignore that
* and just wake them up. Note that we won't actually deliver the IRQ,
* we'll just restart the guest and the hardware will deliver the virtual
* IRQ at the appropriate time. So in the event that something weird
* happens, the halt/mwait just returns spuriously.
*
* The more traditional race here is if the halt starts concurrently with
* the post; that's why we sync with the mutex to make sure there is an
* ordering between the actual halt (this function) and the posting. */
uth_mutex_lock(gth->halt_mtx);
while (!(pir_notif_is_set(gpci) || rvi_is_set(gth)))
uth_cond_var_wait(gth->halt_cv, gth->halt_mtx);
uth_mutex_unlock(gth->halt_mtx);
}
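
/* For reference, a sketch of the posting side that sleep_til_irq() pairs
 * with.  This is not the actual vmm_interrupt_guest(); the helper names are
 * assumptions.  What matters is the ordering: post the IRQ and set the notif
 * bit first, then signal under halt_mtx:
 *
 *	SET_BITMASK_BIT_ATOMIC(gpci->posted_irq_desc, vector);
 *	SET_BITMASK_BIT_ATOMIC(gpci->posted_irq_desc,
 *	                       VMX_POSTED_OUTSTANDING_NOTIF);
 *	uth_mutex_lock(gth->halt_mtx);
 *	uth_cond_var_broadcast(gth->halt_cv);
 *	uth_mutex_unlock(gth->halt_mtx);
 */

/* Handles an EPT violation: decode the faulting instruction, then route the
 * access to a virtio-mmio device, the IOAPIC page, or the low 4K of guest
 * memory.  Returns TRUE if the guest can be restarted. */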
static bool handle_ept_fault(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
struct virtual_machine *vm = gth_to_vm(gth);
uint64_t gpa, *regp;
uint8_t regx;
int store, size;
int advance;
int ret = decode(gth, &gpa, &regx, &regp, &store, &size, &advance);
if (ret < 0)
return FALSE;
if (ret == VM_PAGE_FAULT) {
		/* We were unable to translate RIP due to an EPT fault */
vm_tf->tf_trap_inject = VM_TRAP_VALID
| VM_TRAP_ERROR_CODE
| VM_TRAP_HARDWARE
| HW_TRAP_PAGE_FAULT;
return TRUE;
}
assert(size >= 0);
	/* TODO: use helpers for some of these addr checks.  The LAPIC/IOAPIC
	 * (0xfee00000/0xfec00000) ones might be wrong too. */
for (int i = 0; i < VIRTIO_MMIO_MAX_NUM_DEV; i++) {
if (vm->virtio_mmio_devices[i] == NULL)
continue;
if (PG_ADDR(gpa) != vm->virtio_mmio_devices[i]->addr)
continue;
/* TODO: can the guest cause us to spawn off infinite threads? */
if (store)
virtio_mmio_wr(vm, vm->virtio_mmio_devices[i], gpa, size,
(uint32_t *)regp);
else
*regp = virtio_mmio_rd(vm, vm->virtio_mmio_devices[i], gpa, size);
vm_tf->tf_rip += advance;
return TRUE;
}
if (PG_ADDR(gpa) == 0xfec00000) {
do_ioapic(gth, gpa, regx, regp, store);
} else if (PG_ADDR(gpa) == 0) {
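		/* Note this path only handles loads; a store is not written
		 * back to low4k (possibly a latent bug). */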
memmove(regp, &vm->low4k[gpa], size);
} else {
		fprintf(stderr, "EPT violation: can't handle %p\n",
		        (void *)gpa);
		fprintf(stderr, "RIP %p, exit reason 0x%x\n",
		        (void *)vm_tf->tf_rip, vm_tf->tf_exit_reason);
fprintf(stderr, "Returning 0xffffffff\n");
showstatus(stderr, gth);
/* Just fill the whole register for now. */
*regp = (uint64_t) -1;
return FALSE;
}
vm_tf->tf_rip += advance;
return TRUE;
}
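
/* VMCALL_PRINTC: the guest passes a single character in RDI, which we echo to
 * stdout, printing a '%' after each newline. */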
static bool handle_vmcall_printc(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
uint8_t byte;
byte = vm_tf->tf_rdi;
printf("%c", byte);
if (byte == '\n')
printf("%c", '%');
fflush(stdout);
return TRUE;
}
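
/* VMCALL_SMPBOOT: the BSP asks us to start an AP.  Per the register reads
 * below, RDI carries the AP's starting RIP and RSI its starting RSP.  A
 * guest-side sketch (symbol names are assumptions, not from this codebase):
 *
 *	movq	$VMCALL_SMPBOOT, %rax
 *	movq	$ap_entry, %rdi		# starting RIP for the new core
 *	movq	$ap_stack_top, %rsi	# starting RSP for the new core
 *	vmcall
 */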
static bool handle_vmcall_smpboot(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
struct vm_trapframe *vm_tf_ap;
struct virtual_machine *vm = gth_to_vm(gth);
int cur_pcores = vm->up_gpcs;
/* Check if we're guest pcore 0. Only the BSP is allowed to start APs. */
if (vm_tf->tf_guest_pcoreid != 0) {
fprintf(stderr,
"Only guest pcore 0 is allowed to start APs. core was %ld\n",
vm_tf->tf_guest_pcoreid);
return FALSE;
}
/* Check if we've reached the maximum, if yes, blow out. */
if (vm->nr_gpcs == cur_pcores) {
		fprintf(stderr,
		        "guest tried to start up too many cores. max was %ld, current up %ld\n",
		        (long)vm->nr_gpcs, (long)cur_pcores);
return FALSE;
}
/* Start up secondary core. */
vm_tf_ap = gth_to_vmtf(vm->gths[cur_pcores]);
	/* We use the BSP's CR3 for now.  This should be fine because the guest
	 * loads its own CR3 later anyway. */
vm_tf_ap->tf_cr3 = vm_tf->tf_cr3;
/* Starting RIP is passed in via rdi. */
vm_tf_ap->tf_rip = vm_tf->tf_rdi;
/* Starting RSP is passed in via rsi. */
vm_tf_ap->tf_rsp = vm_tf->tf_rsi;
vm->up_gpcs++;
start_guest_thread(vm->gths[cur_pcores]);
return TRUE;
}
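
/* Dispatches a vmcall.  A VM can override us with its own gth->vmcall
 * handler, in which case that handler must advance RIP itself.  Otherwise, on
 * success we advance RIP past the 3-byte vmcall instruction (0f 01 c1). */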
static bool handle_vmcall(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
bool retval = FALSE;
if (gth->vmcall)
return gth->vmcall(gth, vm_tf);
switch (vm_tf->tf_rax) {
case VMCALL_PRINTC:
retval = handle_vmcall_printc(gth);
break;
case VMCALL_SMPBOOT:
retval = handle_vmcall_smpboot(gth);
break;
}
if (retval)
vm_tf->tf_rip += 3;
return retval;
}
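
/* Emulates an in/out instruction via io().  As with EPT faults, if we could
 * not translate RIP, we inject a hardware page fault into the guest. */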
static bool handle_io(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
int ret = io(gth);
if (ret < 0)
return FALSE;
if (ret == VM_PAGE_FAULT) {
		/* We were unable to translate RIP due to an EPT fault */
vm_tf->tf_trap_inject = VM_TRAP_VALID
| VM_TRAP_ERROR_CODE
| VM_TRAP_HARDWARE
| HW_TRAP_PAGE_FAULT;
}
return TRUE;
}
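
/* Emulates rdmsr/wrmsr via msrio().  On failure, we inject a #GP, which is
 * what hardware delivers for an unsupported MSR.  Both rdmsr (0f 32) and
 * wrmsr (0f 30) are 2-byte instructions, hence the fixed advance. */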
static bool handle_msr(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
if (msrio(gth, gth_to_gpci(gth), vm_tf->tf_exit_reason)) {
		/* Use event injection through vmctl to send a general
		 * protection fault.  vmctl.interrupt gets written to the
		 * VM-Entry Interruption-Information Field by vmx. */
vm_tf->tf_trap_inject = VM_TRAP_VALID
| VM_TRAP_ERROR_CODE
| VM_TRAP_HARDWARE
| HW_TRAP_GP_FAULT;
} else {
vm_tf->tf_rip += 2;
}
return TRUE;
}
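
/* Handles an access to the virtual APIC page: decode the instruction, let
 * __apic_access() emulate it, and advance RIP past the instruction. */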
static bool handle_apic_access(struct guest_thread *gth)
{
uint64_t gpa, *regp;
uint8_t regx;
int store, size;
int advance;
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
if (decode(gth, &gpa, &regx, &regp, &store, &size, &advance))
return FALSE;
if (__apic_access(gth, gpa, regx, regp, store))
return FALSE;
vm_tf->tf_rip += advance;
return TRUE;
}
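
/* Emulates hlt by blocking until an IRQ is posted (unless the VM asked for
 * halts to exit, via halt_exit).  hlt is a 1-byte instruction (f4), hence the
 * advance of 1. */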
static bool handle_halt(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
if (gth->halt_exit)
return FALSE;
/* It's possible the guest disabled IRQs and halted, perhaps waiting on an
* NMI or something. If we need to support that, we can change this. */
sleep_til_irq(gth);
vm_tf->tf_rip += 1;
return TRUE;
}
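
/* Emulates the waiting half of mwait, also by blocking until an IRQ is
 * posted; see the TODO below about the monitor half.  mwait is a 3-byte
 * instruction (0f 01 c9), hence the advance of 3. */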
static bool handle_mwait(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
/* TODO: we need to handle the actual monitor part of mwait. This just
* implements the power management / halting. Likewise, it's possible IRQs
* are disabled (as with halt). */
sleep_til_irq(gth);
vm_tf->tf_rip += 3;
return TRUE;
}

/* Is this a vmm-specific thing, or generic?
 *
 * What do we do when we want to kill the VM?  What are our other options? */
bool handle_vmexit(struct guest_thread *gth)
{
struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
switch (vm_tf->tf_exit_reason) {
case EXIT_REASON_EPT_VIOLATION:
return handle_ept_fault(gth);
case EXIT_REASON_VMCALL:
return handle_vmcall(gth);
case EXIT_REASON_IO_INSTRUCTION:
return handle_io(gth);
case EXIT_REASON_MSR_WRITE:
case EXIT_REASON_MSR_READ:
return handle_msr(gth);
case EXIT_REASON_APIC_ACCESS:
return handle_apic_access(gth);
case EXIT_REASON_HLT:
return handle_halt(gth);
case EXIT_REASON_MWAIT_INSTRUCTION:
return handle_mwait(gth);
case EXIT_REASON_EXTERNAL_INTERRUPT:
case EXIT_REASON_APIC_WRITE:
/* TODO: just ignore these? */
return TRUE;
default:
fprintf(stderr, "Don't know how to handle exit %d\n",
vm_tf->tf_exit_reason);
		fprintf(stderr, "RIP %p, exit reason 0x%x\n",
		        (void *)vm_tf->tf_rip, vm_tf->tf_exit_reason);
return FALSE;
}
}