vmm: Add support for changing VMX controls

This adds internal support for changing pin-based, cpu-based, and secondary cpu-based VMX controls. VMMs will want to change some of them on the fly, such as "vmexit on halt." It's not enough to set them once at startup either, since the 2LS's decision may depend on the number of cores available dynamically.

Later patches will add support for userspace to change the flags in vmx_vmm. Once those bits are changed, the next time a GPC reloads, it will have the new controls. At a minimum, GPCs reload any time we run a VM and were previously in userspace. That should be sufficient for 2LSs.

The alternative is to set those VMCS fields on every pop, which will slightly slow down kernel vmexit handling. The VMCS writes are only a few nsec each - basically the minimum cost of any similar instruction.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/arch/x86/vmm/intel/vmx.c b/kern/arch/x86/vmm/intel/vmx.c index 6f9cef7..863fec7 100644 --- a/kern/arch/x86/vmm/intel/vmx.c +++ b/kern/arch/x86/vmm/intel/vmx.c
@@ -325,6 +325,11 @@ void vapic_status_dump_kernel(void *vapic); +static bool vmx_control_can_be_changed(struct vmxec *v, uint32_t ctl) +{ + return v->hw_changeable & v->policy_changeable & ctl; +} + /* * A note on Things You Can't Make Up. * or @@ -401,10 +406,8 @@ * weirdness in the bits, we don't want to run. * The try_set stuff adds particular ugliness but we have to have it. */ - -static bool -check_vmxec_controls(struct vmxec const *v, bool have_true_msr, - uint32_t * result) +static bool check_vmxec_controls(struct vmxec *v, bool have_true_msr, + uint32_t *result) { bool err = false; uint32_t vmx_msr_low, vmx_msr_high; @@ -425,6 +428,7 @@ reserved_0 = (~vmx_msr_low) & (~vmx_msr_high); reserved_1 = vmx_msr_low & vmx_msr_high; changeable_bits = ~(reserved_0 | reserved_1); + v->hw_changeable = changeable_bits; /* * this is very much as follows: @@ -495,7 +499,7 @@ * We're trying to make this as readable as possible. Realistically, it will * rarely if ever change, if the past is any guide. 
*/ -static const struct vmxec pbec = { +static struct vmxec pbec = { .name = "Pin Based Execution Controls", .msr = MSR_IA32_VMX_PINBASED_CTLS, .truemsr = MSR_IA32_VMX_TRUE_PINBASED_CTLS, @@ -508,7 +512,7 @@ .must_be_0 = (PIN_BASED_VMX_PREEMPTION_TIMER), }; -static const struct vmxec cbec = { +static struct vmxec cbec = { .name = "CPU Based Execution Controls", .msr = MSR_IA32_VMX_PROCBASED_CTLS, .truemsr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, @@ -537,10 +541,13 @@ CPU_BASED_PAUSE_EXITING | CPU_BASED_UNCOND_IO_EXITING), - .try_set_0 = (CPU_BASED_MONITOR_EXITING) + .try_set_0 = (CPU_BASED_MONITOR_EXITING), + .policy_changeable = ( + CPU_BASED_HLT_EXITING | + CPU_BASED_PAUSE_EXITING), }; -static const struct vmxec cb2ec = { +static struct vmxec cb2ec = { .name = "CPU Based 2nd Execution Controls", .msr = MSR_IA32_VMX_PROCBASED_CTLS2, .truemsr = MSR_IA32_VMX_PROCBASED_CTLS2, @@ -552,8 +559,6 @@ SECONDARY_EXEC_WBINVD_EXITING), .must_be_0 = ( - //SECONDARY_EXEC_APIC_REGISTER_VIRT | - //SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_DESCRIPTOR_EXITING | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_ENABLE_VPID | @@ -573,7 +578,7 @@ }; -static const struct vmxec vmentry = { +static struct vmxec vmentry = { .name = "VMENTRY controls", .msr = MSR_IA32_VMX_ENTRY_CTLS, .truemsr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, @@ -589,7 +594,7 @@ VM_ENTRY_LOAD_IA32_PAT), }; -static const struct vmxec vmexit = { +static struct vmxec vmexit = { .name = "VMEXIT controls", .msr = MSR_IA32_VMX_EXIT_CTLS, .truemsr = MSR_IA32_VMX_TRUE_EXIT_CTLS, @@ -655,6 +660,7 @@ printk("vmxexec controls is no good.\n"); return; } + assert(cpu_has_secondary_exec_ctrls()); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. 
*/ if ((vmx_msr_high & 0x1fff) > PGSIZE) { @@ -767,12 +773,18 @@ static void __vmx_setup_pcpu(struct guest_pcore *gpc) { struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; + struct vmx_vmm *vmx = &gpc->proc->vmm.vmx; vmcs_write(HOST_TR_BASE, (uintptr_t)pcpui->tss); vmcs_writel(HOST_GDTR_BASE, (uintptr_t)pcpui->gdt); vmcs_write(HOST_GS_BASE, (uintptr_t)pcpui); /* TODO: we might need to also set HOST_IA32_PERF_GLOBAL_CTRL. Need to * think about how perf will work with VMs */ + /* Userspace can request changes to the ctls. They take effect when we + * reload the GPC, which occurs after a transition from userspace to VM. */ + vmcs_write(PIN_BASED_VM_EXEC_CONTROL, vmx->pin_exec_ctls); + vmcs_write(CPU_BASED_VM_EXEC_CONTROL, vmx->cpu_exec_ctls); + vmcs_write(SECONDARY_VM_EXEC_CONTROL, vmx->cpu2_exec_ctls); } uint64_t @@ -1004,6 +1016,13 @@ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); } +void vmx_setup_vmx_vmm(struct vmx_vmm *vmx) +{ + vmx->pin_exec_ctls = vmcs_config.pin_based_exec_ctrl; + vmx->cpu_exec_ctls = vmcs_config.cpu_based_exec_ctrl; + vmx->cpu2_exec_ctls = vmcs_config.cpu_based_2nd_exec_ctrl; +} + /** * vmx_setup_vmcs - configures the vmcs with starting parameters */ @@ -1012,18 +1031,6 @@ vmcs_write16(VIRTUAL_PROCESSOR_ID, 0); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ - /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); - - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl); - - if (cpu_has_secondary_exec_ctrls()) { - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - vmcs_config.cpu_based_2nd_exec_ctrl); - } - vmcs_write64(EPT_POINTER, gpc_get_eptp(gpc)); vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
diff --git a/kern/arch/x86/vmm/intel/vmx.h b/kern/arch/x86/vmm/intel/vmx.h index e6b50ac..2b448f7 100644 --- a/kern/arch/x86/vmm/intel/vmx.h +++ b/kern/arch/x86/vmm/intel/vmx.h
@@ -385,6 +385,15 @@ uint32_t must_be_0; uint32_t try_set_1; uint32_t try_set_0; + uint32_t hw_changeable; + uint32_t policy_changeable; +}; + +/* Per-VM VMX info */ +struct vmx_vmm { + uint32_t pin_exec_ctls; + uint32_t cpu_exec_ctls; + uint32_t cpu2_exec_ctls; }; int intel_vmm_init(void); @@ -393,3 +402,4 @@ void vmx_unload_guest_pcore(struct guest_pcore *gpc); uint64_t gpc_get_eptp(struct guest_pcore *gpc); void vmx_clear_vmcs(void); +void vmx_setup_vmx_vmm(struct vmx_vmm *vmx);
diff --git a/kern/arch/x86/vmm/vmm.c b/kern/arch/x86/vmm/vmm.c index 8f0acc8..f40625d 100644 --- a/kern/arch/x86/vmm/vmm.c +++ b/kern/arch/x86/vmm/vmm.c
@@ -91,8 +91,9 @@ error(EAGAIN, "We're already running a vmmcp?"); /* Set this early, so cleanup checks the gpc array */ vmm->vmmcp = TRUE; - nr_guest_pcores = MIN(nr_guest_pcores, num_cores); vmm->amd = 0; + vmx_setup_vmx_vmm(&vmm->vmx); + nr_guest_pcores = MIN(nr_guest_pcores, num_cores); vmm->guest_pcores = kzmalloc(sizeof(void *) * nr_guest_pcores, MEM_WAIT); if (!vmm->guest_pcores) error(ENOMEM, "Allocation of vmm->guest_pcores failed");
diff --git a/kern/arch/x86/vmm/vmm.h b/kern/arch/x86/vmm/vmm.h index 9ae840d..da62fe9 100644 --- a/kern/arch/x86/vmm/vmm.h +++ b/kern/arch/x86/vmm/vmm.h
@@ -40,8 +40,9 @@ // installed would GPF on a K7. union { void *svm; - struct guest_pcore **guest_pcores; + struct vmx_vmm vmx; }; + struct guest_pcore **guest_pcores; unsigned long vmexits[VMM_VMEXIT_NR_TYPES]; };