|  | /* | 
|  | * MSR emulation | 
|  | * | 
|  | * Copyright 2015 Google Inc. | 
|  | * | 
|  | * See LICENSE for details. | 
|  | */ | 
|  |  | 
|  | #include <stdio.h> | 
|  | #include <sys/types.h> | 
|  | #include <pthread.h> | 
|  | #include <sys/stat.h> | 
|  | #include <fcntl.h> | 
|  | #include <parlib/arch/arch.h> | 
|  | #include <parlib/ros_debug.h> | 
|  | #include <unistd.h> | 
|  | #include <errno.h> | 
|  | #include <stdlib.h> | 
|  | #include <string.h> | 
|  | #include <sys/uio.h> | 
|  | #include <stdint.h> | 
|  | #include <err.h> | 
|  | #include <sys/mman.h> | 
|  | #include <ros/vmm.h> | 
|  | #include <ros/arch/msr-index.h> | 
|  | #include <vmm/virtio.h> | 
|  | #include <vmm/virtio_mmio.h> | 
|  | #include <vmm/virtio_ids.h> | 
|  | #include <vmm/virtio_config.h> | 
|  | #include <vmm/sched.h> | 
|  | #include <ros/arch/trapframe.h> | 
|  |  | 
|  | struct emmsr { | 
|  | uint32_t reg; | 
|  | char *name; | 
|  | int (*f)(struct guest_thread *vm_thread, struct emmsr *, uint32_t); | 
|  | bool written; | 
|  | uint32_t edx, eax; | 
|  | }; | 
|  | // Might need to mfence rdmsr.  supposedly wrmsr serializes, but not for x2APIC | 
|  | static inline uint64_t read_msr(uint32_t reg) | 
|  | { | 
|  | uint32_t edx, eax; | 
|  | asm volatile("rdmsr; mfence" : "=d"(edx), "=a"(eax) : "c"(reg)); | 
|  | return (uint64_t)edx << 32 | eax; | 
|  | } | 
|  |  | 
|  | static inline void write_msr(uint32_t reg, uint64_t val) | 
|  | { | 
|  | asm volatile("wrmsr" : : "d"((uint32_t)(val >> 32)), | 
|  | "a"((uint32_t)(val & 0xFFFFFFFF)), | 
|  | "c"(reg)); | 
|  | } | 
|  |  | 
|  | static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *, | 
|  | uint32_t); | 
|  | static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *, | 
|  | uint32_t); | 
|  | static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *, | 
|  | uint32_t); | 
|  | static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *, | 
|  | uint32_t); | 
|  | static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *, | 
|  | uint32_t); | 
|  | static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *, uint32_t); | 
|  |  | 
|  | struct emmsr emmsrs[] = { | 
|  | {MSR_IA32_MISC_ENABLE, "MSR_IA32_MISC_ENABLE", emsr_miscenable}, | 
|  | {MSR_IA32_SYSENTER_CS, "MSR_IA32_SYSENTER_CS", emsr_ok}, | 
|  | {MSR_IA32_SYSENTER_EIP, "MSR_IA32_SYSENTER_EIP", emsr_ok}, | 
|  | {MSR_IA32_SYSENTER_ESP, "MSR_IA32_SYSENTER_ESP", emsr_ok}, | 
|  | {MSR_IA32_UCODE_REV, "MSR_IA32_UCODE_REV", emsr_fakewrite}, | 
|  | {MSR_CSTAR, "MSR_CSTAR", emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_BASIC_MSR, "MSR_IA32_VMX_BASIC_MSR", emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_PINBASED_CTLS_MSR, "MSR_IA32_VMX_PINBASED_CTLS_MSR", | 
|  | emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_PROCBASED_CTLS_MSR, "MSR_IA32_VMX_PROCBASED_CTLS_MSR", | 
|  | emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_PROCBASED_CTLS2, "MSR_IA32_VMX_PROCBASED_CTLS2", | 
|  | emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_EXIT_CTLS_MSR, "MSR_IA32_VMX_EXIT_CTLS_MSR", | 
|  | emsr_fakewrite}, | 
|  | {MSR_IA32_VMX_ENTRY_CTLS_MSR, "MSR_IA32_VMX_ENTRY_CTLS_MSR", | 
|  | emsr_fakewrite}, | 
|  | {MSR_IA32_ENERGY_PERF_BIAS, "MSR_IA32_ENERGY_PERF_BIAS", | 
|  | emsr_fakewrite}, | 
|  | {MSR_LBR_SELECT, "MSR_LBR_SELECT", emsr_ok}, | 
|  | {MSR_LBR_TOS, "MSR_LBR_TOS", emsr_ok}, | 
|  | {MSR_LBR_NHM_FROM, "MSR_LBR_NHM_FROM", emsr_ok}, | 
|  | {MSR_LBR_NHM_TO, "MSR_LBR_NHM_TO", emsr_ok}, | 
|  | {MSR_LBR_CORE_FROM, "MSR_LBR_CORE_FROM", emsr_ok}, | 
|  | {MSR_LBR_CORE_TO, "MSR_LBR_CORE_TO", emsr_ok}, | 
|  |  | 
|  | // grumble. | 
|  | {MSR_OFFCORE_RSP_0, "MSR_OFFCORE_RSP_0", emsr_ok}, | 
|  | {MSR_OFFCORE_RSP_1, "MSR_OFFCORE_RSP_1", emsr_ok}, | 
|  | // louder. | 
|  | {MSR_PEBS_LD_LAT_THRESHOLD, "MSR_PEBS_LD_LAT_THRESHOLD", emsr_ok}, | 
|  | // aaaaaahhhhhhhhhhhhhhhhhhhhh | 
|  | {MSR_ARCH_PERFMON_EVENTSEL0, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok}, | 
|  | {MSR_ARCH_PERFMON_EVENTSEL1, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok}, | 
|  | {MSR_IA32_PERF_CAPABILITIES, "MSR_IA32_PERF_CAPABILITIES", emsr_ok}, | 
|  | // unsafe. | 
|  | {MSR_IA32_APICBASE, "MSR_IA32_APICBASE", emsr_fakewrite}, | 
|  |  | 
|  | // mostly harmless. | 
|  | {MSR_TSC_AUX, "MSR_TSC_AUX", emsr_fakewrite}, | 
|  | {MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT", emsr_readzero}, | 
|  | }; | 
|  |  | 
|  | static uint64_t set_low32(uint64_t hi, uint32_t lo) | 
|  | { | 
|  | return (hi & 0xffffffff00000000ULL) | lo; | 
|  | } | 
|  |  | 
|  | static uint64_t set_low16(uint64_t hi, uint16_t lo) | 
|  | { | 
|  | return (hi & 0xffffffffffff0000ULL) | lo; | 
|  | } | 
|  |  | 
|  | static uint64_t set_low8(uint64_t hi, uint8_t lo) | 
|  | { | 
|  | return (hi & 0xffffffffffffff00ULL) | lo; | 
|  | } | 
|  |  | 
|  | /* this may be the only register that needs special handling. | 
|  | * If there others then we might want to extend teh emmsr struct. | 
|  | */ | 
|  | static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) { | 
|  | uint32_t eax, edx; | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | rdmsr(msr->reg, eax, edx); | 
|  | /* we just let them read the misc msr for now. */ | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax); | 
|  | vm_tf->tf_rax |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL; | 
|  | vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx); | 
|  | return 0; | 
|  | } else { | 
|  | /* if they are writing what is already written, that's ok. */ | 
|  | if (((uint32_t) vm_tf->tf_rax == eax) | 
|  | && ((uint32_t) vm_tf->tf_rdx == edx)) | 
|  | return 0; | 
|  | } | 
|  | fprintf(stderr, | 
|  | "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n", | 
|  | msr->name, (uint32_t) vm_tf->tf_rdx, | 
|  | (uint32_t) vm_tf->tf_rax, edx, eax); | 
|  | return SHUTDOWN_UNHANDLED_EXIT_REASON; | 
|  | } | 
|  |  | 
|  | static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) { | 
|  | uint32_t eax, edx; | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | rdmsr(msr->reg, eax, edx); | 
|  | /* we just let them read the misc msr for now. */ | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax); | 
|  | vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx); | 
|  | return 0; | 
|  | } else { | 
|  | /* if they are writing what is already written, that's ok. */ | 
|  | if (((uint32_t) vm_tf->tf_rax == eax) | 
|  | && ((uint32_t) vm_tf->tf_rdx == edx)) | 
|  | return 0; | 
|  | } | 
|  | fprintf(stderr, | 
|  | "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n", | 
|  | msr->name, (uint32_t) vm_tf->tf_rdx, | 
|  | (uint32_t) vm_tf->tf_rax, edx, eax); | 
|  | return SHUTDOWN_UNHANDLED_EXIT_REASON; | 
|  | } | 
|  |  | 
|  | static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) | 
|  | { | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | rdmsr(msr->reg, vm_tf->tf_rdx, vm_tf->tf_rax); | 
|  | } else { | 
|  | uint64_t val = | 
|  | (uint64_t) vm_tf->tf_rdx << 32 | vm_tf->tf_rax; | 
|  | write_msr(msr->reg, val); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) | 
|  | { | 
|  | uint32_t eax, edx; | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | rdmsr((uint32_t) vm_tf->tf_rcx, eax, edx); | 
|  | /* we just let them read the misc msr for now. */ | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax); | 
|  | vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name); | 
|  | return SHUTDOWN_UNHANDLED_EXIT_REASON; | 
|  | } | 
|  |  | 
|  | static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) | 
|  | { | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | vm_tf->tf_rax = 0; | 
|  | vm_tf->tf_rdx = 0; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name); | 
|  | return SHUTDOWN_UNHANDLED_EXIT_REASON; | 
|  | } | 
|  |  | 
|  | /* pretend to write it, but don't write it. */ | 
|  | static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr, | 
|  | uint32_t opcode) | 
|  | { | 
|  | uint32_t eax, edx; | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | if (!msr->written) { | 
|  | rdmsr(msr->reg, eax, edx); | 
|  | } else { | 
|  | edx = msr->edx; | 
|  | eax = msr->eax; | 
|  | } | 
|  | /* we just let them read the misc msr for now. */ | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax); | 
|  | vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx); | 
|  | return 0; | 
|  | } else { | 
|  | /* if they are writing what is already written, that's ok. */ | 
|  | if (((uint32_t) vm_tf->tf_rax == eax) | 
|  | && ((uint32_t) vm_tf->tf_rdx == edx)) | 
|  | return 0; | 
|  | msr->edx = vm_tf->tf_rdx; | 
|  | msr->eax = vm_tf->tf_rax; | 
|  | msr->written = true; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int emsr_apic(struct guest_thread *vm_thread, | 
|  | struct vmm_gpcore_init *gpci, uint32_t opcode) | 
|  | { | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  | int apic_offset = vm_tf->tf_rcx & 0xff; | 
|  | uint64_t value; | 
|  |  | 
|  | if (opcode == EXIT_REASON_MSR_READ) { | 
|  | if (vm_tf->tf_rcx != MSR_LAPIC_ICR) { | 
|  | vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset]; | 
|  | vm_tf->tf_rdx = 0; | 
|  | } else { | 
|  | vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset]; | 
|  | vm_tf->tf_rdx = ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1]; | 
|  | } | 
|  | } else { | 
|  | if (vm_tf->tf_rcx != MSR_LAPIC_ICR) | 
|  | ((uint32_t *)(gpci->vapic_addr))[apic_offset] = | 
|  | (uint32_t)(vm_tf->tf_rax); | 
|  | else { | 
|  | ((uint32_t *)(gpci->vapic_addr))[apic_offset] = | 
|  | (uint32_t)(vm_tf->tf_rax); | 
|  | ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1] = | 
|  | (uint32_t)(vm_tf->tf_rdx); | 
|  | } | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int msrio(struct guest_thread *vm_thread, struct vmm_gpcore_init *gpci, | 
|  | uint32_t opcode) | 
|  | { | 
|  | int i; | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  |  | 
|  | if (vm_tf->tf_rcx >= MSR_LAPIC_ID && vm_tf->tf_rcx < MSR_LAPIC_END) | 
|  | return emsr_apic(vm_thread, gpci, opcode); | 
|  |  | 
|  | for (i = 0; i < sizeof(emmsrs)/sizeof(emmsrs[0]); i++) { | 
|  | if (emmsrs[i].reg != vm_tf->tf_rcx) | 
|  | continue; | 
|  | return emmsrs[i].f(vm_thread, &emmsrs[i], opcode); | 
|  | } | 
|  | fprintf(stderr, "msrio for 0x%lx failed\n", vm_tf->tf_rcx); | 
|  | return SHUTDOWN_UNHANDLED_EXIT_REASON; | 
|  | } | 
|  |  |