/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
*
*/
#define DEBUG
#define LITEVM_DEBUG
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <sys/queue.h>
#include <smp.h>
#include <kref.h>
#include <atomic.h>
#include <alarm.h>
#include <event.h>
#include <umem.h>
#include <devalarm.h>
#include <arch/types.h>
#include <arch/vm.h>
#include <arch/emulate.h>
#include <arch/vmdebug.h>
#include <arch/msr-index.h>
#define currentcpu (&per_cpu_info[core_id()])
struct litevm_stat litevm_stat;
static struct litevm_stats_debugfs_item {
const char *name;
uint32_t *data;
} debugfs_entries[] = {
{ "pf_fixed", &litevm_stat.pf_fixed },
{ "pf_guest", &litevm_stat.pf_guest },
{ "tlb_flush", &litevm_stat.tlb_flush },
{ "invlpg", &litevm_stat.invlpg },
{ "exits", &litevm_stat.exits },
{ "io_exits", &litevm_stat.io_exits },
{ "mmio_exits", &litevm_stat.mmio_exits },
{ "signal_exits", &litevm_stat.signal_exits },
{ "irq_exits", &litevm_stat.irq_exits },
{ 0, 0 }
};
static struct dentry *debugfs_dir;
static const uint32_t vmx_msr_index[] = {
#ifdef __x86_64__
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
#endif
MSR_EFER, /* TODO: do we also want MSR_K6_STAR here? */
};
#define NR_VMX_MSR (sizeof(vmx_msr_index) / sizeof(*vmx_msr_index))
#ifdef __x86_64__
/*
* Avoid saving/loading MSR_SYSCALL_MASK and MSR_LSTAR via the standard VT
* save/load mechanism (cpu bug AA24).
*/
#define NR_BAD_MSRS 2
#else
#define NR_BAD_MSRS 0
#endif
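/*
* Layout of the dummy TSS used while emulating real mode with vm86: the base
* TSS, the interrupt redirection bitmap, the I/O permission bitmap, and a
* final terminator byte (filled in by init_rmode_tss()).
*/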
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
#define MSR_IA32_VMX_BASIC_MSR 0x480
#define MSR_IA32_VMX_PINBASED_CTLS_MSR 0x481
#define MSR_IA32_VMX_PROCBASED_CTLS_MSR 0x482
#define MSR_IA32_VMX_EXIT_CTLS_MSR 0x483
#define MSR_IA32_VMX_ENTRY_CTLS_MSR 0x484
#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
#define LMSW_GUEST_MASK 0x0eULL
#define CR4_RESEVED_BITS (~((1ULL << 11) - 1))
//#define CR4_VMXE 0x2000
#define CR8_RESEVED_BITS (~0x0fULL)
#define EFER_RESERVED_BITS 0xfffffffffffff2fe
#ifdef __x86_64__
#define HOST_IS_64 1
#else
#define HOST_IS_64 0
#endif
/* Bit ops are not yet widely used in Akaros, and we're not sure where to put them. */
/**
* __ffs - find first set bit in word
* @word: The word to search
*
* Undefined if no bit exists, so code should check against 0 first.
*/
static inline unsigned long __ffs(unsigned long word)
{
print_func_entry();
asm("rep; bsf %1,%0"
: "=r" (word)
: "rm" (word));
print_func_exit();
return word;
}
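/*
* Return the guest save-area entry for @msr (one of vmx_msr_index), or 0 if
* we are not tracking that MSR for this vcpu.
*/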
static struct vmx_msr_entry *find_msr_entry(struct litevm_vcpu *vcpu, uint32_t msr)
{
print_func_entry();
int i;
for (i = 0; i < vcpu->nmsrs; ++i)
if (vcpu->guest_msrs[i].index == msr) {
print_func_exit();
return &vcpu->guest_msrs[i];
}
print_func_exit();
return 0;
}
struct descriptor_table {
uint16_t limit;
unsigned long base;
} __attribute__((packed));
static void get_gdt(struct descriptor_table *table)
{
print_func_entry();
asm ("sgdt %0" : "=m"(*table));
print_func_exit();
}
static void get_idt(struct descriptor_table *table)
{
print_func_entry();
asm ("sidt %0" : "=m"(*table));
print_func_exit();
}
static uint16_t read_fs(void)
{
print_func_entry();
uint16_t seg;
asm ("mov %%fs, %0" : "=g"(seg));
print_func_exit();
return seg;
}
static uint16_t read_gs(void)
{
print_func_entry();
uint16_t seg;
asm ("mov %%gs, %0" : "=g"(seg));
print_func_exit();
return seg;
}
static uint16_t read_ldt(void)
{
print_func_entry();
uint16_t ldt;
asm ("sldt %0" : "=g"(ldt));
print_func_exit();
return ldt;
}
static void load_fs(uint16_t sel)
{
print_func_entry();
asm ("mov %0, %%fs" : : "g"(sel));
print_func_exit();
}
static void load_gs(uint16_t sel)
{
print_func_entry();
asm ("mov %0, %%gs" : : "g"(sel));
print_func_exit();
}
#ifndef load_ldt
static void load_ldt(uint16_t sel)
{
print_func_entry();
asm ("lldt %0" : : "g"(sel));
print_func_exit();
}
#endif
static void fx_save(void *image)
{
print_func_entry();
asm ("fxsave (%0)":: "r" (image));
print_func_exit();
}
static void fx_restore(void *image)
{
print_func_entry();
asm ("fxrstor (%0)":: "r" (image));
print_func_exit();
}
static void fpu_init(void)
{
print_func_entry();
asm ("finit");
print_func_exit();
}
struct segment_descriptor {
uint16_t limit_low;
uint16_t base_low;
uint8_t base_mid;
uint8_t type : 4;
uint8_t system : 1;
uint8_t dpl : 2;
uint8_t present : 1;
uint8_t limit_high : 4;
uint8_t avl : 1;
uint8_t long_mode : 1;
uint8_t default_op : 1;
uint8_t granularity : 1;
uint8_t base_high;
} __attribute__((packed));
#ifdef __x86_64__
// LDT or TSS descriptor in the GDT. 16 bytes.
struct segment_descriptor_64 {
struct segment_descriptor s;
uint32_t base_higher;
uint32_t pad_zero;
};
#endif
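/*
* Return the linear base address of the segment named by @selector, reading
* its descriptor from the GDT (or the LDT when bit 2 of the selector is set).
* On x86_64, system descriptors carry an extra 32 base bits.
*/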
static unsigned long segment_base(uint16_t selector)
{
print_func_entry();
struct descriptor_table gdt;
struct segment_descriptor *d;
unsigned long table_base;
typedef unsigned long ul;
unsigned long v;
asm ("sgdt %0" : "=m"(gdt));
table_base = gdt.base;
if (selector & 4) { /* from ldt */
uint16_t ldt_selector;
asm ("sldt %0" : "=g"(ldt_selector));
table_base = segment_base(ldt_selector);
}
d = (struct segment_descriptor *)(table_base + (selector & ~7));
v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24);
#ifdef __x86_64__
if (d->system == 0
&& (d->type == 2 || d->type == 9 || d->type == 11))
v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
print_func_exit();
return v;
}
static unsigned long read_tr_base(void)
{
print_func_entry();
uint16_t tr;
asm ("str %0" : "=g"(tr));
print_func_exit();
return segment_base(tr);
}
static void reload_tss(void)
{
print_func_entry();
#ifndef __x86_64__
/*
* VT restores TR but not its size. Useless.
*/
struct descriptor_table gdt;
struct segment_descriptor *descs;
get_gdt(&gdt);
descs = (void *)gdt.base;
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
load_TR_desc();
#endif
print_func_exit();
}
static struct vmcs_descriptor {
int size;
int order;
uint32_t revision_id;
} vmcs_descriptor;
static inline struct page *_gfn_to_page(struct litevm *litevm, gfn_t gfn)
{
print_func_entry();
struct litevm_memory_slot *slot = gfn_to_memslot(litevm, gfn);
print_func_exit();
return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : 0;
}
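/*
* Copy @size bytes from guest virtual address @addr into @dest, translating
* one page at a time with gva_to_hpa(). Returns the number of bytes copied,
* which is less than @size if a translation fails.
*/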
int litevm_read_guest(struct litevm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *dest)
{
print_func_entry();
unsigned char *host_buf = dest;
unsigned long req_size = size;
while (size) {
hpa_t paddr;
unsigned now;
unsigned offset;
hva_t guest_buf;
paddr = gva_to_hpa(vcpu, addr);
if (is_error_hpa(paddr))
break;
guest_buf = (hva_t)KADDR(paddr);
offset = addr & ~PAGE_MASK;
guest_buf |= offset;
now = MIN(size, PAGE_SIZE - offset);
memcpy(host_buf, (void*)guest_buf, now);
host_buf += now;
addr += now;
size -= now;
}
print_func_exit();
return req_size - size;
}
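/*
* Mirror of litevm_read_guest(): copy @size bytes from @data into the guest
* at virtual address @addr, returning the number of bytes written.
*/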
int litevm_write_guest(struct litevm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *data)
{
print_func_entry();
unsigned char *host_buf = data;
unsigned long req_size = size;
while (size) {
hpa_t paddr;
unsigned now;
unsigned offset;
hva_t guest_buf;
paddr = gva_to_hpa(vcpu, addr);
if (is_error_hpa(paddr))
break;
guest_buf = (hva_t)KADDR(paddr);
offset = addr & ~PAGE_MASK;
guest_buf |= offset;
now = MIN(size, PAGE_SIZE - offset);
memcpy((void*)guest_buf, host_buf, now);
host_buf += now;
addr += now;
size -= now;
}
print_func_exit();
return req_size - size;
}
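/*
* Read IA32_VMX_BASIC to learn the size, allocation order, and revision id
* that VMXON/VMCS regions must have on this cpu.
*/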
static void setup_vmcs_descriptor(void)
{
print_func_entry();
uint64_t msr;
msr = read_msr(MSR_IA32_VMX_BASIC_MSR);
vmcs_descriptor.size = (msr>>32) & 0x1fff;
vmcs_descriptor.order = LOG2_UP(vmcs_descriptor.size>>PAGE_SHIFT);
vmcs_descriptor.revision_id = (uint32_t)msr;
printk("setup_vmcs_descriptor: msr 0x%x, size 0x%x order 0x%x id 0x%x\n",
msr, vmcs_descriptor.size, vmcs_descriptor.order,
vmcs_descriptor.revision_id);
print_func_exit();
}
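/*
* VMCLEAR @vmcs so it becomes inactive and can later be VMPTRLDed, possibly
* on another cpu; setna latches any failure into @error.
*/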
static void vmcs_clear(struct vmcs *vmcs)
{
print_func_entry();
uint64_t phys_addr = PADDR(vmcs);
uint8_t error;
printk("%d: vmcs %p phys_addr %p\n", core_id(), vmcs, (void *)phys_addr);
asm volatile ("vmclear %1; setna %0"
: "=m"(error) : "m"(phys_addr) : "cc", "memory" );
if (error)
printk("litevm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
print_func_exit();
}
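/*
* Runs (via smp_call_function) on the cpu that last used @vcpu's VMCS:
* VMCLEAR it there and drop that cpu's cached vmcs pointer so the VMCS can
* be loaded elsewhere.
*/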
static void __vcpu_clear(struct hw_trapframe *hw_tf, void *arg)
{
print_func_entry();
struct litevm_vcpu *vcpu = arg;
int cpu = core_id();
printd("__vcpu_clear: cpu %d vcpu->cpu %d currentcpu->vmcs %p vcpu->vmcs %p\n",
cpu, vcpu->cpu, currentcpu->vmcs, vcpu->vmcs);
if (vcpu->cpu == cpu)
vmcs_clear(vcpu->vmcs);
if (currentcpu->vmcs == vcpu->vmcs)
currentcpu->vmcs = NULL;
print_func_exit();
}
static int vcpu_slot(struct litevm_vcpu *vcpu)
{
print_func_entry();
print_func_exit();
return vcpu - vcpu->litevm->vcpus;
}
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
static struct litevm_vcpu *__vcpu_load(struct litevm_vcpu *vcpu)
{
print_func_entry();
uint64_t phys_addr = PADDR(vcpu->vmcs);
int cpu;
cpu = core_id();
printk("%d: __vcpu_load phys_addr %p\n", cpu, phys_addr);
if (vcpu->cpu != cpu) {
handler_wrapper_t *w;
smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, &w);
smp_call_wait(w);
vcpu->launched = 0;
}
if (currentcpu->vmcs != vcpu->vmcs) {
uint8_t error;
currentcpu->vmcs = vcpu->vmcs;
asm volatile ("vmptrld %1; setna %0"
: "=m"(error) : "m"(phys_addr) : "cc" );
if (error){
printk("litevm: vmptrld %p/%llx fail\n",
vcpu->vmcs, phys_addr);
error("litevm: vmptrld %p/%llx fail\n",
vcpu->vmcs, phys_addr);
}
}
if (vcpu->cpu != cpu) {
struct descriptor_table dt;
unsigned long sysenter_esp;
vcpu->cpu = cpu;
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
get_gdt(&dt);
vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
sysenter_esp = read_msr(MSR_IA32_SYSENTER_ESP);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
}
print_func_exit();
return vcpu;
}
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
static struct litevm_vcpu *vcpu_load(struct litevm *litevm, int vcpu_slot)
{
print_func_entry();
struct litevm_vcpu *vcpu = &litevm->vcpus[vcpu_slot];
printk("vcpu_slot %d vcpu %p\n", vcpu_slot, vcpu);
qlock(&vcpu->mutex);
printk("after qlock\n");
if (!vcpu->vmcs) {
qunlock(&vcpu->mutex);
error("vcpu->vmcs is NULL");
}
vcpu = __vcpu_load(vcpu);
print_func_exit();
return vcpu;
}
static void vcpu_put(struct litevm_vcpu *vcpu)
{
print_func_entry();
//put_cpu();
qunlock(&vcpu->mutex);
print_func_exit();
}
static struct vmcs *alloc_vmcs_cpu(int cpu)
{
print_func_entry();
int node = node_id();
struct vmcs *vmcs;
vmcs = get_cont_pages_node(node, vmcs_descriptor.order, KMALLOC_WAIT);
if (!vmcs) {
print_func_exit();
return 0;
}
memset(vmcs, 0, vmcs_descriptor.size);
vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */
print_func_exit();
return vmcs;
}
static struct vmcs *alloc_vmcs(void)
{
struct vmcs *ret;
print_func_entry();
ret = alloc_vmcs_cpu(core_id());
print_func_exit();
return ret;
}
static int cpu_has_litevm_support(void)
{
print_func_entry();
uint32_t ecx = cpuid_ecx(1);
print_func_exit();
return ecx & (1 << 5); /* CPUID.1:ECX.VMX[bit 5] -> VT */
}
static int vmx_disabled_by_bios(void)
{
print_func_entry();
uint64_t msr;
msr = read_msr(MSR_IA32_FEATURE_CONTROL);
print_func_exit();
return (msr & 5) == 1; /* locked but not enabled */
}
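/*
* Per-cpu VMX bring-up: allocate this cpu's VMXON region, set the lock and
* VMX-enable bits in IA32_FEATURE_CONTROL if the BIOS left them clear, turn
* on CR4.VMXE and CR0.NE, enable A20, and finally execute VMXON.
*/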
static void vm_enable(struct hw_trapframe *hw_tf, void *garbage)
{
print_func_entry();
int cpu = hw_core_id();
uint64_t phys_addr;
uint64_t old;
uint64_t status = 0;
currentcpu->vmxarea = get_cont_pages_node(core_id(), vmcs_descriptor.order,
KMALLOC_WAIT);
if (! currentcpu->vmxarea)
return;
memset(currentcpu->vmxarea, 0, vmcs_descriptor.size);
currentcpu->vmxarea->revision_id = vmcs_descriptor.revision_id;
phys_addr = PADDR(currentcpu->vmxarea);
printk("%d: currentcpu->vmxarea %p phys_addr %p\n", core_id(),
currentcpu->vmxarea, (void *)phys_addr);
if (phys_addr & 0xfff){
printk("fix vmxarea alignment!");
}
printk("%d: CR4 is 0x%x, and VMXE is %x\n", core_id(), rcr4(), CR4_VMXE);
old = read_msr(MSR_IA32_FEATURE_CONTROL);
printk("%d: vm_enable, old is %d\n", core_id(), old);
if ((old & 5) == 0){
/* enable and lock */
write_msr(MSR_IA32_FEATURE_CONTROL, old | 5);
old = read_msr(MSR_IA32_FEATURE_CONTROL);
printk("%d:vm_enable, tried to set 5, old is %d\n", core_id(), old);
}
printk("%d:CR4 is 0x%x, and VMXE is %x\n", core_id(), rcr4(), CR4_VMXE);
lcr4(rcr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */
printk("%d:CR4 is 0x%x, and VMXE is %x\n", core_id(), rcr4(), CR4_VMXE);
printk("%d:cr0 is %x\n", core_id(), rcr0());
lcr0(rcr0() | 0x20);
printk("%d:cr0 is %x\n", core_id(), rcr0());
printk("%d:A20 is %d (0x2 should be set)\n", core_id(), inb(0x92));
outb(0x92, inb(0x92)|2);
printk("%d:A20 is %d (0x2 should be set)\n", core_id(), inb(0x92));
asm volatile ("vmxon %1\njbe 1f\nmovl $1, %0\n1:" \
: "=m" (status) : "m"(phys_addr) : "memory", "cc");
printk("%d:vmxon status is %d\n", core_id(), status);
printk("%d:CR4 is 0x%x, and VMXE is %x\n", core_id(), rcr4(), CR4_VMXE);
if (! status){
printk("%d:vm_enable: status says fail\n", core_id());
}
print_func_exit();
}
static void litevm_disable(void *garbage)
{
print_func_entry();
asm volatile ("vmxoff" : : : "cc");
print_func_exit();
}
struct litevm *vmx_open(void)
{
print_func_entry();
struct litevm *litevm = kzmalloc(sizeof(struct litevm), KMALLOC_WAIT);
int i;
if (!litevm) {
printk("NO LITEVM! MAKES NO SENSE!\n");
error("litevm alloc failed");
print_func_exit();
return 0;
}
spinlock_init_irqsave(&litevm->lock);
LIST_INIT(&litevm->link);
for (i = 0; i < LITEVM_MAX_VCPUS; ++i) {
struct litevm_vcpu *vcpu = &litevm->vcpus[i];
qlock_init(&vcpu->mutex);
vcpu->mmu.root_hpa = INVALID_PAGE;
LIST_INIT(&vcpu->link);
}
printk("vmx_open: busy %d\n", litevm->busy);
printk("return %p\n", litevm);
print_func_exit();
return litevm;
}
/*
* Free any memory in @free but not in @dont.
*/
static void litevm_free_physmem_slot(struct litevm_memory_slot *free,
struct litevm_memory_slot *dont)
{
print_func_entry();
int i;
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
for (i = 0; i < free->npages; ++i){
page_t *page = free->phys_mem[i];
page_decref(page);
assert(page_is_free(page2ppn(page)));
}
kfree(free->phys_mem);
}
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kfree(free->dirty_bitmap);
free->phys_mem = 0;
free->npages = 0;
free->dirty_bitmap = 0;
print_func_exit();
}
static void litevm_free_physmem(struct litevm *litevm)
{
print_func_entry();
int i;
for (i = 0; i < litevm->nmemslots; ++i)
litevm_free_physmem_slot(&litevm->memslots[i], 0);
print_func_exit();
}
static void litevm_free_vmcs(struct litevm_vcpu *vcpu)
{
print_func_entry();
if (vcpu->vmcs) {
handler_wrapper_t *w;
smp_call_function_all(__vcpu_clear, vcpu, &w);
smp_call_wait(w);
//free_vmcs(vcpu->vmcs);
vcpu->vmcs = 0;
}
print_func_exit();
}
static void litevm_free_vcpu(struct litevm_vcpu *vcpu)
{
print_func_entry();
litevm_free_vmcs(vcpu);
litevm_mmu_destroy(vcpu);
print_func_exit();
}
static void litevm_free_vcpus(struct litevm *litevm)
{
print_func_entry();
unsigned int i;
for (i = 0; i < LITEVM_MAX_VCPUS; ++i)
litevm_free_vcpu(&litevm->vcpus[i]);
print_func_exit();
}
static int litevm_dev_release(struct litevm *litevm)
{
print_func_entry();
litevm_free_vcpus(litevm);
litevm_free_physmem(litevm);
kfree(litevm);
print_func_exit();
return 0;
}
unsigned long vmcs_readl(unsigned long field)
{
print_func_entry();
unsigned long value;
asm volatile ("vmread %1, %0" : "=g"(value) : "r"(field) : "cc");
print_func_exit();
return value;
}
void vmcs_writel(unsigned long field, unsigned long value)
{
print_func_entry();
uint8_t error;
asm volatile ("vmwrite %1, %2; setna %0"
: "=g"(error) : "r"(value), "r"(field) : "cc" );
if (error)
printk("vmwrite error: reg %lx value %lx (err %d)\n",
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
print_func_exit();
}
static void vmcs_write16(unsigned long field, uint16_t value)
{
print_func_entry();
vmcs_writel(field, value);
print_func_exit();
}
static void vmcs_write64(unsigned long field, uint64_t value)
{
print_func_entry();
#ifdef __x86_64__
vmcs_writel(field, value);
#else
vmcs_writel(field, value);
asm volatile ("");
vmcs_writel(field+1, value >> 32);
#endif
print_func_exit();
}
static void inject_gp(struct litevm_vcpu *vcpu)
{
print_func_entry();
printd("inject_general_protection: rip 0x%lx\n",
vmcs_readl(GUEST_RIP));
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 0);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
GP_VECTOR |
INTR_TYPE_EXCEPTION |
INTR_INFO_DELIEVER_CODE_MASK |
INTR_INFO_VALID_MASK);
print_func_exit();
}
static void update_exception_bitmap(struct litevm_vcpu *vcpu)
{
print_func_entry();
if (vcpu->rmode.active)
vmcs_write32(EXCEPTION_BITMAP, ~0);
else
vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
print_func_exit();
}
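/*
* Leave emulated real mode: restore the TR state saved by enter_rmode(),
* clear VM and IOPL in RFLAGS, and give the data segments flat protected
* mode attributes again.
*/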
static void enter_pmode(struct litevm_vcpu *vcpu)
{
print_func_entry();
unsigned long flags;
vcpu->rmode.active = 0;
vmcs_writel(GUEST_TR_BASE, vcpu->rmode.tr.base);
vmcs_write32(GUEST_TR_LIMIT, vcpu->rmode.tr.limit);
vmcs_write32(GUEST_TR_AR_BYTES, vcpu->rmode.tr.ar);
flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM);
flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) |
(vmcs_readl(CR0_READ_SHADOW) & CR4_VME_MASK) );
update_exception_bitmap(vcpu);
#define FIX_PMODE_DATASEG(seg, save) { \
vmcs_write16(GUEST_##seg##_SELECTOR, 0); \
vmcs_writel(GUEST_##seg##_BASE, 0); \
vmcs_write32(GUEST_##seg##_LIMIT, 0xffff); \
vmcs_write32(GUEST_##seg##_AR_BYTES, 0x93); \
}
FIX_PMODE_DATASEG(SS, vcpu->rmode.ss);
FIX_PMODE_DATASEG(ES, vcpu->rmode.es);
FIX_PMODE_DATASEG(DS, vcpu->rmode.ds);
FIX_PMODE_DATASEG(GS, vcpu->rmode.gs);
FIX_PMODE_DATASEG(FS, vcpu->rmode.fs);
vmcs_write16(GUEST_CS_SELECTOR,
vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
print_func_exit();
}
static int rmode_tss_base(struct litevm* litevm)
{
print_func_entry();
gfn_t base_gfn = litevm->memslots[0].base_gfn + litevm->memslots[0].npages - 3;
print_func_exit();
return base_gfn << PAGE_SHIFT;
}
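/*
* Enter emulated real mode via vm86: point TR at the dummy real-mode TSS,
* set VM and IOPL in RFLAGS, and derive each segment selector from its base
* so vm86 segment arithmetic works.
*/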
static void enter_rmode(struct litevm_vcpu *vcpu)
{
print_func_entry();
unsigned long flags;
vcpu->rmode.active = 1;
vcpu->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->litevm));
vcpu->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
vcpu->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
flags = vmcs_readl(GUEST_RFLAGS);
vcpu->rmode.save_iopl = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
vmcs_writel(GUEST_RFLAGS, flags);
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK);
update_exception_bitmap(vcpu);
#define FIX_RMODE_SEG(seg, save) { \
vmcs_write16(GUEST_##seg##_SELECTOR, \
vmcs_readl(GUEST_##seg##_BASE) >> 4); \
vmcs_write32(GUEST_##seg##_LIMIT, 0xffff); \
vmcs_write32(GUEST_##seg##_AR_BYTES, 0xf3); \
}
vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
FIX_RMODE_SEG(ES, vcpu->rmode.es);
FIX_RMODE_SEG(DS, vcpu->rmode.ds);
FIX_RMODE_SEG(SS, vcpu->rmode.ss);
FIX_RMODE_SEG(GS, vcpu->rmode.gs);
FIX_RMODE_SEG(FS, vcpu->rmode.fs);
print_func_exit();
}
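/*
* Build the real-mode TSS in the three pages reserved at the top of memory
* slot 0: zero them, point the I/O bitmap offset just past the interrupt
* redirection map, and write the terminating byte at the end of the bitmap.
*/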
static int init_rmode_tss(struct litevm* litevm)
{
print_func_entry();
struct page *p1, *p2, *p3;
gfn_t fn = rmode_tss_base(litevm) >> PAGE_SHIFT;
char *page;
p1 = _gfn_to_page(litevm, fn++);
p2 = _gfn_to_page(litevm, fn++);
p3 = _gfn_to_page(litevm, fn);
if (!p1 || !p2 || !p3) {
printk("%s: gfn_to_page failed\n", __FUNCTION__);
print_func_exit();
return 0;
}
page = page2kva(p1);
memset(page, 0, PAGE_SIZE);
*(uint16_t*)(page + TSS_IOPB_BASE_OFFSET) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
page = page2kva(p2);
memset(page, 0, PAGE_SIZE);
page = page2kva(p3);
memset(page, 0, PAGE_SIZE);
*(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0;
print_func_exit();
return 1;
}
#ifdef __x86_64__
static void __set_efer(struct litevm_vcpu *vcpu, uint64_t efer)
{
print_func_entry();
struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER);
vcpu->shadow_efer = efer;
if (efer & EFER_LMA) {
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS) |
VM_ENTRY_CONTROLS_IA32E_MASK);
msr->data = efer;
} else {
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS) &
~VM_ENTRY_CONTROLS_IA32E_MASK);
msr->data = efer & ~EFER_LME;
}
print_func_exit();
}
static void enter_lmode(struct litevm_vcpu *vcpu)
{
print_func_entry();
uint32_t guest_tr_ar;
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
printd("%s: tss fixup for long mode. \n",
__FUNCTION__);
vmcs_write32(GUEST_TR_AR_BYTES,
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
vcpu->shadow_efer |= EFER_LMA;
find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME;
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS)
| VM_ENTRY_CONTROLS_IA32E_MASK);
print_func_exit();
}
static void exit_lmode(struct litevm_vcpu *vcpu)
{
print_func_entry();
vcpu->shadow_efer &= ~EFER_LMA;
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS)
& ~VM_ENTRY_CONTROLS_IA32E_MASK);
print_func_exit();
}
#endif
static void __set_cr0(struct litevm_vcpu *vcpu, unsigned long cr0)
{
print_func_entry();
if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
enter_pmode(vcpu);
if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK))
enter_rmode(vcpu);
#ifdef __x86_64__
if (vcpu->shadow_efer & EFER_LME) {
if (!is_paging() && (cr0 & CR0_PG_MASK))
enter_lmode(vcpu);
if (is_paging() && !(cr0 & CR0_PG_MASK))
exit_lmode(vcpu);
}
#endif
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, cr0 | LITEVM_VM_CR0_ALWAYS_ON);
print_func_exit();
}
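/*
* For PAE paging, the four page-directory-pointer-table entries at CR3 must
* not have reserved bits set. Returns true if any present PDPTE does.
*/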
static int pdptrs_have_reserved_bits_set(struct litevm_vcpu *vcpu,
unsigned long cr3)
{
print_func_entry();
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5;
int i;
uint64_t pdpte;
uint64_t *pdpt;
struct litevm_memory_slot *memslot;
spin_lock_irqsave(&vcpu->litevm->lock);
memslot = gfn_to_memslot(vcpu->litevm, pdpt_gfn);
/* FIXME: !memslot - emulate? 0xff? */
pdpt = page2kva(gfn_to_page(memslot, pdpt_gfn));
for (i = 0; i < 4; ++i) {
pdpte = pdpt[offset + i];
if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull))
break;
}
spin_unlock(&vcpu->litevm->lock);
print_func_exit();
return i != 4;
}
static void set_cr0(struct litevm_vcpu *vcpu, unsigned long cr0)
{
print_func_entry();
if (cr0 & CR0_RESEVED_BITS) {
printd("set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
cr0, guest_cr0());
inject_gp(vcpu);
print_func_exit();
return;
}
if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) {
printd("set_cr0: #GP, CD == 0 && NW == 1\n");
inject_gp(vcpu);
print_func_exit();
return;
}
if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) {
printd("set_cr0: #GP, set PG flag "
"and a clear PE flag\n");
inject_gp(vcpu);
print_func_exit();
return;
}
if (!is_paging() && (cr0 & CR0_PG_MASK)) {
#ifdef __x86_64__
if ((vcpu->shadow_efer & EFER_LME)) {
uint32_t guest_cs_ar;
if (!is_pae()) {
printd("set_cr0: #GP, start paging "
"in long mode while PAE is disabled\n");
inject_gp(vcpu);
print_func_exit();
return;
}
guest_cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
if (guest_cs_ar & SEGMENT_AR_L_MASK) {
printd("set_cr0: #GP, start paging "
"in long mode while CS.L == 1\n");
inject_gp(vcpu);
print_func_exit();
return;
}
} else
#endif
if (is_pae() &&
pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
printd("set_cr0: #GP, pdptrs "
"reserved bits\n");
inject_gp(vcpu);
print_func_exit();
return;
}
}
__set_cr0(vcpu, cr0);
litevm_mmu_reset_context(vcpu);
print_func_exit();
return;
}
static void lmsw(struct litevm_vcpu *vcpu, unsigned long msw)
{
print_func_entry();
unsigned long cr0 = guest_cr0();
if ((msw & CR0_PE_MASK) && !(cr0 & CR0_PE_MASK)) {
enter_pmode(vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0 | CR0_PE_MASK);
} else
printd("lmsw: unexpected\n");
vmcs_writel(GUEST_CR0, (vmcs_readl(GUEST_CR0) & ~LMSW_GUEST_MASK)
| (msw & LMSW_GUEST_MASK));
print_func_exit();
}
static void __set_cr4(struct litevm_vcpu *vcpu, unsigned long cr4)
{
print_func_entry();
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, cr4 | (vcpu->rmode.active ?
LITEVM_RMODE_VM_CR4_ALWAYS_ON : LITEVM_PMODE_VM_CR4_ALWAYS_ON));
print_func_exit();
}
static void set_cr4(struct litevm_vcpu *vcpu, unsigned long cr4)
{
print_func_entry();
if (cr4 & CR4_RESEVED_BITS) {
printd("set_cr4: #GP, reserved bits\n");
inject_gp(vcpu);
print_func_exit();
return;
}
if (is_long_mode()) {
if (!(cr4 & CR4_PAE_MASK)) {
printd("set_cr4: #GP, clearing PAE while "
"in long mode\n");
inject_gp(vcpu);
print_func_exit();
return;
}
} else if (is_paging() && !is_pae() && (cr4 & CR4_PAE_MASK)
&& pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
printd("set_cr4: #GP, pdptrs reserved bits\n");
inject_gp(vcpu);
}
if (cr4 & CR4_VMXE_MASK) {
printd("set_cr4: #GP, setting VMXE\n");
inject_gp(vcpu);
print_func_exit();
return;
}
__set_cr4(vcpu, cr4);
spin_lock_irqsave(&vcpu->litevm->lock);
litevm_mmu_reset_context(vcpu);
spin_unlock(&vcpu->litevm->lock);
print_func_exit();
}
static void set_cr3(struct litevm_vcpu *vcpu, unsigned long cr3)
{
print_func_entry();
if (is_long_mode()) {
if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
printd("set_cr3: #GP, reserved bits\n");
inject_gp(vcpu);
print_func_exit();
return;
}
} else {
if (cr3 & CR3_RESEVED_BITS) {
printd("set_cr3: #GP, reserved bits\n");
inject_gp(vcpu);
print_func_exit();
return;
}
if (is_paging() && is_pae() &&
pdptrs_have_reserved_bits_set(vcpu, cr3)) {
printd("set_cr3: #GP, pdptrs "
"reserved bits\n");
inject_gp(vcpu);
print_func_exit();
return;
}
}
vcpu->cr3 = cr3;
spin_lock_irqsave(&vcpu->litevm->lock);
vcpu->mmu.new_cr3(vcpu);
spin_unlock(&vcpu->litevm->lock);
print_func_exit();
}
static void set_cr8(struct litevm_vcpu *vcpu, unsigned long cr8)
{
print_func_entry();
if ( cr8 & CR8_RESEVED_BITS) {
printd("set_cr8: #GP, reserved bits 0x%lx\n", cr8);
inject_gp(vcpu);
print_func_exit();
return;
}
vcpu->cr8 = cr8;
print_func_exit();
}
static uint32_t get_rdx_init_val(void)
{
print_func_entry();
uint32_t val;
asm ("movl $1, %%eax \n\t"
"movl %%eax, %0 \n\t" : "=g"(val) );
print_func_exit();
return val;
}
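/*
* Save the host FPU state, run finit to produce a clean image for the guest,
* snapshot that as the guest image, then restore the host state and force a
* sane MXCSR in the guest image.
*/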
static void fx_init(struct litevm_vcpu *vcpu)
{
print_func_entry();
struct __attribute__ ((__packed__)) fx_image_s {
uint16_t control; //fcw
uint16_t status; //fsw
uint16_t tag; // ftw
uint16_t opcode; //fop
uint64_t ip; // fpu ip
uint64_t operand;// fpu dp
uint32_t mxcsr;
uint32_t mxcsr_mask;
} *fx_image;
fx_save(vcpu->host_fx_image);
fpu_init();
fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
fx_image = (struct fx_image_s *)vcpu->guest_fx_image;
fx_image->mxcsr = 0x1f80;
memset(vcpu->guest_fx_image + sizeof(struct fx_image_s),
0, FX_IMAGE_SIZE - sizeof(struct fx_image_s));
print_func_exit();
}
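/*
* Write a VMCS control field, honoring the corresponding VMX capability MSR:
* its low 32 bits are the control bits that must be 1 and its high 32 bits
* are the bits that are allowed to be 1.
*/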
static void vmcs_write32_fixedbits(uint32_t msr, uint32_t vmcs_field, uint32_t val)
{
print_func_entry();
uint32_t msr_high, msr_low;
uint64_t msrval;
msrval = read_msr(msr);
msr_low = msrval;
msr_high = (msrval>>32);
val &= msr_high;
val |= msr_low;
vmcs_write32(vmcs_field, val);
print_func_exit();
}
/*
* Sets up the vmcs for emulated real mode.
*/
static int litevm_vcpu_setup(struct litevm_vcpu *vcpu)
{
print_func_entry();
/* no op on x86_64 */
#define asmlinkage
extern asmlinkage void litevm_vmx_return(void);
uint32_t host_sysenter_cs;
uint32_t junk;
uint64_t a;
struct descriptor_table dt;
int i;
int ret;
uint64_t tsc;
int nr_good_msrs;
if (!init_rmode_tss(vcpu->litevm)) {
error("vcpu_setup: init_rmode_tss failed");
}
memset(vcpu->regs, 0, sizeof(vcpu->regs));
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
vcpu->cr8 = 0;
vcpu->apic_base = 0xfee00000 |
/*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
MSR_IA32_APICBASE_ENABLE;
fx_init(vcpu);
#define SEG_SETUP(seg) do { \
vmcs_write16(GUEST_##seg##_SELECTOR, 0); \
vmcs_writel(GUEST_##seg##_BASE, 0); \
vmcs_write32(GUEST_##seg##_LIMIT, 0xffff); \
vmcs_write32(GUEST_##seg##_AR_BYTES, 0x93); \
} while (0)
/*
* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
*/
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
vmcs_writel(GUEST_CS_BASE, 0x000f0000);
vmcs_write32(GUEST_CS_LIMIT, 0xffff);
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
SEG_SETUP(DS);
SEG_SETUP(ES);
SEG_SETUP(FS);
SEG_SETUP(GS);
SEG_SETUP(SS);
vmcs_write16(GUEST_TR_SELECTOR, 0);
vmcs_writel(GUEST_TR_BASE, 0);
vmcs_write32(GUEST_TR_LIMIT, 0xffff);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
vmcs_write16(GUEST_LDTR_SELECTOR, 0);
vmcs_writel(GUEST_LDTR_BASE, 0);
vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
vmcs_write32(GUEST_SYSENTER_CS, 0);
vmcs_writel(GUEST_SYSENTER_ESP, 0);
vmcs_writel(GUEST_SYSENTER_EIP, 0);
vmcs_writel(GUEST_RFLAGS, 0x02);
vmcs_writel(GUEST_RIP, 0xfff0);
vmcs_writel(GUEST_RSP, 0);
vmcs_writel(GUEST_CR3, 0);
//todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
vmcs_write32(GUEST_ACTIVITY_STATE, 0);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
/* I/O */
vmcs_write64(IO_BITMAP_A, 0);
vmcs_write64(IO_BITMAP_B, 0);
tsc = read_tsc();
vmcs_write64(TSC_OFFSET, -tsc);
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Special registers */
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
/* Control */
vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS_MSR,
PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_EXT_INTR_MASK /* 20.6.1 */
| PIN_BASED_NMI_EXITING /* 20.6.1 */
);
vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS_MSR,
CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_HLT_EXITING /* 20.6.2 */
| CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
| CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */
| CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */
| CPU_BASED_INVDPG_EXITING
| CPU_BASED_MOV_DR_EXITING
| CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
);
vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
vmcs_writel(HOST_CR0, rcr0()); /* 22.2.3 */
vmcs_writel(HOST_CR4, rcr4()); /* 22.2.3, 22.2.5 */
vmcs_writel(HOST_CR3, rcr3()); /* 22.2.3 FIXME: shadow tables */
#warning "not setting selectors; do we need them?"
#if 0
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#endif
vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */
vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */
#if 0
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#endif
#ifdef __x86_64__
a = read_msr(MSR_FS_BASE);
vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
a = read_msr(MSR_GS_BASE);
vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
#else
vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
#endif
#warning "Not setting HOST_TR_SELECTOR"
#if 0
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
#endif
get_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
vmcs_writel(HOST_RIP, (unsigned long)litevm_vmx_return); /* 22.2.5 */
/* it's the HIGH 32 bits! */
host_sysenter_cs = read_msr(MSR_IA32_SYSENTER_CS) >> 32;
vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
a = read_msr(MSR_IA32_SYSENTER_ESP);
vmcs_writel(HOST_IA32_SYSENTER_ESP, a); /* 22.2.3 */
a = read_msr(MSR_IA32_SYSENTER_EIP);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
ret = -ENOMEM;
vcpu->guest_msrs = kmalloc(PAGE_SIZE, KMALLOC_WAIT);
if (!vcpu->guest_msrs)
error("guest_msrs kmalloc failed");
vcpu->host_msrs = kmalloc(PAGE_SIZE, KMALLOC_WAIT);
if (!vcpu->host_msrs)
error("vcpu->host_msrs kmalloc failed -- storage leaked");
for (i = 0; i < NR_VMX_MSR; ++i) {
uint32_t index = vmx_msr_index[i];
uint32_t data_low, data_high;
uint64_t data;
int j = vcpu->nmsrs;
#warning "need readmsr_safe"
// if (rdmsr_safe(index, &data_low, &data_high) < 0)
// continue;
data = read_msr(index);
vcpu->host_msrs[j].index = index;
vcpu->host_msrs[j].reserved = 0;
vcpu->host_msrs[j].data = data;
vcpu->guest_msrs[j] = vcpu->host_msrs[j];
++vcpu->nmsrs;
}
printk("msrs: %d\n", vcpu->nmsrs);
nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS;
vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR,
PADDR(vcpu->guest_msrs + NR_BAD_MSRS));
vmcs_writel(VM_EXIT_MSR_STORE_ADDR,
PADDR(vcpu->guest_msrs + NR_BAD_MSRS));
vmcs_writel(VM_EXIT_MSR_LOAD_ADDR,
PADDR(vcpu->host_msrs + NR_BAD_MSRS));
vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS_MSR, VM_EXIT_CONTROLS,
(HOST_IS_64 << 9)); /* 22.2.1, 20.7.1 */
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */
/* 22.2.1, 20.8.1 */
vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS_MSR,
VM_ENTRY_CONTROLS, 0);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0);
vmcs_writel(TPR_THRESHOLD, 0);
vmcs_writel(CR0_GUEST_HOST_MASK, LITEVM_GUEST_CR0_MASK);
vmcs_writel(CR4_GUEST_HOST_MASK, LITEVM_GUEST_CR4_MASK);
__set_cr0(vcpu, 0x60000010); // enter rmode
__set_cr4(vcpu, 0);
#ifdef __x86_64__
__set_efer(vcpu, 0);
#endif
ret = litevm_mmu_init(vcpu);
print_func_exit();
return ret;
out_free_guest_msrs:
kfree(vcpu->guest_msrs);
out:
return ret;
}
/*
* Sync the rsp and rip registers into the vcpu structure. This allows
* registers to be accessed by indexing vcpu->regs.
*/
static void vcpu_load_rsp_rip(struct litevm_vcpu *vcpu)
{
print_func_entry();
vcpu->regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
vcpu->rip = vmcs_readl(GUEST_RIP);
print_func_exit();
}
/*
* Syncs rsp and rip back into the vmcs. Should be called after possible
* modification.
*/
static void vcpu_put_rsp_rip(struct litevm_vcpu *vcpu)
{
print_func_entry();
vmcs_writel(GUEST_RSP, vcpu->regs[VCPU_REGS_RSP]);
vmcs_writel(GUEST_RIP, vcpu->rip);
print_func_exit();
}
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
int vmx_create_vcpu(struct litevm *litevm, int n)
{
print_func_entry();
ERRSTACK(1);
int r;
struct litevm_vcpu *vcpu;
struct vmcs *vmcs;
char *errstring = NULL;
if (n < 0 || n >= LITEVM_MAX_VCPUS){
printk("%d is out of range; LITEVM_MAX_VCPUS is %d", n, LITEVM_MAX_VCPUS);
error("%d is out of range; LITEVM_MAX_VCPUS is %d", n, LITEVM_MAX_VCPUS);
}
vcpu = &litevm->vcpus[n];
qlock(&vcpu->mutex);
if (vcpu->vmcs) {
qunlock(&vcpu->mutex);
printk("VM already exists\n");
error("VM already exists");
}
/* I'm a bad person */
//ALIGN(vcpu->fx_buf, FX_IMAGE_ALIGN);
uint64_t a = (uint64_t) vcpu->fx_buf;
a += FX_IMAGE_ALIGN-1;
a /= FX_IMAGE_ALIGN;
a *= FX_IMAGE_ALIGN;
vcpu->host_fx_image = (char*)a;
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
vcpu->cpu = -1; /* First load will set up TR */
vcpu->litevm = litevm;
vmcs = alloc_vmcs();
if (!vmcs) {
errstring = "vmcs allocate failed";
printk("%s\n", errstring);
qunlock(&vcpu->mutex);
goto out_free_vcpus;
}
vmcs_clear(vmcs);
printk("after vmcs_clear\n");
vcpu->vmcs = vmcs;
vcpu->launched = 0;
printk("vcpu %p slot %d vmcs is %p\n", vcpu, n, vmcs);
error("before vcpu_load");
__vcpu_load(vcpu);
printk("PAST vcpu_load\n");
if (waserror()){
/* we really need to fix waserror() */
poperror();
goto out_free_vcpus;
}
r = litevm_vcpu_setup(vcpu);
vcpu_put(vcpu);
printk("r is %d\n", r);
if (! r) {
print_func_exit();
return 0;
}
errstring = "vcup set failed";
out_free_vcpus:
printk("out_free_vcpus: life sucks\n");
litevm_free_vcpu(vcpu);
error(errstring);
out:
print_func_exit();
return r;
}
/*
* Allocate some memory and give it an address in the guest physical address
* space.
*
* Discontiguous memory is allowed, mostly for framebuffers.
*/
int vm_set_memory_region(struct litevm *litevm,
struct litevm_memory_region *mem)
{
print_func_entry();
ERRSTACK(2);
int r;
gfn_t base_gfn;
unsigned long npages;
unsigned long i;
struct litevm_memory_slot *memslot;
struct litevm_memory_slot old, new;
int memory_config_version;
void *init_data = mem->init_data;
int pass = 1;
printk("litevm %p\n", litevm);
/* should not happen but ... */
if (! litevm)
error("NULL litevm in %s", __func__);
if (!mem)
error("NULL mem in %s", __func__);
if (litevm->busy)
error("litevm->busy is set! 0x%x\n", litevm->busy);
r = -EINVAL;
/* General sanity checks */
if (mem->memory_size & (PAGE_SIZE - 1))
error("mem->memory_size %lld is not page-aligned", mem->memory_size);
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
error("guest_phys_addr 0x%llx is not page-aligned", mem->guest_phys_addr);
if (mem->slot >= LITEVM_MEMORY_SLOTS)
error("Slot %d is >= %d", mem->slot, LITEVM_MEMORY_SLOTS);
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
error("0x%llx + 0x%llx is < 0x%llx",
mem->guest_phys_addr, mem->memory_size, mem->guest_phys_addr);
memslot = &litevm->memslots[mem->slot];
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
if (!npages)
mem->flags &= ~LITEVM_MEM_LOG_DIRTY_PAGES;
/* this is actually a very tricky retry loop (see the raced: label
* below). The use of error is a bit dangerous, so we don't use it much.
* consider a rewrite. Would be nice if akaros could do the
* allocation of a bunch of pages for us.
*/
raced:
printk("raced: pass %d\n", pass);
spin_lock_irqsave(&litevm->lock);
printk("locked\n");
if (waserror()){
spin_unlock(&litevm->lock);
nexterror();
}
memory_config_version = litevm->memory_config_version;
new = old = *memslot;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
/* Disallow changing a memory slot's size. */
r = -EINVAL;
if (npages && old.npages && npages != old.npages)
error("npages is %d, old.npages is %d, can't change",
npages, old.npages);
/* Check for overlaps */
r = -EEXIST;
for (i = 0; i < LITEVM_MEMORY_SLOTS; ++i) {
struct litevm_memory_slot *s = &litevm->memslots[i];
if (s == memslot)
continue;
if (!((base_gfn + npages <= s->base_gfn) ||
(base_gfn >= s->base_gfn + s->npages)))
error("Overlap");
}
/*
* Do memory allocations outside lock. memory_config_version will
* detect any races.
*/
spin_unlock(&litevm->lock);
printk("unlocked\n");
poperror();
/* Deallocate if slot is being removed */
if (!npages)
new.phys_mem = 0;
/* Free page dirty bitmap if unneeded */
if (!(new.flags & LITEVM_MEM_LOG_DIRTY_PAGES))
new.dirty_bitmap = 0;
r = -ENOMEM;
/* Allocate if a slot is being created */
if (npages && !new.phys_mem) {
new.phys_mem = kzmalloc(npages * sizeof(struct page *), KMALLOC_WAIT);
if (!new.phys_mem)
goto out_free;
for (i = 0; i < npages; ++i) {
int ret;
ret = kpage_alloc(&new.phys_mem[i]);
if (ret != ESUCCESS)
goto out_free;
if (init_data){
printk("init data memcpy(%p,%p,4096);\n",
page2kva(new.phys_mem[i]), init_data);
memcpy(page2kva(new.phys_mem[i]), init_data, PAGE_SIZE);
init_data += PAGE_SIZE;
}
}
}
/* Allocate page dirty bitmap if needed */
if ((new.flags & LITEVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
unsigned dirty_bytes;//ALIGN(npages, BITS_PER_LONG) / 8;
dirty_bytes = (((npages + BITS_PER_LONG-1)/BITS_PER_LONG)*BITS_PER_LONG)/8;
new.dirty_bitmap = kzmalloc(dirty_bytes, KMALLOC_WAIT);
if (!new.dirty_bitmap){
printk("VM: alloc of %d bytes for map failed\n", dirty_bytes);
goto out_free;
}
}
spin_lock_irqsave(&litevm->lock);
printk("locked\n");
if (memory_config_version != litevm->memory_config_version) {
spin_unlock(&litevm->lock);
printk("unlocked, try again\n");
litevm_free_physmem_slot(&new, &old);
goto raced;
}
r = -EAGAIN;
if (litevm->busy){
printk("BUSY!\n");
goto out_unlock;
}
if (mem->slot >= litevm->nmemslots)
litevm->nmemslots = mem->slot + 1;
*memslot = new;
++litevm->memory_config_version;
spin_unlock(&litevm->lock);
printk("unlocked\n");
for (i = 0; i < LITEVM_MAX_VCPUS; ++i) {
struct litevm_vcpu *vcpu;
vcpu = vcpu_load(litevm, i);
if (!vcpu)
continue;
litevm_mmu_reset_context(vcpu);
vcpu_put(vcpu);
}
litevm_free_physmem_slot(&old, &new);
print_func_exit();
return 0;
out_unlock:
spin_unlock(&litevm->lock);
printk("out_unlock\n");
out_free:
printk("out_free\n");
litevm_free_physmem_slot(&new, &old);
out:
printk("vm_set_memory_region: return %d\n", r);
print_func_exit();
return r;
}
#if 0
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
static int litevm_dev_ioctl_get_dirty_log(struct litevm *litevm,
struct litevm_dirty_log *log)
{
struct litevm_memory_slot *memslot;
int r, i;
int n;
unsigned long any = 0;
spin_lock_irqsave(&litevm->lock);
/*
* Prevent changes to guest memory configuration even while the lock
* is not taken.
*/
++litevm->busy;
spin_unlock(&litevm->lock);
r = -EINVAL;
if (log->slot >= LITEVM_MEMORY_SLOTS)
goto out;
memslot = &litevm->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
n = ALIGN(memslot->npages, 8) / 8;
for (i = 0; !any && i < n; ++i)
any = memslot->dirty_bitmap[i];
r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
goto out;
if (any) {
spin_lock_irqsave(&litevm->lock);
litevm_mmu_slot_remove_write_access(litevm, log->slot);
spin_unlock(&litevm->lock);
memset(memslot->dirty_bitmap, 0, n);
for (i = 0; i < LITEVM_MAX_VCPUS; ++i) {
struct litevm_vcpu *vcpu = vcpu_load(litevm, i);
if (!vcpu)
continue;
flush_guest_tlb(vcpu);
vcpu_put(vcpu);
}
}
r = 0;
out:
spin_lock_irqsave(&litevm->lock);
--litevm->busy;
spin_unlock(&litevm->lock);
return r;
}
#endif
struct litevm_memory_slot *gfn_to_memslot(struct litevm *litevm, gfn_t gfn)
{
print_func_entry();
int i;
for (i = 0; i < litevm->nmemslots; ++i) {
struct litevm_memory_slot *memslot = &litevm->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages) {
print_func_exit();
return memslot;
}
}
print_func_exit();
return 0;
}
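/*
* Find the memslot containing @gfn and, if dirty logging is enabled for it,
* set the matching bit in its dirty bitmap (tested first to avoid a needless
* atomic read-modify-write).
*/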
void mark_page_dirty(struct litevm *litevm, gfn_t gfn)
{
print_func_entry();
int i;
struct litevm_memory_slot *memslot = 0;
unsigned long rel_gfn;
for (i = 0; i < litevm->nmemslots; ++i) {
memslot = &litevm->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages) {
if (!memslot || !memslot->dirty_bitmap) {
print_func_exit();
return;
}
rel_gfn = gfn - memslot->base_gfn;
/* avoid RMW */
if (!GET_BITMASK_BIT(memslot->dirty_bitmap, rel_gfn))
SET_BITMASK_BIT_ATOMIC(memslot->dirty_bitmap, rel_gfn);
print_func_exit();
return;
}
}
print_func_exit();
}
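/*
* Advance GUEST_RIP past the instruction that caused the exit, since we
* emulated it on the guest's behalf.
*/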
static void skip_emulated_instruction(struct litevm_vcpu *vcpu)
{
print_func_entry();
unsigned long rip;
uint32_t interruptibility;
rip = vmcs_readl(GUEST_RIP);
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
vmcs_writel(GUEST_RIP, rip);
/*
* We emulated an instruction, so temporary interrupt blocking
* should be removed, if set.
*/
interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
if (interruptibility & 3)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
interruptibility & ~3);
print_func_exit();
}
static int emulator_read_std(unsigned long addr,
unsigned long *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
struct litevm_vcpu *vcpu = ctxt->vcpu;
void *data = val;
while (bytes) {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned tocopy = bytes < (unsigned)PAGE_SIZE - offset ?
bytes : (unsigned)PAGE_SIZE - offset;
unsigned long pfn;
struct litevm_memory_slot *memslot;
void *page;
if (gpa == UNMAPPED_GVA) {
print_func_exit();
return X86EMUL_PROPAGATE_FAULT;
}
pfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(vcpu->litevm, pfn);
if (!memslot) {
print_func_exit();
return X86EMUL_UNHANDLEABLE;
}
page = page2kva(gfn_to_page(memslot, pfn));
memcpy(data, page + offset, tocopy);
bytes -= tocopy;
data += tocopy;
addr += tocopy;
}
print_func_exit();
return X86EMUL_CONTINUE;
}
static int emulator_write_std(unsigned long addr,
unsigned long val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
printk("emulator_write_std: addr %lx n %d\n",
addr, bytes);
print_func_exit();
return X86EMUL_UNHANDLEABLE;
}
static int emulator_read_emulated(unsigned long addr,
unsigned long *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
struct litevm_vcpu *vcpu = ctxt->vcpu;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
vcpu->mmio_read_completed = 0;
print_func_exit();
return X86EMUL_CONTINUE;
} else if (emulator_read_std(addr, val, bytes, ctxt)
== X86EMUL_CONTINUE) {
print_func_exit();
return X86EMUL_CONTINUE;
}
else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA) {
print_func_exit();
return vcpu_printf(vcpu, "not present\n"), X86EMUL_PROPAGATE_FAULT;
}
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 0;
print_func_exit();
return X86EMUL_UNHANDLEABLE;
}
}
static int emulator_write_emulated(unsigned long addr,
unsigned long val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
struct litevm_vcpu *vcpu = ctxt->vcpu;
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA) {
print_func_exit();
return X86EMUL_PROPAGATE_FAULT;
}
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
vcpu->mmio_is_write = 1;
memcpy(vcpu->mmio_data, &val, bytes);
print_func_exit();
return X86EMUL_CONTINUE;
}
static int emulator_cmpxchg_emulated(unsigned long addr,
unsigned long old,
unsigned long new,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
static int reported;
if (!reported) {
reported = 1;
printk("litevm: emulating exchange as write\n");
}
print_func_exit();
return emulator_write_emulated(addr, new, bytes, ctxt);
}
static void report_emulation_failure(struct x86_emulate_ctxt *ctxt)
{
print_func_entry();
static int reported;
uint8_t opcodes[4];
unsigned long rip = vmcs_readl(GUEST_RIP);
unsigned long rip_linear = rip + vmcs_readl(GUEST_CS_BASE);
if (reported) {
print_func_exit();
return;
}
emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt);
printk("emulation failed but !mmio_needed?"
" rip %lx %02x %02x %02x %02x\n",
rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
reported = 1;
print_func_exit();
}
struct x86_emulate_ops emulate_ops = {
.read_std = emulator_read_std,
.write_std = emulator_write_std,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
};
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* litevm_run filled with mmio request */
EMULATE_FAIL, /* can't emulate this instruction */
};
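/*
* Hand the faulting instruction to the x86 emulator: sync rsp/rip, build an
* emulation context from the current guest mode and segment bases, and map
* the outcome onto EMULATE_DONE / EMULATE_DO_MMIO / EMULATE_FAIL, filling in
* litevm_run->mmio when an MMIO exit is needed.
*/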
static int emulate_instruction(struct litevm_vcpu *vcpu,
struct litevm_run *run,
unsigned long cr2,
uint16_t error_code)
{
print_func_entry();
struct x86_emulate_ctxt emulate_ctxt;
int r;
uint32_t cs_ar;
vcpu_load_rsp_rip(vcpu);
cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
emulate_ctxt.vcpu = vcpu;
emulate_ctxt.eflags = vmcs_readl(GUEST_RFLAGS);
emulate_ctxt.cr2 = cr2;
emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : (cs_ar & AR_L_MASK)
? X86EMUL_MODE_PROT64 : (cs_ar & AR_DB_MASK)
? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
emulate_ctxt.cs_base = 0;
emulate_ctxt.ds_base = 0;
emulate_ctxt.es_base = 0;
emulate_ctxt.ss_base = 0;
emulate_ctxt.gs_base = 0;
emulate_ctxt.fs_base = 0;
} else {
emulate_ctxt.cs_base = vmcs_readl(GUEST_CS_BASE);
emulate_ctxt.ds_base = vmcs_readl(GUEST_DS_BASE);
emulate_ctxt.es_base = vmcs_readl(GUEST_ES_BASE);
emulate_ctxt.ss_base = vmcs_readl(GUEST_SS_BASE);
emulate_ctxt.gs_base = vmcs_readl(GUEST_GS_BASE);
emulate_ctxt.fs_base = vmcs_readl(GUEST_FS_BASE);
}
vcpu->mmio_is_write = 0;
r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
if ((r || vcpu->mmio_is_write) && run) {
run->mmio.phys_addr = vcpu->mmio_phys_addr;
memcpy(run->mmio.data, vcpu->mmio_data, 8);
run->mmio.len = vcpu->mmio_size;
run->mmio.is_write = vcpu->mmio_is_write;
}
if (r) {
if (!vcpu->mmio_needed) {
report_emulation_failure(&emulate_ctxt);
print_func_exit();
return EMULATE_FAIL;
}
print_func_exit();
return EMULATE_DO_MMIO;
}
vcpu_put_rsp_rip(vcpu);
vmcs_writel(GUEST_RFLAGS, emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
print_func_exit();
return EMULATE_DO_MMIO;
}
print_func_exit();
return EMULATE_DONE;
}
static uint64_t mk_cr_64(uint64_t curr_cr, uint32_t new_val)
{
print_func_entry();
print_func_exit();
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}
void realmode_lgdt(struct litevm_vcpu *vcpu, uint16_t limit, unsigned long base)
{
print_func_entry();
vmcs_writel(GUEST_GDTR_BASE, base);
vmcs_write32(GUEST_GDTR_LIMIT, limit);
print_func_exit();
}
void realmode_lidt(struct litevm_vcpu *vcpu, uint16_t limit, unsigned long base)
{
print_func_entry();
vmcs_writel(GUEST_IDTR_BASE, base);
vmcs_write32(GUEST_IDTR_LIMIT, limit);
print_func_exit();
}
void realmode_lmsw(struct litevm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
print_func_entry();
lmsw(vcpu, msw);
*rflags = vmcs_readl(GUEST_RFLAGS);
print_func_exit();
}
unsigned long realmode_get_cr(struct litevm_vcpu *vcpu, int cr)
{
print_func_entry();
switch (cr) {
case 0:
print_func_exit();
return guest_cr0();
case 2:
print_func_exit();
return vcpu->cr2;
case 3:
print_func_exit();
return vcpu->cr3;
case 4:
print_func_exit();
return guest_cr4();
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
print_func_exit();
return 0;
}
}
void realmode_set_cr(struct litevm_vcpu *vcpu, int cr, unsigned long val,
unsigned long *rflags)
{
print_func_entry();
switch (cr) {
case 0:
set_cr0(vcpu, mk_cr_64(guest_cr0(), val));
*rflags = vmcs_readl(GUEST_RFLAGS);
break;
case 2:
vcpu->cr2 = val;
break;
case 3:
set_cr3(vcpu, val);
break;
case 4:
set_cr4(vcpu, mk_cr_64(guest_cr4(), val));
break;
default:
vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
}
print_func_exit();
}
static int handle_rmode_exception(struct litevm_vcpu *vcpu,
int vec, uint32_t err_code)
{
print_func_entry();
if (!vcpu->rmode.active) {
print_func_exit();
return 0;
}
if (vec == GP_VECTOR && err_code == 0)
if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) {
print_func_exit();
return 1;
}
print_func_exit();
return 0;
}
static int handle_exception(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint32_t intr_info, error_code;
unsigned long cr2, rip;
uint32_t vect_info;
enum emulation_result er;
vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
!is_page_fault(intr_info)) {
printk("%s: unexpected, vectoring info 0x%x "
"intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
}
if (is_external_interrupt(vect_info)) {
int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
SET_BITMASK_BIT_ATOMIC(((uint8_t *)&vcpu->irq_pending), irq);
SET_BITMASK_BIT_ATOMIC(((uint8_t *)&vcpu->irq_summary), irq / BITS_PER_LONG);
}
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
asm ("int $2");
print_func_exit();
return 1;
}
error_code = 0;
rip = vmcs_readl(GUEST_RIP);
if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
cr2 = vmcs_readl(EXIT_QUALIFICATION);
spin_lock_irqsave(&vcpu->litevm->lock);
if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
spin_unlock(&vcpu->litevm->lock);
print_func_exit();
return 1;
}
er = emulate_instruction(vcpu, litevm_run, cr2, error_code);
spin_unlock(&vcpu->litevm->lock);
switch (er) {
case EMULATE_DONE:
print_func_exit();
return 1;
case EMULATE_DO_MMIO:
++litevm_stat.mmio_exits;
litevm_run->exit_reason = LITEVM_EXIT_MMIO;
print_func_exit();
return 0;
case EMULATE_FAIL:
vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__);
break;
default:
assert(0);
}
}
if (vcpu->rmode.active &&
handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
error_code)) {
print_func_exit();
return 1;
}
if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) {
litevm_run->exit_reason = LITEVM_EXIT_DEBUG;
print_func_exit();
return 0;
}
litevm_run->exit_reason = LITEVM_EXIT_EXCEPTION;
litevm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
litevm_run->ex.error_code = error_code;
print_func_exit();
return 0;
}
static int handle_external_interrupt(struct litevm_vcpu *vcpu,
struct litevm_run *litevm_run)
{
print_func_entry();
++litevm_stat.irq_exits;
print_func_exit();
return 1;
}
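/*
* For a REP string I/O instruction, fetch up to 8 opcode bytes at GUEST_RIP,
* skip legacy prefixes (0x67 toggles the address size), and mask RCX down to
* the effective address size to get the iteration count.
*/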
static int get_io_count(struct litevm_vcpu *vcpu, uint64_t *count)
{
print_func_entry();
uint64_t inst;
gva_t rip;
int countr_size;
int i, n;
if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) {
countr_size = 2;
} else {
uint32_t cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
countr_size = (cs_ar & AR_L_MASK) ? 8:
(cs_ar & AR_DB_MASK) ? 4: 2;
}
rip = vmcs_readl(GUEST_RIP);
if (countr_size != 8)
rip += vmcs_readl(GUEST_CS_BASE);
n = litevm_read_guest(vcpu, rip, sizeof(inst), &inst);
for (i = 0; i < n; i++) {
switch (((uint8_t*)&inst)[i]) {
case 0xf0:
case 0xf2:
case 0xf3:
case 0x2e:
case 0x36:
case 0x3e:
case 0x26:
case 0x64:
case 0x65:
case 0x66:
break;
case 0x67:
countr_size = (countr_size == 2) ? 4: (countr_size >> 1);
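/* 0x67 toggled the address size; fall through and stop scanning */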
default:
goto done;
}
}
print_func_exit();
return 0;
done:
countr_size *= 8;
*count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
print_func_exit();
return 1;
}
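/*
* Decode an I/O instruction exit: pull direction, size, port, and string/REP
* details out of the exit qualification and hand them to userspace as a
* LITEVM_EXIT_IO run record.
*/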
static int handle_io(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint64_t exit_qualification;
++litevm_stat.io_exits;
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
litevm_run->exit_reason = LITEVM_EXIT_IO;
if (exit_qualification & 8)
litevm_run->io.direction = LITEVM_EXIT_IO_IN;
else
litevm_run->io.direction = LITEVM_EXIT_IO_OUT;
litevm_run->io.size = (exit_qualification & 7) + 1;
litevm_run->io.string = (exit_qualification & 16) != 0;
litevm_run->io.string_down
= (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
litevm_run->io.rep = (exit_qualification & 32) != 0;
litevm_run->io.port = exit_qualification >> 16;
if (litevm_run->io.string) {
if (!get_io_count(vcpu, &litevm_run->io.count)) {
print_func_exit();
return 1;
}
litevm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
} else
litevm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
print_func_exit();
return 0;
}
static int handle_invlpg(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint64_t address = vmcs_read64(EXIT_QUALIFICATION);
int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
spin_lock_irqsave(&vcpu->litevm->lock);
vcpu->mmu.inval_page(vcpu, address);
spin_unlock(&vcpu->litevm->lock);
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
print_func_exit();
return 1;
}
static int handle_cr(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint64_t exit_qualification;
int cr;
int reg;
#ifdef LITEVM_DEBUG
if (guest_cpl() != 0) {
vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
inject_gp(vcpu);
print_func_exit();
return 1;
}
#endif
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
cr = exit_qualification & 15;
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
switch (cr) {
case 0:
vcpu_load_rsp_rip(vcpu);
set_cr0(vcpu, vcpu->regs[reg]);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
case 3:
vcpu_load_rsp_rip(vcpu);
set_cr3(vcpu, vcpu->regs[reg]);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
case 4:
vcpu_load_rsp_rip(vcpu);
set_cr4(vcpu, vcpu->regs[reg]);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
set_cr8(vcpu, vcpu->regs[reg]);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
};
break;
case 1: /*mov from cr*/
switch (cr) {
case 3:
vcpu_load_rsp_rip(vcpu);
vcpu->regs[reg] = vcpu->cr3;
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
case 8:
printd("handle_cr: read CR8 "
"cpu erratum AA15\n");
vcpu_load_rsp_rip(vcpu);
vcpu->regs[reg] = vcpu->cr8;
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
}
break;
case 3: /* lmsw */
lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
default:
break;
}
litevm_run->exit_reason = 0;
printk("litevm: unhandled control register: op %d cr %d\n",
(int)(exit_qualification >> 4) & 3, cr);
print_func_exit();
return 0;
}
static int handle_dr(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint64_t exit_qualification;
unsigned long val;
int dr, reg;
/*
* FIXME: this code assumes the host is debugging the guest.
* need to deal with guest debugging itself too.
*/
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
dr = exit_qualification & 7;
reg = (exit_qualification >> 8) & 15;
vcpu_load_rsp_rip(vcpu);
if (exit_qualification & 16) {
/* mov from dr */
switch (dr) {
case 6:
val = 0xffff0ff0;
break;
case 7:
val = 0x400;
break;
default:
val = 0;
}
vcpu->regs[reg] = val;
} else {
/* mov to dr */
}
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
}
static int handle_cpuid(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
litevm_run->exit_reason = LITEVM_EXIT_CPUID;
print_func_exit();
return 0;
}
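/*
 * rdmsr exit.  MSRs that shadow VMCS guest-state fields (fs/gs base,
 * sysenter cs/eip/esp) are read from the VMCS, machine-check and MTRR
 * ranges are faked as zero, the APIC base comes from the vcpu, and
 * anything else is looked up in the guest_msrs array.  Unknown MSRs
 * inject #GP.
 */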
static int handle_rdmsr(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint32_t ecx = vcpu->regs[VCPU_REGS_RCX];
struct vmx_msr_entry *msr = find_msr_entry(vcpu, ecx);
uint64_t data;
if (guest_cpl() != 0) {
vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
inject_gp(vcpu);
print_func_exit();
return 1;
}
switch (ecx) {
case MSR_FS_BASE:
data = vmcs_readl(GUEST_FS_BASE);
break;
case MSR_GS_BASE:
data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_IA32_SYSENTER_CS:
data = vmcs_read32(GUEST_SYSENTER_CS);
break;
case MSR_IA32_SYSENTER_EIP:
data = vmcs_read32(GUEST_SYSENTER_EIP);
break;
case MSR_IA32_SYSENTER_ESP:
data = vmcs_read32(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_MC0_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MCG_CAP:
case MSR_IA32_MC0_MISC:
case MSR_IA32_MC0_MISC+4:
case MSR_IA32_MC0_MISC+8:
case MSR_IA32_MC0_MISC+12:
case MSR_IA32_MC0_MISC+16:
case MSR_IA32_UCODE_REV:
/* MTRR registers */
case 0xfe:
case 0x200 ... 0x2ff:
data = 0;
break;
case MSR_IA32_APICBASE:
data = vcpu->apic_base;
break;
default:
if (msr) {
data = msr->data;
break;
}
printk("litevm: unhandled rdmsr: %x\n", ecx);
inject_gp(vcpu);
print_func_exit();
return 1;
}
/* FIXME: handling of bits 32:63 of rax, rdx */
vcpu->regs[VCPU_REGS_RAX] = data & -1u;
vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
}
#ifdef __x86_64__
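/*
 * EFER writes: reserved bits, or toggling LME while paging is enabled,
 * inject #GP.  LMA is never taken from the guest value; the previous
 * shadow_efer LMA bit is preserved, and LME is masked out of the
 * loaded MSR entry while LMA is clear.
 */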
static void set_efer(struct litevm_vcpu *vcpu, uint64_t efer)
{
print_func_entry();
struct vmx_msr_entry *msr;
if (efer & EFER_RESERVED_BITS) {
printd("set_efer: 0x%llx #GP, reserved bits\n",
efer);
inject_gp(vcpu);
print_func_exit();
return;
}
if (is_paging() && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
printd("set_efer: #GP, change LME while paging\n");
inject_gp(vcpu);
print_func_exit();
return;
}
efer &= ~EFER_LMA;
efer |= vcpu->shadow_efer & EFER_LMA;
vcpu->shadow_efer = efer;
msr = find_msr_entry(vcpu, MSR_EFER);
if (!(efer & EFER_LMA))
efer &= ~EFER_LME;
msr->data = efer;
skip_emulated_instruction(vcpu);
print_func_exit();
}
#endif
#define MSR_IA32_TIME_STAMP_COUNTER 0x10
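/*
 * wrmsr exit, the mirror of handle_rdmsr.  Writes to the guest TSC are
 * implemented by reprogramming TSC_OFFSET (guest tsc = host tsc +
 * offset), microcode and MTRR writes are ignored, and EFER goes
 * through set_efer() above.
 */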
static int handle_wrmsr(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
uint32_t ecx = vcpu->regs[VCPU_REGS_RCX];
struct vmx_msr_entry *msr;
uint64_t data = (vcpu->regs[VCPU_REGS_RAX] & -1u)
| ((uint64_t)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
if (guest_cpl() != 0) {
vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
inject_gp(vcpu);
print_func_exit();
return 1;
}
switch (ecx) {
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
break;
case MSR_GS_BASE:
vmcs_writel(GUEST_GS_BASE, data);
break;
case MSR_IA32_SYSENTER_CS:
vmcs_write32(GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
vmcs_write32(GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
case MSR_EFER:
set_efer(vcpu, data);
print_func_exit();
return 1;
case MSR_IA32_MC0_STATUS:
printk("%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
, __FUNCTION__, data);
break;
case MSR_IA32_TIME_STAMP_COUNTER: {
uint64_t tsc;
tsc = read_tsc();
vmcs_write64(TSC_OFFSET, data - tsc);
break;
}
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case 0x200 ... 0x2ff: /* MTRRs */
break;
case MSR_IA32_APICBASE:
vcpu->apic_base = data;
break;
default:
msr = find_msr_entry(vcpu, ecx);
if (msr) {
msr->data = data;
break;
}
printk("litevm: unhandled wrmsr: %x\n", ecx);
inject_gp(vcpu);
print_func_exit();
return 1;
}
skip_emulated_instruction(vcpu);
print_func_exit();
return 1;
}
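/*
 * Interrupt-window exit: the guest just became able to accept
 * interrupts (see litevm_try_inject_irq below), so turn the exiting
 * control back off; injection is retried before the next entry.
 */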
static int handle_interrupt_window(struct litevm_vcpu *vcpu,
struct litevm_run *litevm_run)
{
print_func_entry();
/* Turn off interrupt window reporting. */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
& ~CPU_BASED_VIRTUAL_INTR_PENDING);
print_func_exit();
return 1;
}
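/*
 * hlt exit: if an interrupt is already pending and the guest has
 * interrupts enabled, the hlt is effectively a no-op and we resume;
 * otherwise exit to userspace and let it decide when to run the vcpu
 * again.
 */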
static int handle_halt(struct litevm_vcpu *vcpu, struct litevm_run *litevm_run)
{
print_func_entry();
skip_emulated_instruction(vcpu);
if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) {
print_func_exit();
return 1;
}
litevm_run->exit_reason = LITEVM_EXIT_HLT;
print_func_exit();
return 0;
}
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the litevm_run parameter to indicate what needs
* to be done to userspace and return 0.
*/
static int (*litevm_vmx_exit_handlers[])(struct litevm_vcpu *vcpu,
struct litevm_run *litevm_run) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
[EXIT_REASON_CPUID] = handle_cpuid,
[EXIT_REASON_MSR_READ] = handle_rdmsr,
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
};
static const int litevm_vmx_max_exit_handlers =
sizeof(litevm_vmx_exit_handlers) / sizeof(*litevm_vmx_exit_handlers);
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
*/
static int litevm_handle_exit(struct litevm_run *litevm_run, struct litevm_vcpu *vcpu)
{
print_func_entry();
uint32_t vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
uint32_t exit_reason = vmcs_read32(VM_EXIT_REASON);
if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
exit_reason != EXIT_REASON_EXCEPTION_NMI )
printk("%s: unexpected, valid vectoring info and "
"exit reason is 0x%x\n", __FUNCTION__, exit_reason);
litevm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (exit_reason < litevm_vmx_max_exit_handlers
&& litevm_vmx_exit_handlers[exit_reason]) {
print_func_exit();
return litevm_vmx_exit_handlers[exit_reason](vcpu, litevm_run);
}
else {
litevm_run->exit_reason = LITEVM_EXIT_UNKNOWN;
litevm_run->hw.hardware_exit_reason = exit_reason;
}
print_func_exit();
return 0;
}
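/*
 * Real-mode interrupt injection.  The normal VM-entry interruption
 * field does not deliver interrupts the way a real-mode guest expects,
 * so mimic an 8086: push flags, cs and ip on the guest stack, load
 * cs:ip from the interrupt vector table entry at irq * 4, and clear
 * IF/TF/AC in the guest rflags.
 */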
static void inject_rmode_irq(struct litevm_vcpu *vcpu, int irq)
{
print_func_entry();
uint16_t ent[2];
uint16_t cs;
uint16_t ip;
unsigned long flags;
unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
uint16_t sp = vmcs_readl(GUEST_RSP);
uint32_t ss_limit = vmcs_read32(GUEST_SS_LIMIT);
if (sp > ss_limit || sp < 6) { /* no room to push flags, cs and ip */
vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
__FUNCTION__,
vmcs_readl(GUEST_RSP),
vmcs_readl(GUEST_SS_BASE),
vmcs_read32(GUEST_SS_LIMIT));
print_func_exit();
return;
}
if (litevm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) !=
sizeof(ent)) {
//vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
print_func_exit();
return;
}
flags = vmcs_readl(GUEST_RFLAGS);
cs = vmcs_readl(GUEST_CS_BASE) >> 4;
ip = vmcs_readl(GUEST_RIP);
if (litevm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 ||
litevm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 ||
litevm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) {
//vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
print_func_exit();
return;
}
vmcs_writel(GUEST_RFLAGS, flags &
~( X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
vmcs_writel(GUEST_RIP, ent[0]);
vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
print_func_exit();
}
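/*
 * Deliver the lowest pending irq.  Pending interrupts are tracked in a
 * two-level bitmap: irq_summary has one bit per word of irq_pending.
 * Clear both levels, then either emulate the delivery (real mode) or
 * use the VM-entry interruption-information field.
 */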
static void litevm_do_inject_irq(struct litevm_vcpu *vcpu)
{
print_func_entry();
int word_index = __ffs(vcpu->irq_summary);
int bit_index = __ffs(vcpu->irq_pending[word_index]);
int irq = word_index * BITS_PER_LONG + bit_index;
/* we don't have clear_bit, and we're not sure the akaros
 * bitops really work yet, so clear the bits by hand.
 */
vcpu->irq_pending[word_index] &= ~(1UL << bit_index);
if (!vcpu->irq_pending[word_index])
vcpu->irq_summary &= ~(1UL << word_index);
if (vcpu->rmode.active) {
inject_rmode_irq(vcpu, irq);
print_func_exit();
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
print_func_exit();
}
static void litevm_try_inject_irq(struct litevm_vcpu *vcpu)
{
print_func_entry();
if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)
&& (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0)
/*
* Interrupts enabled, and not blocked by sti or mov ss. Good.
*/
litevm_do_inject_irq(vcpu);
else
/*
* Interrupts blocked. Wait for unblock.
*/
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
| CPU_BASED_VIRTUAL_INTR_PENDING);
print_func_exit();
}
static void litevm_guest_debug_pre(struct litevm_vcpu *vcpu)
{
print_func_entry();
struct litevm_guest_debug *dbg = &vcpu->guest_debug;
#warning "no debugging guests yet"
assert(0);
/*
set_debugreg(dbg->bp[0], 0);
set_debugreg(dbg->bp[1], 1);
set_debugreg(dbg->bp[2], 2);
set_debugreg(dbg->bp[3], 3);
*/
if (dbg->singlestep) {
unsigned long flags;
flags = vmcs_readl(GUEST_RFLAGS);
flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
vmcs_writel(GUEST_RFLAGS, flags);
}
print_func_exit();
}
static void load_msrs(struct vmx_msr_entry *e, int n)
{
print_func_entry();
int i;
for (i = 0; i < n; ++i)
write_msr(e[i].index, e[i].data);
print_func_exit();
}
static void save_msrs(struct vmx_msr_entry *e, int n)
{
print_func_entry();
int i;
for (i = 0; i < n; ++i)
e[i].data = read_msr(e[i].index);
print_func_exit();
}
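/*
 * Run the guest until something needs userspace attention.  Each pass:
 * refresh the host fs/gs/ldt state in the VMCS, try to inject a
 * pending irq, swap fpu and MSR state, enter the guest with
 * vmlaunch/vmresume, and on exit restore host state and dispatch to
 * litevm_handle_exit().  As long as the exit was handled in the kernel
 * we yield and loop back to "again".
 */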
int vm_run(struct litevm *litevm, struct litevm_run *litevm_run)
{
print_func_entry();
struct litevm_vcpu *vcpu;
uint8_t fail;
uint16_t fs_sel, gs_sel, ldt_sel;
int fs_gs_ldt_reload_needed;
if (litevm_run->vcpu < 0 || litevm_run->vcpu >= LITEVM_MAX_VCPUS)
error("vcpu is %d but must be in the range %d..%d\n",
litevm_run->vcpu, LITEVM_MAX_VCPUS);
vcpu = vcpu_load(litevm, litevm_run->vcpu);
if (!vcpu)
error("vcpu_load failed");
if (litevm_run->emulated) {
skip_emulated_instruction(vcpu);
litevm_run->emulated = 0;
}
if (litevm_run->mmio_completed) {
memcpy(vcpu->mmio_data, litevm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
}
vcpu->mmio_needed = 0;
again:
/*
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
* allow segment selectors with cpl > 0 or ti == 1.
*/
fs_sel = read_fs();
gs_sel = read_gs();
ldt_sel = read_ldt();
fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
if (!fs_gs_ldt_reload_needed) {
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
vmcs_write16(HOST_GS_SELECTOR, gs_sel);
} else {
vmcs_write16(HOST_FS_SELECTOR, 0);
vmcs_write16(HOST_GS_SELECTOR, 0);
}
#ifdef __x86_64__
vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#endif
if (vcpu->irq_summary &&
!(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
litevm_try_inject_irq(vcpu);
if (vcpu->guest_debug.enabled)
litevm_guest_debug_pre(vcpu);
fx_save(vcpu->host_fx_image);
fx_restore(vcpu->guest_fx_image);
save_msrs(vcpu->host_msrs, vcpu->nmsrs);
load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
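/* Save the host gprs on the stack and record the resulting rsp in the
 * HOST_RSP field, load the guest gprs from vcpu->regs, then vmlaunch
 * on the first entry and vmresume afterwards.  Exits land on
 * litevm_vmx_return, where the guest gprs are written back and the
 * host gprs restored; setbe captures a failed entry (CF or ZF set by
 * vmlaunch/vmresume). */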
asm (
/* Store host registers */
"pushf \n\t"
#ifdef __x86_64__
"push %%rax; push %%rbx; push %%rdx;"
"push %%rsi; push %%rdi; push %%rbp;"
"push %%r8; push %%r9; push %%r10; push %%r11;"
"push %%r12; push %%r13; push %%r14; push %%r15;"
"push %%rcx \n\t"
"vmwrite %%rsp, %2 \n\t"
#else
"pusha; push %%ecx \n\t"
"vmwrite %%esp, %2 \n\t"
#endif
/* Check if vmlaunch or vmresume is needed */
"cmp $0, %1 \n\t"
/* Load guest registers. Don't clobber flags. */
#ifdef __x86_64__
"mov %c[cr2](%3), %%rax \n\t"
"mov %%rax, %%cr2 \n\t"
"mov %c[rax](%3), %%rax \n\t"
"mov %c[rbx](%3), %%rbx \n\t"
"mov %c[rdx](%3), %%rdx \n\t"
"mov %c[rsi](%3), %%rsi \n\t"
"mov %c[rdi](%3), %%rdi \n\t"
"mov %c[rbp](%3), %%rbp \n\t"
"mov %c[r8](%3), %%r8 \n\t"
"mov %c[r9](%3), %%r9 \n\t"
"mov %c[r10](%3), %%r10 \n\t"
"mov %c[r11](%3), %%r11 \n\t"
"mov %c[r12](%3), %%r12 \n\t"
"mov %c[r13](%3), %%r13 \n\t"
"mov %c[r14](%3), %%r14 \n\t"
"mov %c[r15](%3), %%r15 \n\t"
"mov %c[rcx](%3), %%rcx \n\t" /* kills %3 (rcx) */
#else
"mov %c[cr2](%3), %%eax \n\t"
"mov %%eax, %%cr2 \n\t"
"mov %c[rax](%3), %%eax \n\t"
"mov %c[rbx](%3), %%ebx \n\t"
"mov %c[rdx](%3), %%edx \n\t"
"mov %c[rsi](%3), %%esi \n\t"
"mov %c[rdi](%3), %%edi \n\t"
"mov %c[rbp](%3), %%ebp \n\t"
"mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */
#endif
/* Enter guest mode */
"jne launched \n\t"
"vmlaunch \n\t"
"jmp litevm_vmx_return \n\t"
"launched: vmresume \n\t"
".globl litevm_vmx_return \n\t"
"litevm_vmx_return: "
/* Save guest registers, load host registers, keep flags */
#ifdef __x86_64__
"xchg %3, 0(%%rsp) \n\t"
"mov %%rax, %c[rax](%3) \n\t"
"mov %%rbx, %c[rbx](%3) \n\t"
"pushq 0(%%rsp); popq %c[rcx](%3) \n\t"
"mov %%rdx, %c[rdx](%3) \n\t"
"mov %%rsi, %c[rsi](%3) \n\t"
"mov %%rdi, %c[rdi](%3) \n\t"
"mov %%rbp, %c[rbp](%3) \n\t"
"mov %%r8, %c[r8](%3) \n\t"
"mov %%r9, %c[r9](%3) \n\t"
"mov %%r10, %c[r10](%3) \n\t"
"mov %%r11, %c[r11](%3) \n\t"
"mov %%r12, %c[r12](%3) \n\t"
"mov %%r13, %c[r13](%3) \n\t"
"mov %%r14, %c[r14](%3) \n\t"
"mov %%r15, %c[r15](%3) \n\t"
"mov %%cr2, %%rax \n\t"
"mov %%rax, %c[cr2](%3) \n\t"
"mov 0(%%rsp), %3 \n\t"
"pop %%rcx; pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
"pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
"pop %%rbp; pop %%rdi; pop %%rsi;"
"pop %%rdx; pop %%rbx; pop %%rax \n\t"
#else
"xchg %3, 0(%%esp) \n\t"
"mov %%eax, %c[rax](%3) \n\t"
"mov %%ebx, %c[rbx](%3) \n\t"
"pushl 0(%%esp); popl %c[rcx](%3) \n\t"
"mov %%edx, %c[rdx](%3) \n\t"
"mov %%esi, %c[rsi](%3) \n\t"
"mov %%edi, %c[rdi](%3) \n\t"
"mov %%ebp, %c[rbp](%3) \n\t"
"mov %%cr2, %%eax \n\t"
"mov %%eax, %c[cr2](%3) \n\t"
"mov 0(%%esp), %3 \n\t"
"pop %%ecx; popa \n\t"
#endif
"setbe %0 \n\t"
"popf \n\t"
: "=g" (fail)
: "r"(vcpu->launched), "r"((unsigned long)HOST_RSP),
"c"(vcpu),
[rax]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RAX])),
[rbx]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RDX])),
[rsi]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RSI])),
[rdi]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RDI])),
[rbp]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_RBP])),
#ifdef __x86_64__
[r8 ]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R8 ])),
[r9 ]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R9 ])),
[r10]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R10])),
[r11]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R11])),
[r12]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R12])),
[r13]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R13])),
[r14]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct litevm_vcpu, regs[VCPU_REGS_R15])),
#endif
[cr2]"i"(offsetof(struct litevm_vcpu, cr2))
: "cc", "memory" );
++litevm_stat.exits;
printk("vm_run exits");
save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
load_msrs(vcpu->host_msrs, NR_BAD_MSRS);
fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
#ifndef __x86_64__
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
#endif
litevm_run->exit_type = 0;
if (fail) {
litevm_run->exit_type = LITEVM_EXIT_TYPE_FAIL_ENTRY;
litevm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
} else {
if (fs_gs_ldt_reload_needed) {
load_ldt(ldt_sel);
load_fs(fs_sel);
/*
* If we have to reload gs, we must take care to
* preserve our gs base.
*/
disable_irq();
load_gs(gs_sel);
#ifdef __x86_64__
write_msr(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
enable_irq();
reload_tss();
}
vcpu->launched = 1;
litevm_run->exit_type = LITEVM_EXIT_TYPE_VM_EXIT;
if (litevm_handle_exit(litevm_run, vcpu)) {
/* Give the scheduler a chance to reschedule. */
vcpu_put(vcpu);
#warning "how to tell if signal is pending"
/*
if (signal_pending(current)) {
++litevm_stat.signal_exits;
return -EINTR;
}
*/
kthread_yield();
/* Cannot fail - no vcpu unplug yet. */
vcpu_load(litevm, vcpu_slot(vcpu));
goto again;
}
}
vcpu_put(vcpu);
printk("vm_run returns\n");
print_func_exit();
return 0;
}
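/*
 * General-purpose registers are cached in vcpu->regs across exits, but
 * rsp, rip and rflags live only in the VMCS, so those are read here
 * (and written in set_regs below) directly.
 */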
static int litevm_dev_ioctl_get_regs(struct litevm *litevm, struct litevm_regs *regs)
{
print_func_entry();
struct litevm_vcpu *vcpu;
if (regs->vcpu < 0 || regs->vcpu >= LITEVM_MAX_VCPUS) {
print_func_exit();
return -EINVAL;
}
vcpu = vcpu_load(litevm, regs->vcpu);
if (!vcpu) {
print_func_exit();
return -ENOENT;
}
regs->rax = vcpu->regs[VCPU_REGS_RAX];
regs->rbx = vcpu->regs[VCPU_REGS_RBX];
regs->rcx = vcpu->regs[VCPU_REGS_RCX];
regs->rdx = vcpu->regs[VCPU_REGS_RDX];
regs->rsi = vcpu->regs[VCPU_REGS_RSI];
regs->rdi = vcpu->regs[VCPU_REGS_RDI];
regs->rsp = vmcs_readl(GUEST_RSP);
regs->rbp = vcpu->regs[VCPU_REGS_RBP];
#ifdef __x86_64__
regs->r8 = vcpu->regs[VCPU_REGS_R8];
regs->r9 = vcpu->regs[VCPU_REGS_R9];
regs->r10 = vcpu->regs[VCPU_REGS_R10];
regs->r11 = vcpu->regs[VCPU_REGS_R11];
regs->r12 = vcpu->regs[VCPU_REGS_R12];
regs->r13 = vcpu->regs[VCPU_REGS_R13];
regs->r14 = vcpu->regs[VCPU_REGS_R14];
regs->r15 = vcpu->regs[VCPU_REGS_R15];
#endif
regs->rip = vmcs_readl(GUEST_RIP);
regs->rflags = vmcs_readl(GUEST_RFLAGS);
/*
* Don't leak debug flags in case they were set for guest debugging
*/
if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
print_func_exit();
return 0;
}
static int litevm_dev_ioctl_set_regs(struct litevm *litevm, struct litevm_regs *regs)
{
print_func_entry();
struct litevm_vcpu *vcpu;
if (regs->vcpu < 0 || regs->vcpu >= LITEVM_MAX_VCPUS) {
print_func_exit();
return -EINVAL;
}
vcpu = vcpu_load(litevm, regs->vcpu);
if (!vcpu) {
print_func_exit();
return -ENOENT;
}
vcpu->regs[VCPU_REGS_RAX] = regs->rax;
vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
vmcs_writel(GUEST_RSP, regs->rsp);
vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
#ifdef __x86_64__
vcpu->regs[VCPU_REGS_R8] = regs->r8;
vcpu->regs[VCPU_REGS_R9] = regs->r9;
vcpu->regs[VCPU_REGS_R10] = regs->r10;
vcpu->regs[VCPU_REGS_R11] = regs->r11;
vcpu->regs[VCPU_REGS_R12] = regs->r12;
vcpu->regs[VCPU_REGS_R13] = regs->r13;
vcpu->regs[VCPU_REGS_R14] = regs->r14;
vcpu->regs[VCPU_REGS_R15] = regs->r15;
#endif
vmcs_writel(GUEST_RIP, regs->rip);
vmcs_writel(GUEST_RFLAGS, regs->rflags);
vcpu_put(vcpu);
print_func_exit();
return 0;
}
static int litevm_dev_ioctl_get_sregs(struct litevm *litevm, struct litevm_sregs *sregs)
{
print_func_entry();
struct litevm_vcpu *vcpu;
if (sregs->vcpu < 0 || sregs->vcpu >= LITEVM_MAX_VCPUS) {
print_func_exit();
return -EINVAL;
}
vcpu = vcpu_load(litevm, sregs->vcpu);
if (!vcpu) {
print_func_exit();
return -ENOENT;
}
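/*
 * Unpack the VMX segment access-rights layout: bits 3:0 type, bit 4 s,
 * bits 6:5 dpl, bit 7 present, bit 12 avl, bit 13 l, bit 14 d/b,
 * bit 15 g, bit 16 unusable.  set_sregs below packs the same fields
 * back up.
 */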
#define get_segment(var, seg) \
do { \
uint32_t ar; \
\
sregs->var.base = vmcs_readl(GUEST_##seg##_BASE); \
sregs->var.limit = vmcs_read32(GUEST_##seg##_LIMIT); \
sregs->var.selector = vmcs_read16(GUEST_##seg##_SELECTOR); \
ar = vmcs_read32(GUEST_##seg##_AR_BYTES); \
if (ar & AR_UNUSABLE_MASK) ar = 0; \
sregs->var.type = ar & 15; \
sregs->var.s = (ar >> 4) & 1; \
sregs->var.dpl = (ar >> 5) & 3; \
sregs->var.present = (ar >> 7) & 1; \
sregs->var.avl = (ar >> 12) & 1; \
sregs->var.l = (ar >> 13) & 1; \
sregs->var.db = (ar >> 14) & 1; \
sregs->var.g = (ar >> 15) & 1; \
sregs->var.unusable = (ar >> 16) & 1; \
} while (0);
get_segment(cs, CS);
get_segment(ds, DS);
get_segment(es, ES);
get_segment(fs, FS);
get_segment(gs, GS);
get_segment(ss, SS);
get_segment(tr, TR);
get_segment(ldt, LDTR);
#undef get_segment
#define get_dtable(var, table) \
sregs->var.limit = vmcs_read32(GUEST_##table##_LIMIT), \
sregs->var.base = vmcs_readl(GUEST_##table##_BASE)
get_dtable(idt, IDTR);
get_dtable(gdt, GDTR);
#undef get_dtable
sregs->cr0 = guest_cr0();
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
sregs->cr4 = guest_cr4();
sregs->cr8 = vcpu->cr8;
sregs->efer = vcpu->shadow_efer;
sregs->apic_base = vcpu->apic_base;
sregs->pending_int = vcpu->irq_summary != 0;
vcpu_put(vcpu);
print_func_exit();
return 0;
}
static int litevm_dev_ioctl_set_sregs(struct litevm *litevm, struct litevm_sregs *sregs)
{
print_func_entry();
struct litevm_vcpu *vcpu;
int mmu_reset_needed = 0;
if (sregs->vcpu < 0 || sregs->vcpu >= LITEVM_MAX_VCPUS) {
print_func_exit();
return -EINVAL;
}
vcpu = vcpu_load(litevm, sregs->vcpu);
if (!vcpu) {
print_func_exit();
return -ENOENT;
}
#define set_segment(var, seg) \
do { \
uint32_t ar; \
\
vmcs_writel(GUEST_##seg##_BASE, sregs->var.base); \
vmcs_write32(GUEST_##seg##_LIMIT, sregs->var.limit); \
vmcs_write16(GUEST_##seg##_SELECTOR, sregs->var.selector); \
if (sregs->var.unusable) { \
ar = (1 << 16); \
} else { \
ar = (sregs->var.type & 15); \
ar |= (sregs->var.s & 1) << 4; \
ar |= (sregs->var.dpl & 3) << 5; \
ar |= (sregs->var.present & 1) << 7; \
ar |= (sregs->var.avl & 1) << 12; \
ar |= (sregs->var.l & 1) << 13; \
ar |= (sregs->var.db & 1) << 14; \
ar |= (sregs->var.g & 1) << 15; \
} \
vmcs_write32(GUEST_##seg##_AR_BYTES, ar); \
} while (0);
set_segment(cs, CS);
set_segment(ds, DS);
set_segment(es, ES);
set_segment(fs, FS);
set_segment(gs, GS);
set_segment(ss, SS);
set_segment(tr, TR);
set_segment(ldt, LDTR);
#undef set_segment
#define set_dtable(var, table) \
vmcs_write32(GUEST_##table##_LIMIT, sregs->var.limit), \
vmcs_writel(GUEST_##table##_BASE, sregs->var.base)
set_dtable(idt, IDTR);
set_dtable(gdt, GDTR);
#undef set_dtable
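/* Changes to anything that feeds the shadow paging setup (cr0, cr3,
 * cr4, efer) force a litevm_mmu_reset_context() below so the shadow
 * page tables are rebuilt against the new guest state. */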
vcpu->cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
vcpu->cr3 = sregs->cr3;
vcpu->cr8 = sregs->cr8;
mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef __x86_64__
__set_efer(vcpu, sregs->efer);
#endif
vcpu->apic_base = sregs->apic_base;
mmu_reset_needed |= guest_cr0() != sregs->cr0;
vcpu->rmode.active = ((sregs->cr0 & CR0_PE_MASK) == 0);
update_exception_bitmap(vcpu);
vmcs_writel(CR0_READ_SHADOW, sregs->cr0);
vmcs_writel(GUEST_CR0, sregs->cr0 | LITEVM_VM_CR0_ALWAYS_ON);
mmu_reset_needed |= guest_cr4() != sregs->cr4;
__set_cr4(vcpu, sregs->cr4);
if (mmu_reset_needed)
litevm_mmu_reset_context(vcpu);
vcpu_put(vcpu);
print_func_exit();
return 0;
}
/*
* Translate a guest virtual address to a guest physical address.
*/
static int litevm_dev_ioctl_translate(struct litevm *litevm, struct litevm_translation *tr)
{
print_func_entry();
unsigned long vaddr = tr->linear_address;
struct litevm_vcpu *vcpu;
gpa_t gpa;
vcpu = vcpu_load(litevm, tr->vcpu);
if (!vcpu) {
print_func_exit();
return -ENOENT;
}
spin_lock_irqsave(&litevm->lock);
gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
tr->writeable = 1;
tr->usermode = 0;
spin_unlock(&litevm->lock);
vcpu_put(vcpu);
print_func_exit();
return 0;
}
#if 0
static int litevm_dev_ioctl_interrupt(struct litevm *litevm, struct litevm_interrupt *irq)
{
struct litevm_vcpu *vcpu;
if (irq->vcpu < 0 || irq->vcpu >= LITEVM_MAX_VCPUS)
return -EINVAL;
if (irq->irq < 0 || irq->irq >= 256)
return -EINVAL;
vcpu = vcpu_load(litevm, irq->vcpu);
if (!vcpu)
return -ENOENT;
set_bit(irq->irq, vcpu->irq_pending);
set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
vcpu_put(vcpu);
return 0;
}
#endif
#if 0
static int litevm_dev_ioctl_debug_guest(struct litevm *litevm,
struct litevm_debug_guest *dbg)
{
struct litevm_vcpu *vcpu;
unsigned long dr7 = 0x400;
uint32_t exception_bitmap;
int old_singlestep;
if (dbg->vcpu < 0 || dbg->vcpu >= LITEVM_MAX_VCPUS)
return -EINVAL;
vcpu = vcpu_load(litevm, dbg->vcpu);
if (!vcpu)
return -ENOENT;
exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
old_singlestep = vcpu->guest_debug.singlestep;
vcpu->guest_debug.enabled = dbg->enabled;
if (vcpu->guest_debug.enabled) {
int i;
dr7 |= 0x200; /* exact */
for (i = 0; i < 4; ++i) {
if (!dbg->breakpoints[i].enabled)
continue;
vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
dr7 |= 2 << (i*2); /* global enable */
dr7 |= 0 << (i*4+16); /* execution breakpoint */
}
exception_bitmap |= (1u << 1); /* Trap debug exceptions */
vcpu->guest_debug.singlestep = dbg->singlestep;
} else {
exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */
vcpu->guest_debug.singlestep = 0;
}
if (old_singlestep && !vcpu->guest_debug.singlestep) {
unsigned long flags;
flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vmcs_writel(GUEST_RFLAGS, flags);
}
vmcs_write32(EXCEPTION_BITMAP, exception_bitmap);
vmcs_writel(GUEST_DR7, dr7);
vcpu_put(vcpu);
return 0;
}
#endif
#if 0
long litevm_control(struct litevm *litevm, int command, unsigned long arg)
{
int r = -EINVAL;
switch (command) {
case LITEVM_CREATE_VCPU: {
r = create_vcpu(litevm, arg);
if (r)
goto out;
break;
}
case LITEVM_RUN: {
struct litevm_run litevm_run;
r = -EFAULT;
if (copy_from_user(&litevm_run, (void *)arg, sizeof litevm_run))
goto out;
r = litevm_dev_ioctl_run(litevm, &litevm_run);
if (r < 0)
goto out;
r = -EFAULT;
if (copy_to_user((void *)arg, &litevm_run, sizeof litevm_run))
goto out;
r = 0;
break;
}
case LITEVM_GET_REGS: {
struct litevm_regs litevm_regs;
r = -EFAULT;
if (copy_from_user(&litevm_regs, (void *)arg, sizeof litevm_regs))
goto out;
r = litevm_dev_ioctl_get_regs(litevm, &litevm_regs);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user((void *)arg, &litevm_regs, sizeof litevm_regs))
goto out;
r = 0;
break;
}
case LITEVM_SET_REGS: {
struct litevm_regs litevm_regs;
r = -EFAULT;
if (copy_from_user(&litevm_regs, (void *)arg, sizeof litevm_regs))
goto out;
r = litevm_dev_ioctl_set_regs(litevm, &litevm_regs);
if (r)
goto out;
r = 0;
break;
}
case LITEVM_GET_SREGS: {
struct litevm_sregs litevm_sregs;
r = -EFAULT;
if (copy_from_user(&litevm_sregs, (void *)arg, sizeof litevm_sregs))
goto out;
r = litevm_dev_ioctl_get_sregs(litevm, &litevm_sregs);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user((void *)arg, &litevm_sregs, sizeof litevm_sregs))
goto out;
r = 0;
break;
}
case LITEVM_SET_SREGS: {
struct litevm_sregs litevm_sregs;
r = -EFAULT;
if (copy_from_user(&litevm_sregs, (void *)arg, sizeof litevm_sregs))
goto out;
r = litevm_dev_ioctl_set_sregs(litevm, &litevm_sregs);
if (r)
goto out;
r = 0;
break;
}
case LITEVM_TRANSLATE: {
struct litevm_translation tr;
r = -EFAULT;
if (copy_from_user(&tr, (void *)arg, sizeof tr))
goto out;
r = litevm_dev_ioctl_translate(litevm, &tr);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user((void *)arg, &tr, sizeof tr))
goto out;
r = 0;
break;
}
case LITEVM_INTERRUPT: {
struct litevm_interrupt irq;
r = -EFAULT;
if (copy_from_user(&irq, (void *)arg, sizeof irq))
goto out;
r = litevm_dev_ioctl_interrupt(litevm, &irq);
if (r)
goto out;
r = 0;
break;
}
case LITEVM_DEBUG_GUEST: {
struct litevm_debug_guest dbg;
r = -EFAULT;
if (copy_from_user(&dbg, (void *)arg, sizeof dbg))
goto out;
r = litevm_dev_ioctl_debug_guest(litevm, &dbg);
if (r)
goto out;
r = 0;
break;
}
case LITEVM_SET_MEMORY_REGION: {
struct litevm_memory_region litevm_mem;
r = -EFAULT;
if (copy_from_user(&litevm_mem, (void *)arg, sizeof litevm_mem))
goto out;
r = litevm_dev_ioctl_set_memory_region(litevm, &litevm_mem);
if (r)
goto out;
break;
}
case LITEVM_GET_DIRTY_LOG: {
struct litevm_dirty_log log;
r = -EFAULT;
if (copy_from_user(&log, (void *)arg, sizeof log))
goto out;
r = litevm_dev_ioctl_get_dirty_log(litevm, &log);
if (r)
goto out;
break;
}
default:
;
}
out:
return r;
}
#endif
#if 0
static int litevm_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct litevm *litevm = vma->vm_file->private_data;
struct litevm_memory_slot *slot;
struct page *page;
slot = gfn_to_memslot(litevm, vmf->pgoff);
if (!slot)
return VM_FAULT_SIGBUS;
page = gfn_to_page(slot, vmf->pgoff);
if (!page)
return VM_FAULT_SIGBUS;
get_page(page);
vmf->page = page;
return 0;
}
#endif
#if 0
static int litevm_reboot(struct notifier_block *notifier, unsigned long val,
void *v)
{
panic("litevm_reboot");
if (val == SYS_RESTART) {
/*
* Some (well, at least mine) BIOSes hang on reboot if
* in vmx root mode.
*/
printk("litevm: exiting vmx mode\n");
handler_wrapper_t *w;
smp_call_function_all(litevm_disable, 0, &w);
smp_call_wait(w);
}
return NOTIFY_OK;
}
#endif
hpa_t bad_page_address;
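/*
 * Module init: bail out unless the cpu advertises VMX and the BIOS has
 * not locked it off, set up the global VMCS descriptor, put every core
 * into VMX operation, and reserve a zeroed page whose physical address
 * (bad_page_address) is presumably used by the MMU code as the backing
 * frame for unmapped guest addresses.
 */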
int vmx_init(void)
{
print_func_entry();
handler_wrapper_t *w;
int r = 0;
if (!cpu_has_litevm_support()) {
printk("litevm: no hardware support\n");
print_func_exit();
return -EOPNOTSUPP;
}
if (vmx_disabled_by_bios()) {
printk("litevm: disabled by bios\n");
print_func_exit();
return -EOPNOTSUPP;
}
setup_vmcs_descriptor();
smp_call_function_all(vm_enable, 0, &w);
if (smp_call_wait(w)) {
printk("litevm_init. smp_call_wait failed. Expect a panic.\n");
}
if ((bad_page_address = PADDR(kpage_zalloc_addr())) == 0ULL) {
r = -ENOMEM;
}
print_func_exit();
return r;
}
static void litevm_exit(void)
{
print_func_entry();
//free_litevm_area();
//__free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
print_func_exit();
}