blob: 614e4d997867c8b8b05571923ca47563948dc637 [file] [log] [blame]
/*
* Copyright 2015 Google Inc.
*
* This file is part of Akaros.
*
* Akaros is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2 of the License.
*
* Akaros is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Lesser GNU General Public License for more details.
*
* See COPYING.LESSER for details on the GNU Lesser General Public License.
* See COPYING for details on the GNU General Public License.
*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <parlib/arch/arch.h>
#include <parlib/ros_debug.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <stdint.h>
#include <err.h>
#include <sys/mman.h>
#include <vmm/vmm.h>
#include <vmm/virtio.h>
#include <vmm/virtio_mmio.h>
#include <vmm/virtio_ids.h>
#include <vmm/virtio_config.h>
#include <ros/arch/mmu.h>
#include <ros/arch/trapframe.h>
/* Run-time switch for the decoder's trace output: non-zero enables DPRINTF. */
int debug_decode = 0;

/* printf-style trace helper. Writes "decode: " followed by the formatted
 * message to stderr, and does nothing while debug_decode is zero. */
#define DPRINTF(fmt, ...) \
	do { \
		if (debug_decode) { \
			fprintf(stderr, "decode: " fmt, ## __VA_ARGS__); \
		} \
	} while (0)
/* Printable names of the general purpose registers, indexed by the register
 * number produced by decode(): 0-7 are the classic registers selected by the
 * three-bit ModRM reg field, 8-15 are the extended registers reached when
 * decode() adds 8 for a 0x44 prefix. */
static char *modrmreg[] = {
	"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
// Since we at most have to decode less than half of each instruction, I'm
// trying to be dumb here.
// Fortunately, for me, that's not hard.
// I'm trying to avoid the whole Big Fun of full instruction decode, and in most
// of these cases we only have to know register, address, operation size, and
// instruction length.
// The ugly messiness of the SIB and all that are not yet needed. Maybe they
// never will be.
// Target size -- 1, 2, 4, or 8 bytes. We have yet to see a 64-bit (8-byte) access.
// TODO: if we ever see it, test the prefix. Since this only supports the low
// 1M, that's not likely.
/* Work out how many bytes of memory the instruction at insn moves, and in
 * which direction.
 *
 * insn:  pointer to the first byte of the instruction (prefixes included).
 * store: set to 1 when the instruction writes to memory, 0 when it reads.
 *
 * Returns the access size in bytes (1, 2 or 4), or -1 if the instruction is
 * not one of the few forms understood here.
 *
 * Recognised forms: optional 0x66 operand-size prefix(es), an optional 0x44
 * prefix, then one of 0x3a/0x88/0x8a (byte), 0x89/0x8b/0x81 (dword),
 * 0x0f 0xb7 (movzw, word) or 0x41 0x8a (byte load with a REX.B prefix). */
static int target(void *insn, int *store)
{
	uint8_t *byte = insn;
	uint16_t word;
	int opsize_override = 0;
	int s;

	*store = 0;

	/* The operand-size prefix only narrows dword operations to word; it is
	 * remembered here and applied once the base size is known, so that a
	 * decode failure or a byte-sized operation is not turned into a bogus
	 * size. */
	while (*byte == 0x66) {
		opsize_override = 1;
		byte++;
	}
	if (*byte == 0x44)
		byte++;

	/* Opcode byte and the byte after it, combined little-endian. Built from
	 * bytes so it always tracks 'byte' and never does an unaligned load. */
	word = (uint16_t)(byte[0] | (byte[1] << 8));

	switch (*byte) {
	case 0x3a:
	case 0x8a:
	case 0x88:
		s = 1;
		/* Bit 1 of these opcodes is the direction: set means the
		 * register is the destination, i.e. a load. */
		*store = !(*byte & 2);
		break;
	case 0x89:
	case 0x8b:
	case 0x81:
		// TODO: To really know, for sure, that this is 32 bit, we'd
		// likely have to check the segment descriptor for the guest's
		// current code segment in its GDT. The D flag (bit 22)
		// determines whether the instruction is using 32 or 16-bit
		// operand size. I'm just going to assume the flag is set
		// (meaning 32 bit operands) for now, in order to make virtio
		// work. But really we should check if we want to know for sure.
		// Note that this hack only applies to mov instructions.
		//
		// And I think there's also a prefix you can use to switch
		// the instruction to 16-bit addressing (address-size
		// override prefix?)
		s = 4;
		*store = !(*byte & 2);
		break;
	case 0x0f:
		/* Two-byte opcodes: only movzw (0f b7) is handled; a load. */
		if (word != 0xb70f) {
			fprintf(stderr,
				"can't get size of %02x/%04x @ %p\n",
				*byte, word, (void *)byte);
			return -1;
		}
		s = 2;
		break;
	case 0x41:
		/* REX.B prefix (extends the ModRM rm field): only the byte
		 * load 41 8a is handled. */
		if (word != 0x8a41) {
			fprintf(stderr,
				"unparsed vex instruction %02x/%04x @ %p\n",
				*byte, word, (void *)byte);
			return -1;
		}
		s = 1;
		break;
	default:
		fprintf(stderr, "can't get size of %02x @ %p\n", *byte,
			(void *)byte);
		fprintf(stderr, "can't get WORD of %04x @ %p\n", word,
			(void *)byte);
		return -1;
	}

	if (opsize_override && s == 4)
		s = 2;
	return s;
}
/* Return the printable name of general purpose register number reg.
 *
 * reg indexes the modrmreg name table. Because reg is a raw 8-bit value, an
 * index beyond the end of the table returns the placeholder "???" instead of
 * reading past the array. The returned string must not be modified or freed. */
char *regname(uint8_t reg)
{
	if (reg >= sizeof(modrmreg) / sizeof(modrmreg[0]))
		return "???";
	return modrmreg[reg];
}
/* Number of bytes occupied by a ModRM byte together with the SIB byte and
 * displacement it implies (32/64-bit addressing forms). */
static int modrm_bytes(const uint8_t *modrm)
{
	int mod = modrm[0] >> 6;
	int rm = modrm[0] & 7;
	int len = 1;

	if (mod == 3)			/* register direct: nothing follows */
		return len;
	if (rm == 4) {			/* SIB byte follows */
		len++;
		if (mod == 0 && (modrm[1] & 7) == 5)
			len += 4;	/* SIB with no base: disp32 */
	} else if (mod == 0 && rm == 5) {
		len += 4;		/* disp32 with no base register */
	}
	if (mod == 1)
		len += 1;		/* disp8 */
	else if (mod == 2)
		len += 4;		/* disp32 */
	return len;
}

/* Length in bytes of the instruction at rip, i.e. how far the guest's
 * instruction pointer has to move to step over it.
 *
 * This is deliberately not a full decoder. It understands optional 0x66
 * operand-size prefixes, one optional 0x44 or 0x41 REX prefix, and then the
 * opcodes 0x3a/0x88/0x89/0x8a/0x8b (ModRM only), 0x81 (ModRM plus a 32-bit
 * immediate, 16-bit under a 0x66 prefix) and 0x0f xx (two-byte opcode plus
 * ModRM). For anything else it logs a "BUG!" line and returns 3 as a guess. */
static int insize(void *rip)
{
	const uint8_t *op = rip;
	int prefix_len = 0;
	int opsize_override = 0;

	while (*op == 0x66) {
		opsize_override = 1;
		prefix_len++;
		op++;
	}
	if (*op == 0x44 || *op == 0x41) {
		prefix_len++;
		op++;
	}

	switch (*op) {
	case 0x0f:
		return prefix_len + 2 + modrm_bytes(op + 2);
	case 0x81:
		return prefix_len + 1 + modrm_bytes(op + 1) +
		       (opsize_override ? 2 : 4);
	case 0x3a:
	case 0x8a:
	case 0x88:
	case 0x89:
	case 0x8b:
		return prefix_len + 1 + modrm_bytes(op + 1);
	default:
		fprintf(stderr, "BUG! %s got 0x%x\n", __func__, *op);
		return 3;
	}
}
// This is a very limited function. It's only here to manage virtio-mmio and low
// memory pointer loads. I am hoping it won't grow with time. The intent is that
// we enter it with an EPT fault from a region that is deliberately left
// unbacked by any memory.
// We return enough info to let you emulate the operation if you want. Because
// we have the failing physical address (gpa) the decode is far simpler because
// we only need to find the register, how many bytes to move, and how big the
// instruction is. I thought about bringing in emulate.c from kvm from xen,
// but it has way more stuff than we need.
// gpa is a pointer to the gpa.
// destreg is the reg index which we can use for printing info.
// regp points to the register in hw_trapframe from which
// to load or store a result.
/* Decode the faulting guest instruction far enough to emulate its memory
 * access.
 *
 * vm_thread: guest thread whose VM trapframe describes the fault.
 * gpa:       out, guest physical address of the access (tf_guest_pa).
 * destreg:   out, register number 0-15 taken from the ModRM reg field
 *            (plus 8 when a 0x44 prefix is present).
 * regp:      out, pointer to that register's slot in the trapframe.
 * store:     out, non-zero if the instruction writes to memory.
 * size:      out, access size in bytes as reported by target().
 * advance:   out, instruction length as reported by insize().
 *
 * Returns 0 on success, VM_PAGE_FAULT if rippa() fails, and -1 if the
 * instruction is not supported (target() failed, or it is a movzb). */
int decode(struct guest_thread *vm_thread, uint64_t *gpa, uint8_t *destreg,
           uint64_t **regp, int *store, int *size, int *advance)
{
	struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
	uint8_t *rip_gpa = NULL;
	uint8_t *opcode;
	uint8_t *modrm;
	int rex_r = 0;
	/* Trapframe slots in register-number order. */
	uint64_t *const gprs[16] = {
		&vm_tf->tf_rax, &vm_tf->tf_rcx, &vm_tf->tf_rdx, &vm_tf->tf_rbx,
		&vm_tf->tf_rsp, &vm_tf->tf_rbp, &vm_tf->tf_rsi, &vm_tf->tf_rdi,
		&vm_tf->tf_r8, &vm_tf->tf_r9, &vm_tf->tf_r10, &vm_tf->tf_r11,
		&vm_tf->tf_r12, &vm_tf->tf_r13, &vm_tf->tf_r14, &vm_tf->tf_r15,
	};

	DPRINTF("v is %p\n", vm_tf);

	// This is the guest physical address of the access.
	// This is nice, because if we ever go with more complete
	// instruction decode, knowing this gpa reduces our work:
	// we don't have to find the source address in registers,
	// only the register holding or receiving the value.
	*gpa = vm_tf->tf_guest_pa;
	DPRINTF("gpa is %p\n", (void *)(uintptr_t)*gpa);
	DPRINTF("rip is %p\n", (void *)(uintptr_t)vm_tf->tf_rip);

	if (rippa(vm_thread, (uint64_t *)&rip_gpa))
		return VM_PAGE_FAULT;
	DPRINTF("rip_gpa is %p\n", rip_gpa);

	// fail fast. If we can't get the size we're done.
	*size = target(rip_gpa, store);
	DPRINTF("store is %d\n", *store);
	if (*size < 0)
		return -1;

	*advance = insize(rip_gpa);

	/* Step over the prefixes so the opcode and ModRM byte are found at
	 * the right place: any 0x66 operand-size prefixes first, then one
	 * optional 0x44/0x41 REX prefix. Only 0x44 (REX.R) extends the reg
	 * field. */
	opcode = rip_gpa;
	while (*opcode == 0x66)
		opcode++;
	if (*opcode == 0x44 || *opcode == 0x41) {
		rex_r = (*opcode == 0x44);
		opcode++;
	}
	/* Two-byte (0x0f xx) opcodes have their ModRM one byte further on. */
	modrm = (*opcode == 0x0f) ? opcode + 2 : opcode + 1;

	uint16_t ins = (uint16_t)(modrm[-1] | (modrm[0] << 8));

	DPRINTF("ins is %04x\n", ins);

	// Our primitive approach wins big here.
	// We don't have to decode the register or the offset used
	// in the computation; that was done by the CPU and is the gpa.
	// All we need to know is which destination or source register it is.
	*destreg = (*modrm >> 3) & 7;
	if (rex_r)
		*destreg += 8;
	*regp = gprs[*destreg];

	/* Handle movz{b,w}X. Zero the destination. */
	if ((opcode[0] == 0x0f) && (opcode[1] == 0xb6)) {
		/* movzb.
		 * TODO: figure out if the destination size is 16 or 32 bits.
		 * Linux doesn't call this yet, so it's not urgent. */
		return -1;
	}
	if ((opcode[0] == 0x0f) && (opcode[1] == 0xb7)) {
		/* movzwl. Destination is 32 bits, unless we had the rex
		 * prefix. This clears the low 32 bits and leaves the upper
		 * 32 bits of the register as they were.
		 * NOTE(review): a 32-bit destination write in 64-bit mode
		 * also zeroes the upper half -- confirm whether the whole
		 * register should be cleared here. */
		**regp &= ~((1ULL << 32) - 1);
	}
	return 0;
}