| /* |
| * Copyright 2015 Google Inc. |
| * |
| * This file is part of Akaros. |
| * |
| * Akaros is free software: you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation, version 2 of the License. |
| * |
| * Akaros is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * Lesser GNU General Public License for more details. |
| * |
| * See COPYING.LESSER for details on the GNU Lesser General Public License. |
| * See COPYING for details on the GNU General Public License. |
| */ |
| |
| #include <stdio.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <fcntl.h> |
| #include <parlib/arch/arch.h> |
| #include <parlib/ros_debug.h> |
| #include <unistd.h> |
| #include <errno.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/uio.h> |
| #include <stdint.h> |
| #include <err.h> |
| #include <sys/mman.h> |
| #include <vmm/vmm.h> |
| #include <vmm/virtio.h> |
| #include <vmm/virtio_mmio.h> |
| #include <vmm/virtio_ids.h> |
| #include <vmm/virtio_config.h> |
| #include <ros/arch/mmu.h> |
| #include <ros/arch/trapframe.h> |
| |
/* Set to non-zero to get decode traces on stderr. */
int debug_decode = 0;

/* printf-style trace helper. Each message is prefixed with "decode: " and is
 * only emitted while debug_decode is non-zero. */
#define DPRINTF(fmt, ...)						\
	do {								\
		if (debug_decode) {					\
			fprintf(stderr, "decode: " fmt, ## __VA_ARGS__);	\
		}							\
	} while (0)
| |
/* Printable names of the general purpose registers, indexed by register
 * number: 0-7 are what a bare ModRM reg field can select, 8-15 are the
 * registers reached when the REX.R extension adds 8 to that field. */
static char *modrmreg[] = {
	"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
	"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
};
| |
| // Since we at most have to decode less than half of each instruction, I'm |
| // trying to be dumb here. |
| // Fortunately, for me, that's not hard. |
| // I'm trying to avoid the whole Big Fun of full instruction decode, and in most |
| // of these cases we only have to know register, address, operation size, and |
| // instruction length. |
| // The ugly messiness of the SIB and all that are not yet needed. Maybe they |
| // never will be. |
| |
// Target size -- how many bytes the instruction moves to or from memory:
// 1, 2, or 4. Nothing here returns 8; no 8-byte access has been needed yet.
// TODO: if we ever see it, test the prefix. Since this only supports the low
// 1M, that's not likely.
//
// insn points at the first byte of the instruction. *store is set to 1 for
// the opcodes whose bit 1 is clear (0x88, 0x89, 0x81) and is 0 otherwise.
// Returns the access size, or -1 (after complaining on stderr) when the
// instruction is not one of the handful recognized here.
static int target(void *insn, int *store)
{
	uint8_t *byte = insn;
	uint16_t word;
	int s;

	*store = 0;

	if (*byte == 0x66) {
		// Operand-size override: decode what follows, then adjust.
		s = target(byte + 1, store);
		// A failed decode must stay a failure rather than become 4.
		if (s < 0)
			return s;
		// Only the default 32-bit operand shrinks to 16 bits. Byte
		// ops and the 16-bit movzw source are unaffected by the
		// prefix.
		return s == 4 ? 2 : s;
	}
	if (*byte == 0x44)
		byte++;

	// The two bytes starting at the (possibly prefix-skipped) opcode,
	// composed little-endian. Built from bytes so the read is neither
	// misaligned nor dependent on host byte order, and so it always
	// starts at the same place as 'byte'.
	word = (uint16_t)(byte[0] | (byte[1] << 8));

	switch (*byte) {
	case 0x3a:
	case 0x8a:
	case 0x88:
		s = 1;
		break;
	case 0x89:
	case 0x8b:
		// TODO: To really know, for sure, that this is 32 bit, we'd
		// likely have to check the segment descriptor for the guest's
		// current code segment in its GDT. The D flag (bit 22)
		// determines whether the instruction is using 32 or 16-bit
		// operand size. I'm just going to assume the flag is set
		// (meaning 32 bit operands) for now, in order to make virtio
		// work. But really we should check if we want to know for sure.
		// Note that this hack (changing the below line) only applies to
		// mov instructions.
		//
		// And I think there's also a prefix you can use to switch
		// the instruction to 16-bit addressing (address-size
		// override prefix?)
		s = 4;
		break;
	case 0x81:
		s = 4;
		break;
	case 0x0f:
		if (word != 0xb70f) {
			fprintf(stderr,
				"can't get size of %02x/%04x @ %p\n",
				*byte, word, (void *)byte);
			return -1;
		}
		// movzw: a 2-byte load, *store stays 0.
		return 2;
	case 0x41:
		/* VEX byte for modrm field */
		if (word != 0x8a41) {
			fprintf(stderr,
				"unparsed vex instruction %02x/%04x @ %p\n",
				*byte, word, (void *)byte);
			return -1;
		}
		// 1-byte load, *store stays 0.
		return 1;
	default:
		fprintf(stderr, "can't get size of %02x @ %p\n", *byte,
			(void *)byte);
		fprintf(stderr, "can't get WORD of %04x @ %p\n", word,
			(void *)byte);
		return -1;
	}

	// Only the one-byte opcodes get here; bit 1 clear means the register
	// is the source, i.e. memory is written.
	*store = !(*byte & 2);
	return s;
}
| |
| char *regname(uint8_t reg) |
| { |
| return modrmreg[reg]; |
| } |
| |
/* Number of bytes occupied by the ModRM byte at modrm plus whatever SIB byte
 * and displacement it calls for. */
static int modrm_bytes(const uint8_t *modrm)
{
	int mod = modrm[0] >> 6;
	int rm = modrm[0] & 7;
	int len = 1;

	/* Register-direct: nothing follows the ModRM byte. */
	if (mod == 3)
		return len;
	if (rm == 4) {
		/* SIB byte; with mod 0 a base of 5 means "disp32, no base". */
		len++;
		if (mod == 0 && (modrm[1] & 7) == 5)
			len += 4;
	}
	if (mod == 1)
		len += 1;
	else if (mod == 2 || (mod == 0 && rm == 5))
		len += 4;
	return len;
}

/* Length in bytes of the instruction at rip, for the few instructions this
 * file knows how to emulate. A leading 0x66 and/or 0x44 prefix is counted.
 * Unknown opcodes are reported on stderr and sized as 3 bytes. */
static int insize(void *rip)
{
	uint8_t *op = rip;
	int prefix = 0;
	int opsize16 = 0;

	if (op[0] == 0x66) {
		opsize16 = 1;
		prefix++;
		op++;
	}
	if (op[0] == 0x44) {
		prefix++;
		op++;
	}

	/* return 3 to handle this specific instruction case. We don't want this
	 * to turn into a fully fledged decode.
	 * This specific instruction is an extended move using r9. It uses the
	 * VEX byte to extend the register bits. */
	if (op[0] == 0x41 && op[1] == 0x8a && op[2] == 0x01)
		return 3 + prefix;

	switch (op[0]) {
	case 0x0f:
		/* Two opcode bytes, then the dreaded mod/rm byte. */
		if (op[1] == 0xb6 || op[1] == 0xb7)
			return prefix + 2 + modrm_bytes(op + 2);
		return prefix + 3;
	case 0x81:
		/* opcode, mod/rm (+ SIB/disp), then a 16 or 32 bit immediate */
		return prefix + 1 + modrm_bytes(op + 1) + (opsize16 ? 2 : 4);
	case 0x3a:
	case 0x8a:
	case 0x88:
	case 0x89:
	case 0x8b:
		return prefix + 1 + modrm_bytes(op + 1);
	default:
		fprintf(stderr, "BUG! %s got 0x%x\n", __func__, op[0]);
		return 3;
	}
}
| |
| // This is a very limited function. It's only here to manage virtio-mmio and low |
| // memory pointer loads. I am hoping it won't grow with time. The intent is that |
| // we enter it with and EPT fault from a region that is deliberately left |
| // unbacked by any memory. |
| // We return enough info to let you emulate the operation if you want. Because |
| // we have the failing physical address (gpa) the decode is far simpler because |
| // we only need to find the register, how many bytes to move, and how big the |
| // instruction is. I thought about bringing in emulate.c from kvm from xen, |
| // but it has way more stuff than we need. |
| // gpa is a pointer to the gpa. |
| // int is the reg index which we can use for printing info. |
| // regp points to the register in hw_trapframe from which |
| // to load or store a result. |
| int decode(struct guest_thread *vm_thread, uint64_t *gpa, uint8_t *destreg, |
| uint64_t **regp, int *store, int *size, int *advance) |
| { |
| struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); |
| uint8_t *rip_gpa = NULL; |
| |
| DPRINTF("v is %p\n", vm_tf); |
| |
| // Duh, which way did he go George? Which way did he go? |
| // First hit on Google gets you there! |
| // This is the guest physical address of the access. |
| // This is nice, because if we ever go with more complete |
| // instruction decode, knowing this gpa reduces our work: |
| // we don't have to find the source address in registers, |
| // only the register holding or receiving the value. |
| *gpa = vm_tf->tf_guest_pa; |
| DPRINTF("gpa is %p\n", *gpa); |
| |
| DPRINTF("rip is %p\n", vm_tf->tf_rip); |
| |
| if (rippa(vm_thread, (uint64_t *)&rip_gpa)) |
| return VM_PAGE_FAULT; |
| DPRINTF("rip_gpa is %p\n", rip_gpa); |
| |
| // fail fast. If we can't get the size we're done. |
| *size = target(rip_gpa, store); |
| DPRINTF("store is %d\n", *store); |
| if (*size < 0) |
| return -1; |
| |
| *advance = insize(rip_gpa); |
| |
| uint16_t ins = *(uint16_t *)(rip_gpa + |
| ((rip_gpa[0] == 0x44) || (rip_gpa[0] == 0x0f) || (rip_gpa[0] == |
| 0x41))); |
| |
| DPRINTF("ins is %04x\n", ins); |
| |
| *destreg = (ins>>11) & 7; |
| *destreg += 8 * (rip_gpa[0] == 0x44); |
| // Our primitive approach wins big here. |
| // We don't have to decode the register or the offset used |
| // in the computation; that was done by the CPU and is the gpa. |
| // All we need to know is which destination or source register it is. |
| switch (*destreg) { |
| case 0: |
| *regp = &vm_tf->tf_rax; |
| break; |
| case 1: |
| *regp = &vm_tf->tf_rcx; |
| break; |
| case 2: |
| *regp = &vm_tf->tf_rdx; |
| break; |
| case 3: |
| *regp = &vm_tf->tf_rbx; |
| break; |
| case 4: |
| *regp = &vm_tf->tf_rsp; // uh, right. |
| break; |
| case 5: |
| *regp = &vm_tf->tf_rbp; |
| break; |
| case 6: |
| *regp = &vm_tf->tf_rsi; |
| break; |
| case 7: |
| *regp = &vm_tf->tf_rdi; |
| break; |
| case 8: |
| *regp = &vm_tf->tf_r8; |
| break; |
| case 9: |
| *regp = &vm_tf->tf_r9; |
| break; |
| case 10: |
| *regp = &vm_tf->tf_r10; |
| break; |
| case 11: |
| *regp = &vm_tf->tf_r11; |
| break; |
| case 12: |
| *regp = &vm_tf->tf_r12; |
| break; |
| case 13: |
| *regp = &vm_tf->tf_r13; |
| break; |
| case 14: |
| *regp = &vm_tf->tf_r14; |
| break; |
| case 15: |
| *regp = &vm_tf->tf_r15; |
| break; |
| } |
| /* Handle movz{b,w}X. Zero the destination. */ |
| if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb6)) { |
| /* movzb. |
| * TODO: figure out if the destination size is 16 or 32 bits. |
| * Linux doesn't call this yet, so it's not urgent. */ |
| return -1; |
| } |
| if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb7)) { |
| /* movzwl. Destination is 32 bits, unless we had the rex prefix |
| * */ |
| **regp &= ~((1ULL << 32) - 1); |
| } |
| return 0; |
| } |