/*
 * Copyright 2015 Google Inc.
 *
 * This file is part of Akaros.
 *
 * Akaros is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 2 of the License.
 *
 * Akaros is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * Lesser GNU General Public License for more details.
 *
 * See COPYING.LESSER for details on the GNU Lesser General Public License.
 * See COPYING for details on the GNU General Public License.
 */
|  |  | 
|  | #include <stdio.h> | 
|  | #include <sys/types.h> | 
|  | #include <sys/stat.h> | 
|  | #include <fcntl.h> | 
|  | #include <parlib/arch/arch.h> | 
|  | #include <parlib/ros_debug.h> | 
|  | #include <unistd.h> | 
|  | #include <errno.h> | 
|  | #include <stdlib.h> | 
|  | #include <string.h> | 
|  | #include <sys/uio.h> | 
|  | #include <stdint.h> | 
|  | #include <err.h> | 
|  | #include <sys/mman.h> | 
|  | #include <vmm/vmm.h> | 
|  | #include <vmm/virtio.h> | 
|  | #include <vmm/virtio_mmio.h> | 
|  | #include <vmm/virtio_ids.h> | 
|  | #include <vmm/virtio_config.h> | 
|  | #include <ros/arch/mmu.h> | 
|  | #include <ros/arch/trapframe.h> | 
|  |  | 
/* Set to non-zero to have DPRINTF() emit decoder tracing on stderr. */
int debug_decode = 0;

/* printf-style trace helper: writes "decode: " followed by the formatted
 * message to stderr, but only while debug_decode is non-zero. */
#define DPRINTF(fmt, ...)						\
	do {								\
		if (debug_decode)					\
			fprintf(stderr, "decode: " fmt, ## __VA_ARGS__);	\
	} while (0)
|  |  | 
/* Register names indexed by the register number decode() reports: 0-7 come
 * straight from the ModRM reg field, 8-15 are what decode() produces when it
 * adds 8 for a 0x44 prefix. */
static char *modrmreg[] = {
	"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
	"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
};
|  |  | 
// Since we at most have to decode less than half of each instruction, I'm
// trying to be dumb here.
// Fortunately, for me, that's not hard.
// I'm trying to avoid the whole Big Fun of full instruction decode, and in most
// of these cases we only have to know register, address, operation size, and
// instruction length.
// The ugly messiness of the SIB and all that are not yet needed. Maybe they
// never will be.

// Target size -- 1, 2, 4, or 8 bytes. We have yet to see 8 bytes (64 bits).
// TODO: if we ever see it, test the prefix. Since this only supports the low
// 1M, that's not likely.
/*
 * Work out how many bytes the instruction at insn moves to or from memory.
 *
 * insn:  first byte of the instruction, prefixes included.
 * store: always written.  For the one-byte opcodes 0x3a, 0x88, 0x89, 0x8a,
 *        0x8b and 0x81 it is set from bit 1 of the opcode (bit clear means
 *        the instruction writes memory); for everything else it is 0.
 *
 * Returns 1, 2 or 4, or -1 (after complaining on stderr) when the opcode is
 * not one of the few this decoder understands.
 */
static int target(void *insn, int *store)
{
	uint8_t *byte = insn;
	unsigned int opword;
	int s;

	*store = 0;

	if (*byte == 0x66) {
		/* Operand-size override: decode what follows, then swap
		 * between 16 and 32 bits.  Failures are passed through
		 * untouched and byte-sized operations stay byte-sized, since
		 * the prefix has no effect on them. */
		s = target(byte + 1, store);
		if (s == 4)
			return 2;
		if (s == 2)
			return 4;
		return s;
	}

	/* Skip the 0x44 (REX.R) prefix.  It is a single byte, so the opcode
	 * and the 16-bit opcode word both start at the very next byte. */
	if (*byte == 0x44)
		byte++;

	switch (*byte) {
	case 0x3a:
	case 0x8a:
	case 0x88:
		s = 1;
		break;
	case 0x89:
	case 0x8b:
		// TODO: To really know, for sure, that this is 32 bit, we'd
		// likely have to check the segment descriptor for the guest's
		// current code segment in its GDT. The D flag (bit 22)
		// determines whether the instruction is using 32 or 16-bit
		// operand size. I'm just going to assume the flag is set
		// (meaning 32 bit operands) for now, in order to make virtio
		// work. But really we should check if we want to know for sure.
		// Note that this hack (changing the below line) only applies to
		// mov instructions.
		//
		//       And I think there's also a prefix you can use to switch
		//       the instruction to 16-bit addressing (address-size
		//       override prefix?)
		s = 4;
		break;
	case 0x81:
		s = 4;
		break;
	case 0x0f:
		/* Two-byte opcode: only movzwl (0f b7) is known.  Build the
		 * little-endian opcode word by hand so we never do an
		 * unaligned 16-bit load. */
		opword = (unsigned int)byte[0] | ((unsigned int)byte[1] << 8);
		if (opword != 0xb70f) {
			fprintf(stderr,
				"can't get size of %02x/%04x @ %p\n",
				*byte, opword, (void *)byte);
			return -1;
		}
		/* *store stays 0. */
		return 2;
	case 0x41:
		/* 0x41 is a REX prefix (REX.B) extending the modrm r/m field;
		 * only the byte load 41 8a is known. */
		opword = (unsigned int)byte[0] | ((unsigned int)byte[1] << 8);
		if (opword != 0x8a41) {
			fprintf(stderr,
				"unparsed vex instruction %02x/%04x @ %p\n",
				*byte, opword, (void *)byte);
			return -1;
		}
		/* *store stays 0. */
		return 1;
	default:
		opword = (unsigned int)byte[0] | ((unsigned int)byte[1] << 8);
		fprintf(stderr, "can't get size of %02x @ %p\n", *byte,
			(void *)byte);
		fprintf(stderr, "can't get WORD of %04x @ %p\n", opword,
			(void *)byte);
		return -1;
	}

	/* Only the one-byte opcodes reach this point.  Bit 1 of the opcode is
	 * the direction bit: clear means the register is the source, i.e. the
	 * instruction stores to memory. */
	*store = !(*byte & 2);
	return s;
}
|  |  | 
|  | char *regname(uint8_t reg) | 
|  | { | 
|  | return modrmreg[reg]; | 
|  | } | 
|  |  | 
/* Number of bytes taken by the ModRM byte at modrm[0] together with the SIB
 * byte and displacement it calls for (32/64-bit addressing forms). */
static int modrm_len(const uint8_t *modrm)
{
	int mod = modrm[0] >> 6;
	int rm = modrm[0] & 7;
	int len = 1;

	/* Register-direct operand: nothing follows the ModRM byte. */
	if (mod == 3)
		return len;

	if (rm == 4) {
		/* A SIB byte follows; base == 5 with mod == 0 means "no base,
		 * disp32". */
		len++;
		if (mod == 0 && (modrm[1] & 7) == 5)
			len += 4;
	}

	switch (mod) {
	case 0:
		/* rm == 5 is the disp32 (RIP-relative in long mode) form. */
		if (rm == 5)
			len += 4;
		break;
	case 1:
		len += 1;	/* disp8 */
		break;
	case 2:
		len += 4;	/* disp32 */
		break;
	}
	return len;
}

/*
 * Length in bytes of the instruction at rip, for the handful of opcodes that
 * target() accepts.  We don't want this to turn into a fully fledged decode.
 *
 * A leading 0x41 or 0x44 byte is treated as a one-byte REX prefix and counted.
 * Unknown opcodes are reported on stderr and a length of 3 is returned.
 */
static int insize(void *rip)
{
	uint8_t *rip_gpa = rip;
	int extra = 0;

	if (rip_gpa[0] == 0x44 || rip_gpa[0] == 0x41) {
		extra = 1;
		rip_gpa++;
	}

	switch (rip_gpa[0]) {
	case 0x0f:
		/* Two opcode bytes, then the dreaded mod/rm byte. */
		return extra + 2 + modrm_len(&rip_gpa[2]);
	case 0x81:
		/* Opcode, mod/rm (+SIB/disp), then a 32-bit immediate. */
		return extra + 1 + modrm_len(&rip_gpa[1]) + 4;
	case 0x3a:
	case 0x8a:
	case 0x88:
	case 0x89:
	case 0x8b:
		return extra + 1 + modrm_len(&rip_gpa[1]);
	default:
		fprintf(stderr, "BUG! %s got 0x%x\n", __func__, rip_gpa[0]);
		return 3;
	}
}
|  |  | 
// This is a very limited function. It's only here to manage virtio-mmio and low
// memory pointer loads. I am hoping it won't grow with time. The intent is that
// we enter it with an EPT fault from a region that is deliberately left
// unbacked by any memory.
// We return enough info to let you emulate the operation if you want. Because
// we have the failing physical address (gpa) the decode is far simpler because
// we only need to find the register, how many bytes to move, and how big the
// instruction is. I thought about bringing in emulate.c from kvm from xen,
// but it has way more stuff than we need.
// gpa is a pointer to the gpa.
// destreg is the reg index which we can use for printing info.
// regp points to the register in hw_trapframe from which
// to load or store a result.
|  | int decode(struct guest_thread *vm_thread, uint64_t *gpa, uint8_t *destreg, | 
|  | uint64_t **regp, int *store, int *size, int *advance) | 
|  | { | 
|  | struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
|  | uint8_t *rip_gpa = NULL; | 
|  |  | 
|  | DPRINTF("v is %p\n", vm_tf); | 
|  |  | 
|  | // Duh, which way did he go George? Which way did he go? | 
|  | // First hit on Google gets you there! | 
|  | // This is the guest physical address of the access. | 
|  | // This is nice, because if we ever go with more complete | 
|  | // instruction decode, knowing this gpa reduces our work: | 
|  | // we don't have to find the source address in registers, | 
|  | // only the register holding or receiving the value. | 
|  | *gpa = vm_tf->tf_guest_pa; | 
|  | DPRINTF("gpa is %p\n", *gpa); | 
|  |  | 
|  | DPRINTF("rip is %p\n", vm_tf->tf_rip); | 
|  |  | 
|  | if (rippa(vm_thread, (uint64_t *)&rip_gpa)) | 
|  | return VM_PAGE_FAULT; | 
|  | DPRINTF("rip_gpa is %p\n", rip_gpa); | 
|  |  | 
|  | // fail fast. If we can't get the size we're done. | 
|  | *size = target(rip_gpa, store); | 
|  | DPRINTF("store is %d\n", *store); | 
|  | if (*size < 0) | 
|  | return -1; | 
|  |  | 
|  | *advance = insize(rip_gpa); | 
|  |  | 
|  | uint16_t ins = *(uint16_t *)(rip_gpa + | 
|  | ((rip_gpa[0] == 0x44) || (rip_gpa[0] == 0x0f) || (rip_gpa[0] == | 
|  | 0x41))); | 
|  |  | 
|  | DPRINTF("ins is %04x\n", ins); | 
|  |  | 
|  | *destreg = (ins>>11) & 7; | 
|  | *destreg += 8 * (rip_gpa[0] == 0x44); | 
|  | // Our primitive approach wins big here. | 
|  | // We don't have to decode the register or the offset used | 
|  | // in the computation; that was done by the CPU and is the gpa. | 
|  | // All we need to know is which destination or source register it is. | 
|  | switch (*destreg) { | 
|  | case 0: | 
|  | *regp = &vm_tf->tf_rax; | 
|  | break; | 
|  | case 1: | 
|  | *regp = &vm_tf->tf_rcx; | 
|  | break; | 
|  | case 2: | 
|  | *regp = &vm_tf->tf_rdx; | 
|  | break; | 
|  | case 3: | 
|  | *regp = &vm_tf->tf_rbx; | 
|  | break; | 
|  | case 4: | 
|  | *regp = &vm_tf->tf_rsp; // uh, right. | 
|  | break; | 
|  | case 5: | 
|  | *regp = &vm_tf->tf_rbp; | 
|  | break; | 
|  | case 6: | 
|  | *regp = &vm_tf->tf_rsi; | 
|  | break; | 
|  | case 7: | 
|  | *regp = &vm_tf->tf_rdi; | 
|  | break; | 
|  | case 8: | 
|  | *regp = &vm_tf->tf_r8; | 
|  | break; | 
|  | case 9: | 
|  | *regp = &vm_tf->tf_r9; | 
|  | break; | 
|  | case 10: | 
|  | *regp = &vm_tf->tf_r10; | 
|  | break; | 
|  | case 11: | 
|  | *regp = &vm_tf->tf_r11; | 
|  | break; | 
|  | case 12: | 
|  | *regp = &vm_tf->tf_r12; | 
|  | break; | 
|  | case 13: | 
|  | *regp = &vm_tf->tf_r13; | 
|  | break; | 
|  | case 14: | 
|  | *regp = &vm_tf->tf_r14; | 
|  | break; | 
|  | case 15: | 
|  | *regp = &vm_tf->tf_r15; | 
|  | break; | 
|  | } | 
|  | /* Handle movz{b,w}X.  Zero the destination. */ | 
|  | if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb6)) { | 
|  | /* movzb. | 
|  | * TODO: figure out if the destination size is 16 or 32 bits. | 
|  | * Linux doesn't call this yet, so it's not urgent. */ | 
|  | return -1; | 
|  | } | 
|  | if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb7)) { | 
|  | /* movzwl.  Destination is 32 bits, unless we had the rex prefix | 
|  | * */ | 
|  | **regp &= ~((1ULL << 32) - 1); | 
|  | } | 
|  | return 0; | 
|  | } |