| /* | 
 |  * Copyright 2015 Google Inc. | 
 |  * | 
 |  * This file is part of Akaros. | 
 |  * | 
 |  * Akaros is free software: you can redistribute it and/or modify | 
 |  * it under the terms of the GNU General Public License as published by | 
 |  * the Free Software Foundation, version 2 of the License. | 
 |  * | 
 |  * Akaros is distributed in the hope that it will be useful, | 
 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 |  * Lesser GNU General Public License for more details. | 
 |  * | 
 |  * See COPYING.LESSER for details on the GNU Lesser General Public License. | 
 |  * See COPYING for details on the GNU General Public License. | 
 |  */ | 
 |  | 
 | #include <parlib/stdio.h> | 
 | #include <sys/types.h> | 
 | #include <sys/stat.h> | 
 | #include <fcntl.h> | 
 | #include <parlib/arch/arch.h> | 
 | #include <parlib/ros_debug.h> | 
 | #include <unistd.h> | 
 | #include <errno.h> | 
 | #include <stdlib.h> | 
 | #include <string.h> | 
 | #include <sys/uio.h> | 
 | #include <stdint.h> | 
 | #include <err.h> | 
 | #include <sys/mman.h> | 
 | #include <vmm/vmm.h> | 
 | #include <vmm/virtio.h> | 
 | #include <vmm/virtio_mmio.h> | 
 | #include <vmm/virtio_ids.h> | 
 | #include <vmm/virtio_config.h> | 
 | #include <ros/arch/mmu.h> | 
 | #include <ros/arch/trapframe.h> | 
 |  | 
 | int debug_decode = 0; | 
 | #define DPRINTF(fmt, ...) \ | 
 | 	do { \ | 
 | 		if (debug_decode) { \ | 
 | 			fprintf(stderr, "decode: " fmt, ## __VA_ARGS__); \ | 
 | 		} \ | 
 | 	} \ | 
 | 	while (0) | 
 |  | 
 | static char *modrmreg[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"}; | 
 |  | 
 | // Since we at most have to decode less than half of each instruction, I'm | 
 | // trying to be dumb here. | 
 | // Fortunately, for me, that's not hard. | 
 | // I'm trying to avoid the whole Big Fun of full instruction decode, and in most | 
 | // of these cases we only have to know register, address, operation size, and | 
 | // instruction length. | 
 | // The ugly messiness of the SIB and all that are not yet needed. Maybe they | 
 | // never will be. | 
 |  | 
 | // Target size -- 1, 2, 4, or 8 bytes. We have yet to see 64 bits (8 bytes). | 
 | // TODO: if we ever see it, test the prefix. Since this only supports the low | 
 | // 1M, that's not likely. | 
 | static int target(void *insn, int *store) | 
 | { | 
 | 	*store = 0; | 
 | 	int s = -1; | 
 | 	uint8_t *byte = insn; | 
 | 	uint16_t *word = insn; | 
 |  | 
 | 	if (*byte == 0x66) { | 
 | 		s = target(insn+1,store); | 
 | 		// flip the sense of s. | 
 | 		s = s == 4 ? 2 : 4; | 
 | 		return s; | 
 | 	} | 
 | 	if (*byte == 0x44) { | 
 | 		byte++; | 
 | 		word++; | 
 | 	} | 
 | 	switch(*byte) { | 
 | 	case 0x3a: | 
 | 	case 0x8a: | 
 | 	case 0x88: | 
 | 		s = 1; | 
 | 		break; | 
 | 	case 0x89: | 
 | 	case 0x8b: | 
 | 		// TODO: To really know, for sure, that this is 32 bit, we'd | 
 | 		// likely have to check the segment descriptor for the guest's | 
 | 		// current code segment in it's GDT. The D flag (bit 22) | 
 | 		// determines whether the instruction is using 32 or 16-bit | 
 | 		// operand size. I'm just going to assume the flag is set | 
 | 		// (meaning 32 bit operands) for now, in order to make virtio | 
 | 		// work. But really we should check if we want to know for sure. | 
 | 		// Note that this hack (changing the below line) only applies to | 
 | 		// mov instructions. | 
 | 		// | 
 | 		//       And I think there's also a prefix you can use to switch | 
 | 		//       the instruction to 16-bit addressing (address-size | 
 | 		//       override prefix?) | 
 | 		s = 4; | 
 | 		break; | 
 | 	case 0x81: | 
 | 		s = 4; | 
 | 		break; | 
 | 	case 0x0f: | 
 | 		switch (*word) { | 
 | 			case 0xb70f: | 
 | 				s = 2; | 
 | 				break; | 
 | 			default: | 
 | 				fprintf(stderr, | 
 | 					"can't get size of %02x/%04x @ %p\n", | 
 | 					*byte, *word, byte); | 
 | 				return -1; | 
 | 		} | 
 | 		break; | 
 | 	case 0x41: | 
 | 		/* VEX byte for modrm field */ | 
 | 		switch (*word) { | 
 | 			case 0x8a41: | 
 | 				s = 1; | 
 | 				break; | 
 | 			default: | 
 | 				fprintf(stderr, "unparsed vex instruction %02x/%04x @ %p\n", | 
 | 				        *byte, *word, byte); | 
 | 				return -1; | 
 | 		} | 
 | 		break; | 
 | 	default: | 
 | 		fprintf(stderr, "can't get size of %02x @ %p\n", *byte, byte); | 
 | 		fprintf(stderr, "can't get WORD of %04x @ %p\n", *word, word); | 
 | 		return -1; | 
 | 		break; | 
 | 	} | 
 |  | 
 | 	switch(*byte) { | 
 | 	case 0x0f: | 
 | 	case 0x41: | 
 | 		break; | 
 | 	case 0x3a: | 
 | 	case 0x8a: | 
 | 	case 0x88: | 
 | 	case 0x89: | 
 | 	case 0x8b: | 
 | 	case 0x81: | 
 | 		*store = !(*byte & 2); | 
 | 		break; | 
 | 	default: | 
 | 		fprintf(stderr, "%s: Can't happen. rip is: %p\n", __func__, | 
 | 			byte); | 
 | 		break; | 
 | 	} | 
 | 	return s; | 
 | } | 
 |  | 
 | char *regname(uint8_t reg) | 
 | { | 
 | 	return modrmreg[reg]; | 
 | } | 
 |  | 
 | static int insize(void *rip) | 
 | { | 
 | 	uint8_t *rip_gpa = rip; | 
 | 	int advance = 3; | 
 | 	int extra = 0; | 
 | 	if (rip_gpa[0] == 0x44) { | 
 | 		extra = 1; | 
 | 		rip_gpa++; | 
 | 	} | 
 |  | 
 | 	/* return 3 to handle this specific instruction case. We don't want this | 
 | 	 * to turn into a fully fledged decode. | 
 | 	 * This specific instruction is an extended move using r9. It uses the | 
 | 	 * VEX byte to extend the register bits. */ | 
 | 	if (rip_gpa[0] == 0x41 && rip_gpa[1] == 0x8a && rip_gpa[2] == 0x01) | 
 | 		return 3; | 
 | 	/* the dreaded mod/rm byte. */ | 
 | 	int mod = rip_gpa[1] >> 6; | 
 | 	int rm = rip_gpa[1] & 7; | 
 |  | 
 | 	switch (rip_gpa[0]) { | 
 | 	default: | 
 | 		fprintf(stderr, "BUG! %s got 0x%x\n", __func__, rip_gpa[0]); | 
 | 	case 0x0f: | 
 | 		break; | 
 | 	case 0x81: | 
 | 		advance = 6 + extra; | 
 | 		break; | 
 | 	case 0x3a: | 
 | 	case 0x8a: | 
 | 	case 0x88: | 
 | 	case 0x89: | 
 | 	case 0x8b: | 
 | 		switch (mod) { | 
 | 		case 0: | 
 | 			advance = 2 + (rm == 4) + extra; | 
 | 			break; | 
 | 		case 1: | 
 | 			advance = 3 + (rm == 4) + extra; | 
 | 			break; | 
 | 		case 2: | 
 | 			advance = 6 + (rm == 4) + extra; | 
 | 			break; | 
 | 		case 3: | 
 | 			advance = 2 + extra; | 
 | 			break; | 
 | 		} | 
 | 		break; | 
 | 	} | 
 | 	return advance; | 
 | } | 
 |  | 
 | // This is a very limited function. It's only here to manage virtio-mmio and low | 
 | // memory pointer loads. I am hoping it won't grow with time. The intent is that | 
 | // we enter it with an EPT fault from a region that is deliberately left | 
 | // unbacked by any memory. | 
 | // We return enough info to let you emulate the operation if you want. Because | 
 | // we have the failing physical address (gpa) the decode is far simpler because | 
 | // we only need to find the register, how many bytes to move, and how big the | 
 | // instruction is. I thought about bringing in emulate.c from kvm from xen, | 
 | // but it has way more stuff than we need. | 
 | // gpa is a pointer to the gpa. | 
 | // destreg is the reg index which we can use for printing info. | 
 | // regp points to the register in hw_trapframe from which | 
 | // to load or store a result. | 
 | int decode(struct guest_thread *vm_thread, uint64_t *gpa, uint8_t *destreg, | 
 |            uint64_t **regp, int *store, int *size, int *advance) | 
 | { | 
 | 	struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf); | 
 | 	uint8_t *rip_gpa = NULL; | 
 |  | 
 | 	DPRINTF("v is %p\n", vm_tf); | 
 |  | 
 | 	// Duh, which way did he go George? Which way did he go? | 
 | 	// First hit on Google gets you there! | 
 | 	// This is the guest physical address of the access. | 
 | 	// This is nice, because if we ever go with more complete | 
 | 	// instruction decode, knowing this gpa reduces our work: | 
 | 	// we don't have to find the source address in registers, | 
 | 	// only the register holding or receiving the value. | 
 | 	*gpa = vm_tf->tf_guest_pa; | 
 | 	DPRINTF("gpa is %p\n", *gpa); | 
 |  | 
 | 	DPRINTF("rip is %p\n", vm_tf->tf_rip); | 
 |  | 
 | 	if (rippa(vm_thread, (uint64_t *)&rip_gpa)) | 
 | 		return VM_PAGE_FAULT; | 
 | 	DPRINTF("rip_gpa is %p\n", rip_gpa); | 
 |  | 
 | 	// fail fast. If we can't get the size we're done. | 
 | 	*size = target(rip_gpa, store); | 
 | 	DPRINTF("store is %d\n", *store); | 
 | 	if (*size < 0) | 
 | 		return -1; | 
 |  | 
 | 	*advance = insize(rip_gpa); | 
 |  | 
 | 	uint16_t ins = *(uint16_t *)(rip_gpa + | 
 | 	    ((rip_gpa[0] == 0x44) || (rip_gpa[0] == 0x0f) || (rip_gpa[0] == | 
 | 							      0x41))); | 
 |  | 
 | 	DPRINTF("ins is %04x\n", ins); | 
 |  | 
 | 	*destreg = (ins>>11) & 7; | 
 | 	*destreg += 8 * (rip_gpa[0] == 0x44); | 
 | 	// Our primitive approach wins big here. | 
 | 	// We don't have to decode the register or the offset used | 
 | 	// in the computation; that was done by the CPU and is the gpa. | 
 | 	// All we need to know is which destination or source register it is. | 
 | 	switch (*destreg) { | 
 | 	case 0: | 
 | 		*regp = &vm_tf->tf_rax; | 
 | 		break; | 
 | 	case 1: | 
 | 		*regp = &vm_tf->tf_rcx; | 
 | 		break; | 
 | 	case 2: | 
 | 		*regp = &vm_tf->tf_rdx; | 
 | 		break; | 
 | 	case 3: | 
 | 		*regp = &vm_tf->tf_rbx; | 
 | 		break; | 
 | 	case 4: | 
 | 		*regp = &vm_tf->tf_rsp; // uh, right. | 
 | 		break; | 
 | 	case 5: | 
 | 		*regp = &vm_tf->tf_rbp; | 
 | 		break; | 
 | 	case 6: | 
 | 		*regp = &vm_tf->tf_rsi; | 
 | 		break; | 
 | 	case 7: | 
 | 		*regp = &vm_tf->tf_rdi; | 
 | 		break; | 
 | 	case 8: | 
 | 		*regp = &vm_tf->tf_r8; | 
 | 		break; | 
 | 	case 9: | 
 | 		*regp = &vm_tf->tf_r9; | 
 | 		break; | 
 | 	case 10: | 
 | 		*regp = &vm_tf->tf_r10; | 
 | 		break; | 
 | 	case 11: | 
 | 		*regp = &vm_tf->tf_r11; | 
 | 		break; | 
 | 	case 12: | 
 | 		*regp = &vm_tf->tf_r12; | 
 | 		break; | 
 | 	case 13: | 
 | 		*regp = &vm_tf->tf_r13; | 
 | 		break; | 
 | 	case 14: | 
 | 		*regp = &vm_tf->tf_r14; | 
 | 		break; | 
 | 	case 15: | 
 | 		*regp = &vm_tf->tf_r15; | 
 | 		break; | 
 | 	} | 
 | 	/* Handle movz{b,w}X.  Zero the destination. */ | 
 | 	if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb6)) { | 
 | 		/* movzb. | 
 | 		 * TODO: figure out if the destination size is 16 or 32 bits. | 
 | 		 * Linux doesn't call this yet, so it's not urgent. */ | 
 | 		return -1; | 
 | 	} | 
 | 	if ((rip_gpa[0] == 0x0f) && (rip_gpa[1] == 0xb7)) { | 
 | 		/* movzwl.  Destination is 32 bits, unless we had the rex prefix | 
 | 		 * */ | 
 | 		**regp &= ~((1ULL << 32) - 1); | 
 | 	} | 
 | 	return 0; | 
 | } |