| #include <stdio.h> |
| #include <pthread.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <fcntl.h> |
| #include <parlib/arch/arch.h> |
| #include <parlib/ros_debug.h> |
| #include <unistd.h> |
| #include <gelf.h> |
| #include <errno.h> |
| #include <libelf.h> |
| #include <dirent.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <ros/syscall.h> |
| #include <sys/mman.h> |
| #include <vmm/vmm.h> |
| #include <vmm/acpi/acpi.h> |
| #include <vmm/acpi/vmm_simple_dsdt.h> |
| #include <ros/arch/mmu.h> |
| #include <ros/arch/membar.h> |
| #include <ros/vmm.h> |
| #include <parlib/uthread.h> |
| #include <vmm/linux_bootparam.h> |
| #include <getopt.h> |
| |
| #include <vmm/sched.h> |
| #include <sys/eventfd.h> |
| #include <sys/uio.h> |
| |
| #define MiB 0x100000ull |
| #define GiB (1ull << 30) |
| #define MinMemory (16*MiB) |
| |
| static struct virtual_machine local_vm, *vm = &local_vm; |
| struct vmm_gpcore_init gpci; |
| static void *ram; |
| static unsigned long long memsize = GiB; |
| static uintptr_t memstart = MinMemory; |
| static uintptr_t stack; |
| static unsigned long long *p512, *p1, *p2m; |
| |
| static int debug = 0; |
| |
| /* load_kernel loads an ELF file as a kernel. */ |
| uintptr_t |
| load_kernel(char *filename) |
| { |
| Elf64_Ehdr *ehdr; |
| Elf *elf; |
| size_t phnum = 0; |
| Elf64_Phdr *hdrs; |
| int fd; |
| |
| elf_version(EV_CURRENT); |
| fd = open(filename, O_RDONLY); |
| if (fd < 0) { |
| fprintf(stderr, "Can't open %s: %r\n", filename); |
| return 0; |
| } |
| |
| elf = elf_begin(fd, ELF_C_READ, NULL); |
| if (elf == NULL) { |
| fprintf(stderr, "%s: cannot read %s ELF file.\n", __func__, filename); |
| close(fd); |
| return 0; |
| } |
| |
| ehdr = elf64_getehdr(elf); |
| if (ehdr == NULL) { |
| fprintf(stderr, "%s: cannot get exec header of %s.\n", |
| __func__, filename); |
| goto fail; |
| } |
| fprintf(stderr, "%s ELF entry point is %p\n", filename, |
| (void *)ehdr->e_entry); |
| |
| if (elf_getphdrnum(elf, &phnum) < 0) { |
| fprintf(stderr, "%s: cannot get program header num of %s.\n", |
| __func__, filename); |
| goto fail; |
| } |
| fprintf(stderr, "%s has %p program headers\n", filename, phnum); |
| |
| hdrs = elf64_getphdr(elf); |
| if (hdrs == NULL) { |
| fprintf(stderr, "%s: cannot get program headers of %s.\n", |
| __func__, filename); |
| goto fail; |
| } |
| |
| for (int i = 0; i < phnum; i++) { |
| size_t tot; |
| Elf64_Phdr *h = &hdrs[i]; |
| uintptr_t pa; |
| |
| fprintf(stderr, |
| "%d: type 0x%lx flags 0x%lx offset 0x%lx vaddr 0x%lx paddr 0x%lx size 0x%lx memsz 0x%lx align 0x%lx\n", |
| i, |
| h->p_type, /* Segment type */ |
| h->p_flags, /* Segment flags */ |
| h->p_offset, /* Segment file offset */ |
| h->p_vaddr, /* Segment virtual address */ |
| h->p_paddr, /* Segment physical address */ |
| h->p_filesz, /* Segment size in file */ |
| h->p_memsz, /* Segment size in memory */ |
| h->p_align /* Segment alignment */); |
| if (h->p_type != PT_LOAD) |
| continue; |
| if ((h->p_flags & (PF_R | PF_W | PF_X)) == 0) |
| continue; |
| |
| pa = h->p_paddr; |
| fprintf(stderr, |
| "Read header %d @offset %p to %p (elf PA is %p) %d bytes:", |
| i, h->p_offset, pa, h->p_paddr, h->p_filesz); |
| tot = 0; |
| while (tot < h->p_filesz) { |
| int amt = pread(fd, (void *)(pa + tot), h->p_filesz - tot, |
| h->p_offset + tot); |
| if (amt < 1) |
| break; |
| tot += amt; |
| } |
| fprintf(stderr, "read a total of %d bytes\n", tot); |
| if (tot < h->p_filesz) { |
| fprintf(stderr, "%s: got %d bytes, wanted %d bytes\n", |
| filename, tot, h->p_filesz); |
| goto fail; |
| } |
| } |
| |
| close(fd); |
| elf_end(elf); |
| return ehdr->e_entry; |
| fail: |
| close(fd); |
| elf_end(elf); |
| return 0; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| int vmmflags = VMM_VMCALL_PRINTF; |
| uint64_t entry = 0; |
| int ret; |
| struct vm_trapframe *vm_tf; |
| int c; |
| int option_index; |
| static struct option long_options[] = { |
| {"debug", no_argument, 0, 'd'}, |
| {"vmmflags", required_argument, 0, 'v'}, |
| {"memsize", required_argument, 0, 'm'}, |
| {"memstart", required_argument, 0, 'M'}, |
| {"stack", required_argument, 0, 'S'}, |
| {"cmdline_extra", required_argument, 0, 'c'}, |
| {"greedy", no_argument, 0, 'g'}, |
| {"scp", no_argument, 0, 's'}, |
| {"help", no_argument, 0, 'h'}, |
| {0, 0, 0, 0} |
| }; |
| |
| fprintf(stderr, "%p %p %p %p\n", PGSIZE, PGSHIFT, PML1_SHIFT, |
| PML1_PTE_REACH); |
| |
| if ((uintptr_t)__procinfo.program_end >= MinMemory) { |
| fprintf(stderr, |
| "Panic: vmrunkernel binary extends into guest memory\n"); |
| exit(1); |
| } |
| |
| while ((c = getopt_long(argc, argv, "dv:m:M:S:gsh", long_options, |
| &option_index)) != -1) { |
| switch (c) { |
| case 'd': |
| debug++; |
| break; |
| case 'v': |
| vmmflags = strtoull(optarg, 0, 0); |
| break; |
| case 'm': |
| memsize = strtoull(optarg, 0, 0); |
| break; |
| case 'M': |
| memstart = strtoull(optarg, 0, 0); |
| break; |
| case 'S': |
| stack = strtoull(optarg, 0, 0); |
| break; |
| case 'g': /* greedy */ |
| parlib_never_yield = TRUE; |
| break; |
| case 's': /* scp */ |
| parlib_wants_to_be_mcp = FALSE; |
| break; |
| case 'h': |
| default: |
| // Sadly, the getopt_long struct does |
| // not have a pointer to help text. |
| for (int i = 0; |
| i < sizeof(long_options)/sizeof(long_options[0]) - 1; |
| i++) { |
| struct option *l = &long_options[i]; |
| |
| fprintf(stderr, "%s or %c%s\n", l->name, l->val, |
| l->has_arg ? " <arg>" : ""); |
| } |
| exit(0); |
| } |
| } |
| argc -= optind; |
| argv += optind; |
| if (argc < 1) { |
| fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)]\n", argv[0]); |
| exit(1); |
| } |
| |
| if ((uintptr_t)(memstart + memsize) >= (uintptr_t)BRK_START) { |
| fprintf(stderr, |
| "memstart 0x%lx memsize 0x%lx -> 0x%lx is too large; overlaps BRK_START at %p\n", |
| memstart, memsize, memstart + memsize, BRK_START); |
| exit(1); |
| } |
| |
| ram = mmap((void *)memstart, memsize, |
| PROT_READ | PROT_WRITE | PROT_EXEC, |
| MAP_POPULATE | MAP_ANONYMOUS, -1, 0); |
| if (ram != (void *)memstart) { |
| fprintf(stderr, "Could not mmap 0x%lx bytes at 0x%lx\n", |
| memsize, memstart); |
| exit(1); |
| } |
| |
| entry = load_kernel(argv[0]); |
| if (entry == 0) { |
| fprintf(stderr, "Unable to load kernel %s\n", argv[0]); |
| exit(1); |
| } |
| |
| vm->nr_gpcs = 1; |
| vm->gpcis = &gpci; |
| ret = vmm_init(vm, vmmflags); |
| if (ret) { |
| fprintf(stderr, "vmm_init failed: %r\n"); |
| exit(1); |
| } |
| |
| /* Allocate 3 pages for page table pages: a page of 512 GiB |
| * PTEs with only one entry filled to point to a page of 1 GiB |
| * PTEs; a page of 1 GiB PTEs with only one entry filled to |
| * point to a page of 2 MiB PTEs; and a page of 2 MiB PTEs, |
| * all of which may be filled. For now, we don't handle |
| * starting addresses not aligned on 512 GiB boundaries or |
| * sizes > GiB */ |
| ret = posix_memalign((void **)&p512, PGSIZE, 3 * PGSIZE); |
| if (ret) { |
| perror("ptp alloc"); |
| exit(1); |
| } |
| |
| /* Set up a 1:1 ("identity") page mapping from guest virtual |
| * to guest physical using the (host virtual) |
| * `kerneladdress`. This mapping may be used for only a short |
| * time, until the guest sets up its own page tables. Be aware |
| * that the values stored in the table are physical addresses. |
| * This is subtle and mistakes are easily disguised due to the |
| * identity mapping, so take care when manipulating these |
| * mappings. */ |
| p1 = &p512[NPTENTRIES]; |
| p2m = &p512[2 * NPTENTRIES]; |
| |
| fprintf(stderr, "Map %p for %zu bytes\n", memstart, memsize); |
| /* TODO: fix this nested loop so it's correct for more than |
| * one GiB. */ |
| for(uintptr_t p4 = memstart; p4 < memstart + memsize; |
| p4 += PML4_PTE_REACH) { |
| p512[PML4(p4)] = (uint64_t)p1 | PTE_KERN_RW; |
| for (uintptr_t p3 = p4; p3 < memstart + memsize; |
| p3 += PML3_PTE_REACH) { |
| p1[PML3(p3)] = (uint64_t)p2m | PTE_KERN_RW; |
| for (uintptr_t p2 = p3; p2 < memstart + memsize; p2 += PML2_PTE_REACH) { |
| p2m[PML2(p2)] = |
| (uint64_t)(p2) | PTE_KERN_RW | PTE_PS; |
| } |
| } |
| } |
| |
| fprintf(stderr, "p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]); |
| |
| vm_tf = gth_to_vmtf(vm->gths[0]); |
| vm_tf->tf_cr3 = (uint64_t) p512; |
| vm_tf->tf_rip = entry; |
| vm_tf->tf_rsp = stack; |
| vm_tf->tf_rsi = (uint64_t) 0; |
| start_guest_thread(vm->gths[0]); |
| |
| uthread_sleep_forever(); |
| return 0; |
| } |