|  | /* Copyright (c) 2017 Google Inc. | 
|  | * See LICENSE for details. | 
|  | * | 
|  | * Memory, paging, e820, bootparams and other helpers */ | 
|  |  | 
|  | #include <parlib/stdio.h> | 
|  | #include <stdlib.h> | 
|  | #include <sys/mman.h> | 
|  | #include <ros/arch/mmu.h> | 
|  | #include <vmm/linux_bootparam.h> | 
|  | #include <vmm/vmm.h> | 
|  | #include <err.h> | 
|  | #include <vmm/util.h> | 
|  | #include <parlib/ros_debug.h> | 
|  | #include <fcntl.h> | 
|  |  | 
|  |  | 
|  | static char *entrynames[] = { | 
|  | [E820_RAM] "E820_RAM", | 
|  | [E820_RESERVED] "E820_RESERVED", | 
|  | [E820_ACPI] "E820_ACPI", | 
|  | [E820_NVS] "E820_NVS", | 
|  | [E820_UNUSABLE] "E820_UNUSABLE", | 
|  | }; | 
|  |  | 
|  | static void dumpe820(struct e820entry *e, int nr) | 
|  | { | 
|  | for (int i = 0; i < nr; i++) { | 
|  | fprintf(stderr, "%d:%p %p %p %s\n", | 
|  | i, e[i].addr, e[i].size, e[i].type, | 
|  | entrynames[e[i].type]); | 
|  | } | 
|  | } | 
|  |  | 
|  | // e820map creates an e820 map in the bootparams struct.  If we've | 
|  | // gotten here, then memsize and memstart are valid.  It returns | 
|  | // pointer to the first page after the map for our bump allocator.  We | 
|  | // assume the ranges passed in are validated already. | 
|  | void *init_e820map(struct virtual_machine *vm, struct boot_params *bp) | 
|  | { | 
|  | uintptr_t memstart = vm->minphys; | 
|  | size_t memsize = vm->maxphys - vm->minphys + 1; | 
|  | uintptr_t lowmem = 0; | 
|  |  | 
|  | // Everything in Linux at this level is PGSIZE. | 
|  | memset(bp, 0, PGSIZE); | 
|  |  | 
|  | bp->e820_entries = 0; | 
|  |  | 
|  | // The first page is always reserved. | 
|  | bp->e820_map[bp->e820_entries].addr = 0; | 
|  | bp->e820_map[bp->e820_entries].size = PGSIZE; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RESERVED; | 
|  |  | 
|  | /* Give it just a tiny bit of memory -- 60k -- at low memory. */ | 
|  | bp->e820_map[bp->e820_entries].addr = PGSIZE; | 
|  | bp->e820_map[bp->e820_entries].size = LOW64K - PGSIZE; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RAM; | 
|  |  | 
|  | // All other memory from 64k to memstart is reserved. | 
|  | bp->e820_map[bp->e820_entries].addr = LOW64K; | 
|  | bp->e820_map[bp->e820_entries].size = memstart - LOW64K; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RESERVED; | 
|  |  | 
|  | // If memory starts below RESERVED, then add an entry for memstart to | 
|  | // the smaller of RESERVED or memsize. | 
|  | if (memstart < RESERVED) { | 
|  | bp->e820_map[bp->e820_entries].addr = memstart; | 
|  | if (memstart + memsize > RESERVED) | 
|  | bp->e820_map[bp->e820_entries].size = RESERVED - | 
|  | memstart; | 
|  | else | 
|  | bp->e820_map[bp->e820_entries].size = memsize; | 
|  | lowmem = bp->e820_map[bp->e820_entries].size; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RAM; | 
|  | } | 
|  |  | 
|  | bp->e820_map[bp->e820_entries].addr = RESERVED; | 
|  | bp->e820_map[bp->e820_entries].size = RESERVEDSIZE; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RESERVED; | 
|  |  | 
|  | if ((memstart + memsize) > RESERVED) { | 
|  | bp->e820_map[bp->e820_entries].addr = MAX(memstart, _4GiB); | 
|  | bp->e820_map[bp->e820_entries].size = memsize - lowmem; | 
|  | bp->e820_map[bp->e820_entries++].type = E820_RAM; | 
|  | } | 
|  |  | 
|  | return (void *)bp + PGSIZE; | 
|  | } | 
|  |  | 
|  | /* checkmemaligned verifies alignment attributes of your memory space. | 
|  | * It terminates your process with extreme prejudice if they are | 
|  | * incorrect in some way. */ | 
|  | void checkmemaligned(uintptr_t memstart, size_t memsize) | 
|  | { | 
|  | if (!ALIGNED(memstart, PML1_REACH)) | 
|  | errx(1, "memstart (%#x) wrong: must be aligned to %#x", | 
|  | memstart, PML1_REACH); | 
|  | if (!ALIGNED(memsize, PML1_REACH)) | 
|  | errx(1, "memsize (%#x) wrong: must be aligned to %#x", | 
|  | memsize, PML1_REACH); | 
|  | } | 
|  |  | 
|  | // memory allocates memory for the VM. It's a complicated mess because of the | 
|  | // break for APIC and other things. We just go ahead and leave the region from | 
|  | // RESERVED to _4GiB for that.  The memory is either split, all low, or all | 
|  | // high. This code is designed for a kernel. Dune-style code does not need it | 
|  | // as it does not have the RESERVED restrictions. Dune-style code can use this, | 
|  | // however, by setting memstart to 4 GiB. This code can be called multiple | 
|  | // times with more ranges. It does not check for overlaps. | 
|  | void mmap_memory(struct virtual_machine *vm, uintptr_t memstart, size_t memsize) | 
|  | { | 
|  | void *r1, *r2; | 
|  | unsigned long r1size = memsize; | 
|  |  | 
|  | // Let's do some minimal validation, so we don't drive | 
|  | // people crazy. | 
|  | checkmemaligned(memstart, memsize); | 
|  | if ((memstart >= RESERVED) && (memstart < _4GiB)) | 
|  | errx(1, "memstart (%#x) wrong: must be < %#x or >= %#x\n", | 
|  | memstart, RESERVED, _4GiB); | 
|  | if (memstart < MinMemory) | 
|  | errx(1, "memstart (%#x) wrong: must be > %#x\n", | 
|  | memstart, MinMemory); | 
|  |  | 
|  | // Note: this test covers the split case as well as the | 
|  | // 'all above 4G' case. | 
|  | if ((memstart + memsize) > RESERVED) { | 
|  | unsigned long long r2start = MAX(memstart, _4GiB); | 
|  |  | 
|  | r1size = memstart < RESERVED ? RESERVED - memstart : 0; | 
|  | r2 = mmap((void *)r2start, memsize - r1size, | 
|  | PROT_READ | PROT_WRITE | PROT_EXEC, | 
|  | MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | 
|  | if (r2 != (void *)r2start) { | 
|  | fprintf(stderr, | 
|  | "High region: Could not mmap 0x%lx bytes at 0x%lx\n", | 
|  | memsize, r2start); | 
|  | exit(1); | 
|  | } | 
|  | if (memstart >= _4GiB) | 
|  | goto done; | 
|  | } | 
|  |  | 
|  | r1 = mmap((void *)memstart, r1size, | 
|  | PROT_READ | PROT_WRITE | PROT_EXEC, | 
|  | MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); | 
|  | if (r1 != (void *)memstart) { | 
|  | fprintf(stderr, | 
|  | "Low region: Could not mmap 0x%lx bytes at 0x%lx\n", | 
|  | memsize, memstart); | 
|  | exit(1); | 
|  | } | 
|  |  | 
|  | done: | 
|  | if ((vm->minphys == 0) || (vm->minphys > memstart)) | 
|  | vm->minphys = memstart; | 
|  |  | 
|  | if (vm->maxphys < memstart + memsize - 1) | 
|  | vm->maxphys = memstart + memsize - 1; | 
|  | } | 
|  |  | 
/* mmap_file maps memsize bytes of the file at path, starting at byte
 * offset within the file, as a private mapping with the given
 * protections, and requires the mapping to land exactly at memstart.
 *
 * Returns true on success.  Returns false, after printing a diagnostic to
 * stderr, if the file cannot be opened read-only, if mmap fails, or if
 * the mapping was placed at some other address (in which case it is
 * unmapped again).  The file descriptor is closed on every path. */
bool mmap_file(const char *path, uintptr_t memstart, size_t memsize,
	       uint64_t protections, size_t offset)
{
	void *mapped;
	int saved_errno;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd == -1) {
		fprintf(stderr, "Unable to open %s for reading.\n", path);
		return false;
	}

	mapped = mmap((void *)memstart, memsize, protections, MAP_PRIVATE,
		      fd, offset);
	/* Grab errno before close() gets a chance to overwrite it. */
	saved_errno = errno;
	close(fd);

	if (mapped == MAP_FAILED) {
		fprintf(stderr, "Failed to mmap %s, got error %d\n", path,
			saved_errno);
		return false;
	}

	if ((uint64_t)mapped == (uint64_t)memstart)
		return true;

	/* We got a mapping, just not where we asked for it; give it back. */
	fprintf(stderr, "Could not mmap %s correctly.\n", path);
	if (munmap(mapped, memsize) == -1)
		perror("Failed to unmap memory; leaking a mapping");
	return false;
}
|  |  | 
|  |  | 
|  | /* populate_stack fills the stack with an argv, envp, and auxv. | 
|  | * We assume the stack pointer is backed by real memory. | 
|  | * It will go hard with you if it does not. For your own health, | 
|  | * stack should be 16-byte aligned. */ | 
|  | void *populate_stack(uintptr_t *stack, int argc, char *argv[], | 
|  | int envc, char *envp[], | 
|  | int auxc, struct elf_aux auxv[]) | 
|  | { | 
|  | /* Func to get the lengths of the argument and environment strings. */ | 
|  | int get_lens(int argc, char *argv[], int arg_lens[]) | 
|  | { | 
|  | int total = 0; | 
|  |  | 
|  | if (!argc) | 
|  | return 0; | 
|  | for (int i = 0; i < argc; i++) { | 
|  | arg_lens[i] = strlen(argv[i]) + 1; | 
|  | total += arg_lens[i]; | 
|  | } | 
|  | return total; | 
|  | } | 
|  |  | 
|  | /* Function to help map the argument and environment strings, to their | 
|  | * final location. */ | 
|  | int remap(int argc, char *argv[], char *new_argv[], | 
|  | char new_argbuf[], int arg_lens[]) | 
|  | { | 
|  | int offset = 0; | 
|  |  | 
|  | if (!argc) | 
|  | return 0; | 
|  | for (int i = 0; i < argc; i++) { | 
|  | memcpy(new_argbuf + offset, argv[i], arg_lens[i]); | 
|  | fprintf(stderr, "data: memcpy(%p, %p, %ld)\n", | 
|  | new_argbuf + offset, argv[i], arg_lens[i]); | 
|  | fprintf(stderr, "arg: set arg %d, @%p, to %p\n", i, | 
|  | &new_argv[i], new_argbuf + offset); | 
|  | new_argv[i] = new_argbuf + offset; | 
|  | offset += arg_lens[i]; | 
|  | } | 
|  | new_argv[argc] = NULL; | 
|  | return offset; | 
|  | } | 
|  |  | 
|  | /* Start tracking the size of the buffer necessary to hold all of our | 
|  | * data on the stack. Preallocate space for argc, argv, envp, and auxv | 
|  | * in this buffer. */ | 
|  | int bufsize = 0; | 
|  |  | 
|  | bufsize += 1 * sizeof(size_t); | 
|  | bufsize += (auxc + 1) * sizeof(struct elf_aux); | 
|  | bufsize += (envc + 1) * sizeof(char**); | 
|  | bufsize += (argc + 1) * sizeof(char**); | 
|  | fprintf(stderr, "Bufsize for pointers and argc is %d\n", bufsize); | 
|  |  | 
|  | /* Add in the size of the env and arg strings. */ | 
|  | int arg_lens[argc]; | 
|  | int env_lens[envc]; | 
|  |  | 
|  | bufsize += get_lens(argc, argv, arg_lens); | 
|  | bufsize += get_lens(envc, envp, env_lens); | 
|  | fprintf(stderr, "Bufsize for pointers, argc, and strings is %d\n", | 
|  | bufsize); | 
|  |  | 
|  | /* Adjust bufsize so that our buffer will ultimately be 16 byte aligned. | 
|  | */ | 
|  | bufsize = (bufsize + 15) & ~0xf; | 
|  | fprintf(stderr, | 
|  | "Bufsize for pointers, argc, and strings is rounded is %d\n", | 
|  | bufsize); | 
|  |  | 
|  | /* Set up pointers to all of the appropriate data regions we map to. */ | 
|  | size_t *new_argc = (size_t*)((uint8_t*)stack - bufsize); | 
|  | char **new_argv = (char**)(new_argc + 1); | 
|  | char **new_envp = new_argv + argc + 1; | 
|  | struct elf_aux *new_auxv = (struct elf_aux*)(new_envp + envc + 1); | 
|  | char *new_argbuf = (char*)(new_auxv + auxc + 1); | 
|  |  | 
|  | fprintf(stderr, "There are %d args, %d env, and %d aux\n", new_argc, | 
|  | envc, auxc); | 
|  | fprintf(stderr, "Locations: argc: %p, argv: %p, envp: %p, auxv: %p\n", | 
|  | new_argc, new_argv, new_envp, new_auxv); | 
|  | fprintf(stderr, "Locations: argbuf: %p, ", new_argbuf); | 
|  | fprintf(stderr, "Sizeof argc is %d\n", sizeof(size_t)); | 
|  | /* Map argc into its final location. */ | 
|  | *new_argc = argc; | 
|  |  | 
|  | /* Map all data for argv and envp into its final location. */ | 
|  | int offset = 0; | 
|  |  | 
|  | offset = remap(argc, argv, new_argv, new_argbuf, arg_lens); | 
|  | if (offset == -1) | 
|  | return 0; | 
|  | fprintf(stderr, "Locations: argbuf: %p, envbuf: %p, ", new_argbuf, | 
|  | new_argbuf + offset); | 
|  | offset = remap(envc, envp, new_envp, new_argbuf + offset, env_lens); | 
|  | if (offset == -1) | 
|  | return 0; | 
|  |  | 
|  | /* Map auxv into its final location. */ | 
|  | struct elf_aux null_aux = {0, 0}; | 
|  |  | 
|  | memcpy(new_auxv, auxv, auxc * sizeof(struct elf_aux)); | 
|  | memcpy(new_auxv + auxc, &null_aux, sizeof(struct elf_aux)); | 
|  | fprintf(stderr, "auxbuf: %p\n", new_auxv); | 
|  | hexdump(stdout, new_auxv, auxc * sizeof(struct elf_aux)); | 
|  | return (uint8_t*)stack - bufsize; | 
|  | } |