blob: 25c9c4c1450f5cbe823b053948b978a340b9ca6c [file] [log] [blame]
/* Copyright (c) 2017 Google Inc.
* See LICENSE for details.
*
* Memory, paging, e820, bootparams and other helpers */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <ros/arch/mmu.h>
#include <vmm/linux_bootparam.h>
#include <vmm/vmm.h>
#include <err.h>
#include <vmm/util.h>
#include <parlib/ros_debug.h>
#include <fcntl.h>
/* Printable names for the e820 entry types, indexed by the type value.
 * Slots that have no initializer here are NULL, so users of this table
 * must bounds-check the index and check the slot for NULL before use.
 * The table is read-only, hence const. */
static const char *const entrynames[] = {
	[E820_RAM] = "E820_RAM",
	[E820_RESERVED] = "E820_RESERVED",
	[E820_ACPI] = "E820_ACPI",
	[E820_NVS] = "E820_NVS",
	[E820_UNUSABLE] = "E820_UNUSABLE",
};
static void dumpe820(struct e820entry *e, int nr)
{
for (int i = 0; i < nr; i++) {
fprintf(stderr, "%d:%p %p %p %s\n",
i, e[i].addr, e[i].size, e[i].type,
entrynames[e[i].type]);
}
}
// e820map creates an e820 map in the bootparams struct. If we've
// gotten here, then memsize and memstart are valid. It returns
// pointer to the first page after the map for our bump allocator. We
// assume the ranges passed in are validated already.
void *init_e820map(struct virtual_machine *vm, struct boot_params *bp)
{
uintptr_t memstart = vm->minphys;
size_t memsize = vm->maxphys - vm->minphys + 1;
uintptr_t lowmem = 0;
// Everything in Linux at this level is PGSIZE.
memset(bp, 0, PGSIZE);
bp->e820_entries = 0;
// The first page is always reserved.
bp->e820_map[bp->e820_entries].addr = 0;
bp->e820_map[bp->e820_entries].size = PGSIZE;
bp->e820_map[bp->e820_entries++].type = E820_RESERVED;
/* Give it just a tiny bit of memory -- 60k -- at low memory. */
bp->e820_map[bp->e820_entries].addr = PGSIZE;
bp->e820_map[bp->e820_entries].size = LOW64K - PGSIZE;
bp->e820_map[bp->e820_entries++].type = E820_RAM;
// All other memory from 64k to memstart is reserved.
bp->e820_map[bp->e820_entries].addr = LOW64K;
bp->e820_map[bp->e820_entries].size = memstart - LOW64K;
bp->e820_map[bp->e820_entries++].type = E820_RESERVED;
// If memory starts below RESERVED, then add an entry for memstart to
// the smaller of RESERVED or memsize.
if (memstart < RESERVED) {
bp->e820_map[bp->e820_entries].addr = memstart;
if (memstart + memsize > RESERVED)
bp->e820_map[bp->e820_entries].size = RESERVED -
memstart;
else
bp->e820_map[bp->e820_entries].size = memsize;
lowmem = bp->e820_map[bp->e820_entries].size;
bp->e820_map[bp->e820_entries++].type = E820_RAM;
}
bp->e820_map[bp->e820_entries].addr = RESERVED;
bp->e820_map[bp->e820_entries].size = RESERVEDSIZE;
bp->e820_map[bp->e820_entries++].type = E820_RESERVED;
if ((memstart + memsize) > RESERVED) {
bp->e820_map[bp->e820_entries].addr = MAX(memstart, _4GiB);
bp->e820_map[bp->e820_entries].size = memsize - lowmem;
bp->e820_map[bp->e820_entries++].type = E820_RAM;
}
dumpe820(bp->e820_map, bp->e820_entries);
return (void *)bp + PGSIZE;
}
/* checkmemaligned verifies that both the start address and the size of a
 * guest memory range are multiples of PML1_REACH.  If either is not, it
 * prints a diagnostic and terminates the process via errx(1, ...); it only
 * returns when both are aligned.
 *
 * memstart: start address of the range.
 * memsize:  length of the range in bytes. */
void checkmemaligned(uintptr_t memstart, size_t memsize)
{
	/* The values are wider than unsigned int on 64-bit targets, so they
	 * are cast to unsigned long long and printed with %#llx rather than
	 * %#x, which would mismatch the argument types. */
	if (!ALIGNED(memstart, PML1_REACH))
		errx(1, "memstart (%#llx) wrong: must be aligned to %#llx",
		     (unsigned long long)memstart,
		     (unsigned long long)PML1_REACH);
	if (!ALIGNED(memsize, PML1_REACH))
		errx(1, "memsize (%#llx) wrong: must be aligned to %#llx",
		     (unsigned long long)memsize,
		     (unsigned long long)PML1_REACH);
}
// mmap_memory allocates memory for the VM.  It's a complicated mess because
// of the break for APIC and other things.  We just go ahead and leave the
// region from RESERVED to _4GiB for that.  The memory is either split, all
// low, or all high:
//   - all low:  [memstart, memstart + memsize) lies at or below RESERVED and
//               is mapped in one piece at memstart.
//   - split:    memstart is below RESERVED but the range runs past it.  The
//               part below RESERVED is mapped at memstart and the remainder
//               is mapped starting at _4GiB.
//   - all high: memstart is at or above _4GiB and the whole range is mapped
//               at memstart.
// This code is designed for a kernel.  Dune-style code does not need it as it
// does not have the RESERVED restrictions.  Dune-style code can use this,
// however, by setting memstart to 4 GiB.  This code can be called multiple
// times with more ranges.  It does not check for overlaps.
//
// Any validation or mapping failure terminates the process with exit status
// 1.  On success vm->minphys is lowered to memstart if it was 0 or larger
// than memstart, and vm->maxphys is raised to memstart + memsize - 1 if it
// was smaller (note: this is the value used even in the split case).
void mmap_memory(struct virtual_machine *vm, uintptr_t memstart, size_t memsize)
{
	void *r1, *r2;
	unsigned long r1size = memsize;

	// Let's do some minimal validation, so we don't drive
	// people crazy.  errx() appends its own newline, and the values are
	// cast so the conversion specifiers match on 64-bit targets.
	checkmemaligned(memstart, memsize);
	if ((memstart >= RESERVED) && (memstart < _4GiB))
		errx(1, "memstart (%#llx) wrong: must be < %#llx or >= %#llx",
		     (unsigned long long)memstart,
		     (unsigned long long)RESERVED,
		     (unsigned long long)_4GiB);
	if (memstart < MinMemory)
		errx(1, "memstart (%#llx) wrong: must be >= %#llx",
		     (unsigned long long)memstart,
		     (unsigned long long)MinMemory);

	// Note: this test covers the split case as well as the
	// 'all above 4G' case.
	if ((memstart + memsize) > RESERVED) {
		unsigned long long r2start = MAX(memstart, _4GiB);
		unsigned long r2size;

		// Whatever fits below RESERVED stays low; the rest goes high.
		r1size = memstart < RESERVED ? RESERVED - memstart : 0;
		r2size = memsize - r1size;
		r2 = mmap((void *)r2start, r2size,
			  PROT_READ | PROT_WRITE | PROT_EXEC,
			  MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		// The address is only a hint to mmap, so anything other than
		// the exact address requested (including MAP_FAILED) is fatal.
		if (r2 != (void *)r2start) {
			fprintf(stderr,
				"High region: Could not mmap 0x%lx bytes at 0x%llx\n",
				r2size, r2start);
			exit(1);
		}
		// All high: there is no low part to map.
		if (memstart >= _4GiB)
			goto done;
	}

	r1 = mmap((void *)memstart, r1size,
		  PROT_READ | PROT_WRITE | PROT_EXEC,
		  MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (r1 != (void *)memstart) {
		fprintf(stderr,
			"Low region: Could not mmap 0x%lx bytes at 0x%llx\n",
			r1size, (unsigned long long)memstart);
		exit(1);
	}

done:
	if ((vm->minphys == 0) || (vm->minphys > memstart))
		vm->minphys = memstart;

	if (vm->maxphys < memstart + memsize - 1)
		vm->maxphys = memstart + memsize - 1;
}
/* mmap_file maps memsize bytes of the file at path, starting at file offset
 * offset, as a private mapping with the given protections, and requires the
 * mapping to land exactly at memstart.
 *
 * The file is opened read-only and its descriptor is closed again before
 * returning, whether or not the mapping succeeded.
 *
 * Returns true if the mapping exists at memstart.  Returns false, after
 * printing a message to stderr, if the file cannot be opened, if mmap fails,
 * or if mmap placed the mapping somewhere else (in which case the stray
 * mapping is unmapped again). */
bool mmap_file(const char *path, uintptr_t memstart, size_t memsize,
               uint64_t protections, size_t offset)
{
	void *wanted = (void *)memstart;
	void *got;
	int saved_errno;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd == -1) {
		fprintf(stderr, "Unable to open %s for reading.\n", path);
		return false;
	}

	got = mmap(wanted, memsize, protections, MAP_PRIVATE, fd, offset);
	/* Grab errno now: close() below is allowed to overwrite it. */
	saved_errno = errno;
	close(fd);

	if (got == MAP_FAILED) {
		fprintf(stderr, "Failed to mmap %s, got error %d\n", path,
			saved_errno);
		return false;
	}

	if (got == wanted)
		return true;

	/* The kernel picked a different address; that is no use to us. */
	fprintf(stderr, "Could not mmap %s correctly.\n", path);
	if (munmap(got, memsize) == -1)
		perror("Failed to unmap memory; leaking a mapping");
	return false;
}
/* Helper for populate_stack: stores strlen + 1 of each of the count strings
 * in strs into lens[] and returns the sum of those lengths. */
static int stack_string_lens(int count, char *strs[], int lens[])
{
	int total = 0;

	for (int i = 0; i < count; i++) {
		lens[i] = strlen(strs[i]) + 1;
		total += lens[i];
	}
	return total;
}

/* Helper for populate_stack: copies the count strings in strs back-to-back
 * into buf (lens[i] bytes each, terminator included), points ptrs[i] at each
 * copy, and NULL-terminates ptrs.  The terminator is written even when count
 * is 0, so an empty vector is still a valid NULL-terminated vector.  Returns
 * the number of bytes written to buf. */
static int stack_copy_strings(int count, char *strs[], char *ptrs[],
			      char buf[], int lens[])
{
	int offset = 0;

	for (int i = 0; i < count; i++) {
		memcpy(buf + offset, strs[i], lens[i]);
		fprintf(stderr, "data: memcpy(%p, %p, %d)\n",
			(void *)(buf + offset), (void *)strs[i], lens[i]);
		fprintf(stderr, "arg: set arg %d, @%p, to %p\n", i,
			(void *)&ptrs[i], (void *)(buf + offset));
		ptrs[i] = buf + offset;
		offset += lens[i];
	}
	ptrs[count] = NULL;
	return offset;
}

/* populate_stack fills the stack with an argc, argv, envp, and auxv.
 * We assume the stack pointer is backed by real memory.
 * It will go hard with you if it does not. For your own health,
 * stack should be 16-byte aligned.
 *
 * The data is laid out in the memory just below stack, from low to high
 * address:
 *
 *   argc (one size_t)
 *   argv[0..argc-1], NULL
 *   envp[0..envc-1], NULL
 *   auxv[0..auxc-1], all-zero terminating entry
 *   argument strings, then environment strings
 *
 * The total size is rounded up to a multiple of 16 bytes before it is
 * subtracted from stack.
 *
 * stack:       highest address of the region to fill (exclusive).
 * argc, argv:  argument count and strings to copy.
 * envc, envp:  environment count and strings to copy.
 * auxc, auxv:  number of aux entries and the entries to copy.
 *
 * Returns the address of the copied argc, i.e. the new top of stack.
 * Progress is logged to stderr and the copied auxv is hexdumped to stdout. */
void *populate_stack(uintptr_t *stack, int argc, char *argv[],
                     int envc, char *envp[],
                     int auxc, struct elf_aux auxv[])
{
	/* Start tracking the size of the buffer necessary to hold all of our
	 * data on the stack. Preallocate space for argc, argv, envp, and auxv
	 * in this buffer.  The + 1s are for the terminating entries. */
	int bufsize = 0;

	bufsize += 1 * sizeof(size_t);
	bufsize += (auxc + 1) * sizeof(struct elf_aux);
	bufsize += (envc + 1) * sizeof(char**);
	bufsize += (argc + 1) * sizeof(char**);
	fprintf(stderr, "Bufsize for pointers and argc is %d\n", bufsize);

	/* Add in the size of the env and arg strings.  The arrays get one
	 * spare slot so that they are never zero-length VLAs when a count
	 * is 0. */
	int arg_lens[argc + 1];
	int env_lens[envc + 1];

	bufsize += stack_string_lens(argc, argv, arg_lens);
	bufsize += stack_string_lens(envc, envp, env_lens);
	fprintf(stderr, "Bufsize for pointers, argc, and strings is %d\n",
	        bufsize);

	/* Adjust bufsize so that our buffer will ultimately be 16 byte aligned.
	 */
	bufsize = (bufsize + 15) & ~0xf;
	fprintf(stderr,
	        "Bufsize for pointers, argc, and strings is rounded is %d\n",
	        bufsize);

	/* Set up pointers to all of the appropriate data regions we map to. */
	size_t *new_argc = (size_t*)((uint8_t*)stack - bufsize);
	char **new_argv = (char**)(new_argc + 1);
	char **new_envp = new_argv + argc + 1;
	struct elf_aux *new_auxv = (struct elf_aux*)(new_envp + envc + 1);
	char *new_argbuf = (char*)(new_auxv + auxc + 1);

	fprintf(stderr, "There are %d args, %d env, and %d aux\n", argc,
	        envc, auxc);
	fprintf(stderr, "Locations: argc: %p, argv: %p, envp: %p, auxv: %p\n",
	        (void *)new_argc, (void *)new_argv, (void *)new_envp,
	        (void *)new_auxv);
	fprintf(stderr, "Locations: argbuf: %p, ", (void *)new_argbuf);
	fprintf(stderr, "Sizeof argc is %zu\n", sizeof(size_t));

	/* Map argc into its final location. */
	*new_argc = argc;

	/* Map all data for argv and envp into its final location.  The
	 * environment strings follow directly after the argument strings. */
	int offset = stack_copy_strings(argc, argv, new_argv, new_argbuf,
					arg_lens);

	fprintf(stderr, "Locations: argbuf: %p, envbuf: %p, ",
	        (void *)new_argbuf, (void *)(new_argbuf + offset));
	stack_copy_strings(envc, envp, new_envp, new_argbuf + offset,
			   env_lens);

	/* Map auxv into its final location, followed by the null entry. */
	struct elf_aux null_aux = {0, 0};

	memcpy(new_auxv, auxv, auxc * sizeof(struct elf_aux));
	memcpy(new_auxv + auxc, &null_aux, sizeof(struct elf_aux));
	fprintf(stderr, "auxbuf: %p\n", (void *)new_auxv);
	hexdump(stdout, new_auxv, auxc * sizeof(struct elf_aux));

	return (uint8_t*)stack - bufsize;
}