blob: af7cc630514f61fa4a9fccb4e9ce808781ad98aa [file] [log] [blame]
#include <mm.h>
#include <string.h>
#include <kmalloc.h>
#include <syscall.h>
#include <elf.h>
#include <pmap.h>
#include <smp.h>
#include <arch/arch.h>
#include <umem.h>
/* elf_field(obj, field): read 'field' through whichever ELF-class view of
 * 'obj' applies.  The macro pastes "32"/"64" onto 'obj', so the expansion site
 * must have a pointer local named <obj>32 in scope and, when CONFIG_64BIT is
 * set, also a pointer local named <obj>64 and a boolean named 'elf64' that
 * selects between them. */
#ifdef CONFIG_64BIT
# define elf_field(obj, field) (elf64 ? (obj##64)->field : (obj##32)->field)
#else
# define elf_field(obj, field) ((obj##32)->field)
#endif
/* Reports whether 'foc' looks like an ELF file: an elf64_t-sized header is
 * read from offset 0 and its e_magic is compared against ELF_MAGIC.  A short
 * (or failed) read and a magic mismatch both yield FALSE.  The read is
 * bracketed by switch_to_ktask() / switch_back_from_ktask(). */
bool is_valid_elf(struct file_or_chan *foc)
{
	elf64_t hdr;
	bool valid = FALSE;
	uintptr_t ktask_state = switch_to_ktask();

	if (foc_read(foc, (char*)&hdr, sizeof(hdr), 0) == sizeof(hdr) &&
	    hdr.e_magic == ELF_MAGIC)
		valid = TRUE;

	switch_back_from_ktask(ktask_state);
	return valid;
}
/* Maps p's user stack and writes the initial process arguments at its top.
 *
 * Layout written, from the returned address upwards:
 *	size_t     argc
 *	char      *argv[argc + 1]	(NULL terminated)
 *	char      *envp[envc + 1]	(NULL terminated)
 *	elf_aux_t  auxv[auxc + 1]	(terminated by a {0, 0} entry)
 *	the argv strings, followed by the envp strings
 *
 * The whole region is bufsize bytes, rounded up to a multiple of 16, and ends
 * at USTACKTOP.
 *
 * Returns the user address of the argc word (USTACKTOP - bufsize), or 0 if
 * the stack could not be mapped, the data exceeds ARG_MAX, or any copy to
 * user memory fails. */
static uintptr_t populate_stack(struct proc *p, int argc, char *argv[],
                                int envc, char *envp[],
                                int auxc, elf_aux_t auxv[])
{
	/* Map in pages for p's stack. */
	int flags = MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE;
	uintptr_t stacksz = USTACK_NUM_PAGES*PGSIZE;

	if (do_mmap(p, USTACKTOP-stacksz, stacksz, PROT_READ | PROT_WRITE,
	            flags, NULL, 0) == MAP_FAILED)
		return 0;

	/* Function to get the lengths of the argument and environment strings.
	 * Each length includes the terminating NUL.  Returns their sum. */
	int get_lens(int argc, char *argv[], int arg_lens[])
	{
		int total = 0;

		for (int i = 0; i < argc; i++) {
			arg_lens[i] = strlen(argv[i]) + 1;
			total += arg_lens[i];
		}
		return total;
	}

	/* Function to help map the argument and environment strings, to their
	 * final location.  Copies each string into new_argbuf (user memory),
	 * then writes the NULL-terminated pointer array to new_argv.  Returns
	 * the number of string bytes written, or -1 if a copy fails. */
	int remap(int argc, char *argv[], char *new_argv[],
	          char new_argbuf[], int arg_lens[])
	{
		int offset = 0;
		char *temp_argv[argc + 1];

		for(int i = 0; i < argc; i++) {
			if (memcpy_to_user(p, new_argbuf + offset, argv[i],
			                   arg_lens[i]))
				return -1;
			temp_argv[i] = new_argbuf + offset;
			offset += arg_lens[i];
		}
		temp_argv[argc] = NULL;
		if (memcpy_to_user(p, new_argv, temp_argv, sizeof(temp_argv)))
			return -1;
		return offset;
	}

	/* Start tracking the size of the buffer necessary to hold all of our
	 * data on the stack.  Preallocate space for argc, argv, envp, and auxv
	 * in this buffer. */
	int bufsize = 0;

	bufsize += 1 * sizeof(size_t);
	bufsize += (auxc + 1) * sizeof(elf_aux_t);
	bufsize += (envc + 1) * sizeof(char**);
	bufsize += (argc + 1) * sizeof(char**);

	/* Add in the size of the env and arg strings. */
	int arg_lens[argc];
	int env_lens[envc];

	bufsize += get_lens(argc, argv, arg_lens);
	bufsize += get_lens(envc, envp, env_lens);

	/* Adjust bufsize so that our buffer will ultimately be 16 byte aligned.
	 */
	bufsize = ROUNDUP(bufsize, 16);

	/* Set up pointers to all of the appropriate data regions we map to. */
	size_t *new_argc = (size_t*)(USTACKTOP - bufsize);
	char **new_argv = (char**)(new_argc + 1);
	char **new_envp = new_argv + argc + 1;
	elf_aux_t *new_auxv = (elf_aux_t*)(new_envp + envc + 1);
	char *new_argbuf = (char*)(new_auxv + auxc + 1);

	/* Verify that all data associated with our argv, envp, and auxv arrays
	 * (and any corresponding strings they point to) will fit in the space
	 * allotted. */
	if (bufsize > ARG_MAX)
		return 0;

	/* Map argc into its final location.  The slot on the user stack is a
	 * size_t, but argc is an int: widen it into a size_t first so we copy
	 * a fully defined word instead of reading sizeof(size_t) bytes out of
	 * a (possibly smaller) int. */
	size_t argc_word = argc;

	if (memcpy_to_user(p, new_argc, &argc_word, sizeof(argc_word)))
		return 0;

	/* Map all data for argv and envp into its final location.  The envp
	 * strings are placed right after the argv strings. */
	int offset = 0;

	offset = remap(argc, argv, new_argv, new_argbuf, arg_lens);
	if (offset == -1)
		return 0;
	offset = remap(envc, envp, new_envp, new_argbuf + offset, env_lens);
	if (offset == -1)
		return 0;

	/* Map auxv into its final location, followed by the null terminator
	 * entry. */
	elf_aux_t null_aux = {0, 0};

	if (memcpy_to_user(p, new_auxv, auxv, auxc * sizeof(elf_aux_t)))
		return 0;
	if (memcpy_to_user(p, new_auxv + auxc, &null_aux, sizeof(elf_aux_t)))
		return 0;

	return USTACKTOP - bufsize;
}
/* Loads one ELF image from 'foc' into p's address space.
 *
 * @p:		process whose address space receives the mappings
 * @foc:	file (or chan) to read the ELF from
 * @pg_num:	page offset added to every segment's VA and to the entry point
 * @ei:		filled in on success: phdr, phnum, entry, elf64, dynamic,
 *		highest_addr, and (for dynamic binaries) interp
 * @writable:	force ELF_PROT_WRITE onto every program header's flags
 *
 * Returns 0 on success, -1 on failure.  On failure, mappings that were
 * already created are not undone here.
 *
 * We need the writable flag for ld.  Even though the elf header says it wants
 * RX (and not W) for its main program header, it will page fault (eip 56f0,
 * 46f0 after being relocated to 0x1000, va 0x20f4). */
static int load_one_elf(struct proc *p, struct file_or_chan *foc,
                        uintptr_t pg_num, elf_info_t *ei, bool writable)
{
	int ret = -1;
	ei->phdr = -1;
	ei->dynamic = 0;
	ei->highest_addr = 0;
	off64_t f_off = 0;
	void* phdrs = 0;
	int mm_perms, mm_flags;

	/* When reading on behalf of the kernel, we need to switch to a ktask so
	 * the VFS (and maybe other places) know. (TODO: KFOP) */
	uintptr_t old_ret = switch_to_ktask();

	/* Read in ELF header.  The storage is sized for the 64 bit header; the
	 * 32 bit view aliases the same bytes.  The names elfhdr32/elfhdr64 (and
	 * elf64 below) are required by the elf_field() macro. */
	elf64_t elfhdr_storage;
	elf32_t* elfhdr32 = (elf32_t*)&elfhdr_storage;
	elf64_t* elfhdr64 = &elfhdr_storage;

	if (foc_read(foc, (char*)elfhdr64, sizeof(elf64_t), f_off)
	    != sizeof(elf64_t)) {
		/* if you ever debug this, be sure to 0 out elfhdr_storage in
		 * advance */
		printk("[kernel] load_one_elf: failed to read file\n");
		goto fail;
	}
	if (elfhdr64->e_magic != ELF_MAGIC) {
		printk("[kernel] load_one_elf: file is not an elf!\n");
		goto fail;
	}
	bool elf32 = elfhdr32->e_ident[ELF_IDENT_CLASS] == ELFCLASS32;
	bool elf64 = elfhdr64->e_ident[ELF_IDENT_CLASS] == ELFCLASS64;

	/* Exactly one class must be set; this also rejects "neither". */
	if (elf64 == elf32) {
		printk("[kernel] load_one_elf: ID as both 32 and 64 bit\n");
		goto fail;
	}
#ifndef CONFIG_64BIT
	if (elf64) {
		printk("[kernel] load_one_elf: 64 bit elf on 32 bit kernel\n");
		goto fail;
	}
#endif
	/* Not sure what RISCV's 64 bit kernel can do here, so this check is x86
	 * only */
#ifdef CONFIG_X86
	if (elf32) {
		printk("[kernel] load_one_elf: 32 bit elf on 64 bit kernel\n");
		goto fail;
	}
#endif

	size_t phsz = elf64 ? sizeof(proghdr64_t) : sizeof(proghdr32_t);
	uint16_t e_phnum = elf_field(elfhdr, e_phnum);
	/* e_phoff is a 32 or 64 bit file offset in the ELF header; keep its
	 * full width rather than truncating it to 16 bits. */
	uintptr_t e_phoff = elf_field(elfhdr, e_phoff);

	/* Read in program headers. */
	if (e_phnum > 10000 || e_phoff % (elf32 ? 4 : 8) != 0) {
		printk("[kernel] load_one_elf: Bad program headers\n");
		goto fail;
	}
	phdrs = kmalloc(e_phnum * phsz, 0);
	f_off = e_phoff;
	if (!phdrs || foc_read(foc, phdrs, e_phnum * phsz, f_off) !=
	              e_phnum * phsz) {
		printk("[kernel] load_one_elf: couldn't get program headers\n");
		goto fail;
	}
	for (int i = 0; i < e_phnum; i++) {
		/* Both views index the same buffer; elf_field() picks the one
		 * matching the file's class. */
		proghdr32_t* ph32 = (proghdr32_t*)phdrs + i;
		proghdr64_t* ph64 = (proghdr64_t*)phdrs + i;
		/* p_type is a 32 bit field.  Truncating it to 16 bits would
		 * make e.g. type 0x70000001 compare equal to a low-numbered
		 * type such as ELF_PROG_LOAD. */
		uint32_t p_type = elf_field(ph, p_type);
		uintptr_t p_va = elf_field(ph, p_va);
		uintptr_t p_offset = elf_field(ph, p_offset);
		uintptr_t p_align = elf_field(ph, p_align);
		uintptr_t p_memsz = elf_field(ph, p_memsz);
		uintptr_t p_filesz = elf_field(ph, p_filesz);
		uintptr_t p_flags = elf_field(ph, p_flags);

		/* Here's the ld hack, mentioned above */
		p_flags |= (writable ? ELF_PROT_WRITE : 0);
		/* All mmaps need to be fixed to their VAs.  If the program
		 * wants it to be a writable region, we also need the region to
		 * be private. */
		mm_flags = MAP_FIXED | (p_flags & ELF_PROT_WRITE ? MAP_PRIVATE :
		                                                   MAP_SHARED);

		if (p_type == ELF_PROG_PHDR)
			ei->phdr = p_va;
		else if (p_type == ELF_PROG_INTERP) {
			f_off = p_offset;
			ssize_t maxlen = sizeof(ei->interp);
			ssize_t bytes = foc_read(foc, ei->interp, maxlen,
			                         f_off);

			/* trying to catch errors.  don't know how big it could
			 * be, but it should be at least 0. */
			if (bytes <= 0) {
				printk("[kernel] load_one_elf: could not read ei->interp\n");
				goto fail;
			}
			/* The name must be NUL terminated within what we
			 * actually read. */
			maxlen = MIN(maxlen, bytes);
			if (strnlen(ei->interp, maxlen) == maxlen) {
				printk("[kernel] load_one_elf: interpreter name too long\n");
				goto fail;
			}
			ei->dynamic = 1;
		}
		else if (p_type == ELF_PROG_LOAD && p_memsz) {
			if (p_align % PGSIZE) {
				printk("[kernel] load_one_elf: not page aligned\n");
				goto fail;
			}
			if (p_offset % PGSIZE != p_va % PGSIZE) {
				printk("[kernel] load_one_elf: offset difference \n");
				goto fail;
			}

			/* Page-align the file window and the memory window.
			 * filesz/memsz are measured from the aligned starts. */
			uintptr_t filestart = ROUNDDOWN(p_offset, PGSIZE);
			uintptr_t filesz = p_offset + p_filesz - filestart;

			uintptr_t memstart = ROUNDDOWN(p_va, PGSIZE);
			uintptr_t memsz = ROUNDUP(p_va + p_memsz, PGSIZE) -
			                  memstart;

			memstart += pg_num * PGSIZE;

			if (memstart + memsz > ei->highest_addr)
				ei->highest_addr = memstart + memsz;

			mm_perms = 0;
			mm_perms |= (p_flags & ELF_PROT_READ  ? PROT_READ : 0);
			mm_perms |= (p_flags & ELF_PROT_WRITE ? PROT_WRITE : 0);
			mm_perms |= (p_flags & ELF_PROT_EXEC  ? PROT_EXEC : 0);

			if (filesz) {
				/* Due to elf-ghetto-ness, we need to zero the
				 * first part of the BSS from the last page of
				 * the data segment.  If we end on a partial
				 * page, we map it in separately with
				 * MAP_POPULATE so that we can zero the rest of
				 * it now.  We translate to the KVA so we don't
				 * need to worry about using the proc's mapping
				 * */
				uintptr_t partial = PGOFF(filesz);

				if (filesz - partial) {
					/* Map the complete pages. */
					if (do_mmap(p, memstart, filesz -
					            partial, mm_perms, mm_flags,
					            foc, filestart) ==
					    MAP_FAILED) {
						printk("[kernel] load_one_elf: complete mmap failed\n");
						goto fail;
					}
				}
				/* Note that we (probably) only need to do this
				 * zeroing the end of a partial file page when
				 * we are dealing with ELF_PROT_WRITE-able PHs,
				 * and not for all cases. */
				if (partial) {
					/* Need our own populated, private copy
					 * of the page so that we can zero the
					 * remainder - and not zero chunks of
					 * the real file in the page cache. */
					mm_flags &= ~MAP_SHARED;
					mm_flags |= MAP_PRIVATE | MAP_POPULATE;

					/* Map the final partial page. */
					uintptr_t last_page = memstart + filesz
					                      - partial;

					if (do_mmap(p, last_page, PGSIZE,
					            mm_perms, mm_flags, foc,
					            filestart + filesz -
					            partial) == MAP_FAILED) {
						printk("[kernel] load_one_elf: partial mmap failed\n");
						goto fail;
					}

					pte_t pte = pgdir_walk(p->env_pgdir,
					                       (void*)last_page,
					                       0);

					/* if we were able to get a PTE, then
					 * there is a real page backing the VMR,
					 * and we need to zero the excess.  if
					 * there isn't, then the page fault code
					 * should handle it.  since we set
					 * populate above, we should have a PTE,
					 * except in cases where the offset +
					 * len window exceeded the file size.
					 * in this case, we let them mmap it,
					 * but didn't populate it.  there will
					 * be a PF right away if someone tries
					 * to use this.  check out do_mmap for
					 * more info. */
					if (pte_walk_okay(pte)) {
						void *last_page_kva =
						    KADDR(pte_get_paddr(pte));

						memset(last_page_kva + partial,
						       0, PGSIZE - partial);
					}

					filesz = ROUNDUP(filesz, PGSIZE);
				}
			}
			/* Any extra pages are mapped anonymously... (a bit
			 * weird) */
			if (filesz < memsz)
				if (do_mmap(p, memstart + filesz, memsz-filesz,
				            PROT_READ | PROT_WRITE, MAP_PRIVATE,
				            NULL, 0) == MAP_FAILED) {
					printk("[kernel] load_one_elf: anon mmap failed\n");
					goto fail;
				}
		}
	}
	/* map in program headers anyway if not present in binary.
	 * useful for TLS in static programs. */
	if (ei->phdr == -1) {
		uintptr_t filestart = ROUNDDOWN(e_phoff, PGSIZE);
		uintptr_t filesz = e_phoff + (e_phnum * phsz) - filestart;
		void *phdr_addr = do_mmap(p, 0, filesz, PROT_READ | PROT_WRITE,
		                          MAP_PRIVATE, foc, filestart);

		if (phdr_addr == MAP_FAILED) {
			printk("[kernel] load_one_elf: prog header mmap failed\n");
			goto fail;
		}
		/* The mapping begins at file offset 'filestart', so the
		 * headers sit (e_phoff - filestart) bytes into it, not e_phoff
		 * bytes. */
		ei->phdr = (long)phdr_addr + (e_phoff - filestart);
	}
	ei->entry = elf_field(elfhdr, e_entry) + pg_num * PGSIZE;
	ei->phnum = e_phnum;
	ei->elf64 = elf64;
	ret = 0;
	/* Fall-through */
fail:
	if (phdrs)
		kfree(phdrs);
	switch_back_from_ktask(old_ret);
	return ret;
}
int load_elf(struct proc *p, struct file_or_chan *foc,
int argc, char *argv[], int envc, char *envp[])
{
elf_info_t ei, interp_ei;
if (load_one_elf(p, foc, 0, &ei, FALSE))
return -1;
if (ei.dynamic) {
struct file_or_chan *interp = foc_open(ei.interp, O_EXEC |
O_READ, 0);
if (!interp)
return -1;
/* Load dynamic linker at 1M. Obvious MIB joke avoided.
* It used to be loaded at page 1, but the existence of valid
* addresses that low masked bad derefs through NULL pointer
* structs. This in turn helped us waste a full day debugging a
* bug in the Go runtime. True! Note that MMAP_LOWEST_VA also
* has this value but we want to make this explicit. */
int error = load_one_elf(p, interp, MMAP_LD_FIXED_VA >> PGSHIFT,
&interp_ei, TRUE);
foc_decref(interp);
if (error)
return -1;
}
/* Set up the auxiliary info for dynamic linker/runtime */
elf_aux_t auxv[] = {{ELF_AUX_PHDR, ei.phdr},
{ELF_AUX_PHENT, sizeof(proghdr32_t)},
{ELF_AUX_PHNUM, ei.phnum},
{ELF_AUX_ENTRY, ei.entry}};
int auxc = sizeof(auxv)/sizeof(auxv[0]);
/* Populate the stack with the required info. */
uintptr_t stack_top = populate_stack(p, argc, argv, envc, envp, auxc,
auxv);
if (!stack_top)
return -1;
/* Initialize the process as an SCP. */
uintptr_t core0_entry = ei.dynamic ? interp_ei.entry : ei.entry;
proc_init_ctx(&p->scp_ctx, 0, core0_entry, stack_top, 0);
p->procinfo->program_end = ei.highest_addr;
p->args_base = (void *) stack_top;
return 0;
}
/* Fetches the argc word stored at p->args_base in p's user memory.
 * Returns that value, or -1 if the copy from user memory fails. */
ssize_t get_startup_argc(struct proc *p)
{
	ssize_t count = 0;
	const char *argc_uva = (const char *) p->args_base;

	/* TODO,DL: Use copy_from_user() when available.
	 */
	if (memcpy_from_user(p, &count, argc_uva, sizeof(size_t)) != 0)
		return -1;

	return count;
}
/* Copies startup argument number 'idx' of process p into the kernel buffer
 * 'argp'.
 *
 * @p:		process whose startup arguments are read
 * @idx:	index into the argv pointer array that follows the argc word
 *		at p->args_base (not range checked here)
 * @argp:	destination buffer
 * @max_size:	size of argp in bytes; clamped to the distance between
 *		p->args_base and USTACKTOP
 *
 * Copies the clamped number of bytes starting at the user string pointer and
 * forces a NUL into the last byte copied, so the result is always terminated
 * (and may be truncated).
 *
 * Returns argp on success, or NULL if there is no room to copy anything
 * (clamped size of 0) or a copy from user memory fails. */
char *get_startup_argv(struct proc *p, size_t idx, char *argp,
                       size_t max_size)
{
	size_t stack_space = (const char *) USTACKTOP - (const char *)
	                     p->args_base;
	const char *sptr = (const char *) p->args_base + sizeof(size_t) +
	                   idx * sizeof(char *);
	const char *argv = NULL;

	/* With a zero-sized window there is nothing to copy, and the
	 * terminator store below would land at argp[SIZE_MAX]. */
	max_size = MIN(max_size, stack_space);
	if (max_size == 0)
		return NULL;

	/* TODO,DL: Use copy_from_user() when available.
	 */
	if (memcpy_from_user(p, &argv, sptr, sizeof(char *)))
		return NULL;

	/* TODO,DL: Use strncpy_from_user() when available.
	 */
	if (memcpy_from_user(p, argp, argv, max_size))
		return NULL;
	argp[max_size - 1] = 0;

	return argp;
}