Redefines PTE present vs mapped

Mapped now means the PTE points to a physical page, in all cases. Present means mapped, plus whatever bits are set so that some hardware access succeeds during a valid PTE walk. For instance, a user read works on x86 when PTE_U and PTE_P are set. Arch-independent code shouldn't use PTE_P anymore; it just passes a perm, and the arch's perm macros carry the access bits. I might have missed something with this commit, like leaking memory or dirty bits. Feel free to check out the usages of is_present, is_mapped, and is_unmapped.
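To make the state machine concrete, here's a minimal sketch that treats a PTE as a raw integer, the way the x86 kpte code does. The predicate names mirror the tree's helpers, but the uintptr_t signatures and bodies here are illustrative only (the real helpers take the arch's pte_t/kpte_t types):

	/* unmapped and mapped partition the space; present is the subset of
	 * mapped whose access bits (e.g. PTE_P on x86) allow hardware access */
	static inline bool kpte_is_unmapped(uintptr_t kpte)
	{
		return kpte == 0;	/* completely unused */
	}

	static inline bool kpte_is_mapped(uintptr_t kpte)
	{
		return kpte != 0;	/* physaddr part points at a real page */
	}

	static inline bool kpte_is_present(uintptr_t kpte)
	{
		return (kpte & PTE_P) != 0;	/* present implies mapped */
	}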
diff --git a/kern/arch/riscv/ros/mmu.h b/kern/arch/riscv/ros/mmu.h
index c1de48e..a393b91 100644
--- a/kern/arch/riscv/ros/mmu.h
+++ b/kern/arch/riscv/ros/mmu.h
@@ -110,11 +110,18 @@
 #define PTE_NOCACHE	0 // PTE bits to turn off caching, if possible
 
 // commly used access modes
+
+#warning "Review RISCV PTEs. Maybe want PTE_E/PTE_R?"
+/* arch-indep code doesn't set PTE_P, it just sets a perm */
+
 #define PTE_KERN_RW	(PTE_SR | PTE_SW | PTE_SX)
 #define PTE_KERN_RO	(PTE_SR | PTE_SX)
 #define PTE_USER_RW	(PTE_SR | PTE_SW | PTE_UR | PTE_UW | PTE_UX)
 #define PTE_USER_RO	(PTE_SR | PTE_UR | PTE_UX)
+#define PTE_NONE	0
+#warning "probably remove this"
 
 // x86 equivalencies
 #define PTE_P      	PTE_E
diff --git a/kern/arch/x86/pmap64.c b/kern/arch/x86/pmap64.c
index 658df50..901a9af 100644
--- a/kern/arch/x86/pmap64.c
+++ b/kern/arch/x86/pmap64.c
@@ -163,7 +163,7 @@
 	     pa += pgsize) {
 		kpte = get_next_pte(kpte, pgdir, va, PG_WALK_CREATE | pml_shift);
 		assert(kpte);
-		*kpte = PTE_ADDR(pa) | PTE_P | perm |
+		*kpte = PTE_ADDR(pa) | perm |
 		        (pml_shift != PML1_SHIFT ? PTE_PS : 0);
 		printd("Wrote *kpte %p, for va %p to pa %p tried to cover %p\n",
 		       *kpte, va, pa, amt_mapped);
@@ -230,8 +230,8 @@
 /* Map [va, va+size) of virtual (linear) address space to physical [pa, pa+size)
  * in the page table rooted at pgdir.  Size is a multiple of PGSIZE.  Use
- * permission bits perm|PTE_P for the entries.  Set pml_shift to the shift of
- * the largest page size you're willing to use.
+ * permission bits perm for the entries.  Set pml_shift to the shift of the
+ * largest page size you're willing to use.
  *
  * Doesn't handle having pages currently mapped yet, and while supporting that
  * is relatively easy, doing an insertion of small pages into an existing jumbo
@@ -383,7 +383,7 @@
 	kpte = &pml[PMLx(va, pml_shift)];
 	if (!(*kpte & PTE_P))
 		return 0;
-	perms_here = *kpte & (PTE_PERM | PTE_P);
+	perms_here = *kpte & PTE_PERM;
 	if (walk_is_complete(kpte, pml_shift, PML1_SHIFT))
 		return perms_here;
 	return pml_perm_walk(kpte2pml(*kpte), va, pml_shift - BITS_PER_PML) &
@@ -450,9 +450,9 @@
 	/* For the LAPIC and IOAPIC, we use PAT (but not *the* PAT flag) to make
 	 * these type UC */
 	map_segment(boot_pgdir, LAPIC_BASE, APIC_SIZE, LAPIC_PBASE,
-	            PTE_PCD | PTE_PWT | PTE_W | PTE_G, max_jumbo_shift);
+	            PTE_PCD | PTE_PWT | PTE_KERN_RW | PTE_G, max_jumbo_shift);
 	map_segment(boot_pgdir, IOAPIC_BASE, APIC_SIZE, IOAPIC_PBASE,
-	            PTE_PCD | PTE_PWT | PTE_W | PTE_G, max_jumbo_shift);
+	            PTE_PCD | PTE_PWT | PTE_KERN_RW | PTE_G, max_jumbo_shift);
 	/* VPT mapping: recursive PTE inserted at the VPT spot */
 	boot_kpt[PML4(VPT)] = PADDR(boot_kpt) | PTE_W | PTE_P;
 	/* same for UVPT, accessible by userspace (RO). */
@@ -594,8 +594,8 @@
 	memset(ept, 0, PGSIZE);
 
 	/* VPT and UVPT map the proc's page table, with different permissions. */
-	kpt[PML4(VPT)]  = PTE(LA2PPN(PADDR(kpt)), PTE_P | PTE_KERN_RW);
-	kpt[PML4(UVPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_P | PTE_USER_RO);
+	kpt[PML4(VPT)]  = PTE(LA2PPN(PADDR(kpt)), PTE_KERN_RW);
+	kpt[PML4(UVPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_USER_RO);
 
 	new_pd->kpte = kpt;
 	/* Processes do not have EPTs by default, only once they are VMMs */
@@ -627,7 +627,7 @@
 static int print_pte(kpte_t *kpte, uintptr_t kva, int shift, bool visited_subs,
                      void *data)
 {
-	if (!(*kpte & PTE_P))
+	if (kpte_is_unmapped(kpte))
 		return 0;
 	switch (shift) {
 	case (PML1_SHIFT):
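Note the LAPIC/IOAPIC hunks: since map_segment() no longer ORs in PTE_P, callers must pass a perm that already carries it, which PTE_KERN_RW now does. A hedged sketch of the new calling convention (DEV_BASE and DEV_PBASE are made-up placeholders, not names from the tree):

	/* PTE_KERN_RW includes PTE_P now; map_segment() adds nothing implicitly */
	map_segment(boot_pgdir, DEV_BASE, PGSIZE, DEV_PBASE,
	            PTE_PCD | PTE_PWT | PTE_KERN_RW | PTE_G, PML1_SHIFT);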
diff --git a/kern/arch/x86/pmap_ops.h b/kern/arch/x86/pmap_ops.h
index 7a2293e..78c05fb 100644
--- a/kern/arch/x86/pmap_ops.h
+++ b/kern/arch/x86/pmap_ops.h
@@ -24,13 +24,21 @@
 }
 
 /* PTE states:
- *  - present: the PTE is involved in a valid page table walk, with the physaddr
- *  part pointing to a physical page.
+ *  - present: the PTE is involved in a valid page table walk, can be used
+ *  for some form of hardware access (read, write, user, etc), and with the
+ *  physaddr part pointing to a physical page.
  *
  *  - mapped: the PTE is involved in some sort of mapping, e.g. a VMR.  We're
- *  storing something in the PTE, but it is isn't necessarily present and
- *  pointing to an actual physical page.  All present are mapped, but not vice
- *  versa.  Mapped could also include paged-out, if we support that later.
+ *  storing something in the PTE, but it isn't necessarily present.
+ *  Currently, all mapped pages should point to an actual physical page.
+ *  All present are mapped, but not vice versa.  Mapped pages can point to a
+ *  real page, but with no access permissions, which is the main distinction
+ *  between present and mapped.
+ *
+ *  - paged_out: we don't actually use this yet.  Since mapped vs present is
+ *  based on the PTE present bits, we'd need to use reserved bits in the PTE to
+ *  differentiate between other states.  Right now, paged_out == mapped, as far
+ *  as the code is concerned.
  *
  *  - unmapped: completely unused. (0 value) */
 static inline bool pte_is_present(pte_t pte)
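One consequence of these definitions: mapped-but-not-present is exactly what an mprotect() to PROT_NONE produces. A minimal sketch of that transition, using only helpers named in this commit (assumes pte is a valid, present PTE backed by a nonzero physical address):

	pte_replace_perm(pte, PTE_NONE);	/* drop all access bits, incl. PTE_P */
	assert(pte_is_mapped(pte));		/* physaddr part is still there */
	assert(!pte_is_present(pte));		/* any hardware access now faults */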
diff --git a/kern/arch/x86/ros/mmu64.h b/kern/arch/x86/ros/mmu64.h
index ab97a02..38b8361 100644
--- a/kern/arch/x86/ros/mmu64.h
+++ b/kern/arch/x86/ros/mmu64.h
@@ -286,11 +286,12 @@
 /* Permissions fields and common access modes.  These should be read as 'just
  * kernel or user too' and 'RO or RW'.  USER_RO means read-only for everyone. */
-#define PTE_PERM	(PTE_W | PTE_U)
-#define PTE_KERN_RW	PTE_W		// Kernel Read/Write
-#define PTE_KERN_RO	0		// Kernel Read-Only
-#define PTE_USER_RW	(PTE_W | PTE_U)	// Kernel/User Read/Write
-#define PTE_USER_RO	PTE_U		// Kernel/User Read-Only
+#define PTE_PERM	(PTE_W | PTE_U | PTE_P)
+#define PTE_KERN_RW	(PTE_W | PTE_P)
+#define PTE_KERN_RO	PTE_P
+#define PTE_USER_RW	(PTE_W | PTE_U | PTE_P)
+#define PTE_USER_RO	(PTE_U | PTE_P)
+#define PTE_NONE	0
 
 /* The PTE/translation part of a PTE/virtual(linear) address.  It's used
  * frequently to be the page address of a virtual address.  Note this doesn't
@@ -303,7 +304,7 @@
 
 /* we must guarantee that for any PTE, exactly one of the following is true */
 #define PAGE_PRESENT(pte)	((pte) & PTE_P)
 #define PAGE_UNMAPPED(pte)	((pte) == 0)
-#define PAGE_PAGED_OUT(pte)	(!PAGE_PRESENT(pte) && !PAGE_UNMAPPED(pte))
+#define PAGE_PAGED_OUT(pte)	(0)	/* Need to use an OS reserved PTE bit */
 
 /* **************************************** */
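With PAGE_PAGED_OUT stubbed to 0, a mapped-but-not-present PTE currently satisfies none of the three macros, which is what the "OS reserved PTE bit" comment is about. Illustrative values for each state (the physical address is made up):

	uintptr_t unmapped = 0;				/* PAGE_UNMAPPED */
	uintptr_t present  = 0x123000 | PTE_USER_RW;	/* PAGE_PRESENT; PTE_P rides
							 * in the perm macro */
	uintptr_t mapped   = 0x123000 | PTE_NONE;	/* mapped, not present; would
							 * be PAGE_PAGED_OUT once a
							 * reserved bit exists */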
diff --git a/kern/src/env.c b/kern/src/env.c
index d7a58c7..f2c49c1 100644
--- a/kern/src/env.c
+++ b/kern/src/env.c
@@ -103,7 +103,7 @@
 	assert((uintptr_t)start + len <= UVPT); //since this keeps fucking happening
 
 int user_page_free(env_t* e, pte_t pte, void* va, void* arg)
 {
-	if (!pte_is_present(pte))
+	if (!pte_is_mapped(pte))
 		return 0;
 	page_t *page = pa2page(pte_get_paddr(pte));
 	pte_clear(pte);
diff --git a/kern/src/mm.c b/kern/src/mm.c
index 94daa68..2b10536 100644
--- a/kern/src/mm.c
+++ b/kern/src/mm.c
@@ -288,7 +288,7 @@
 		return 0;
 	/* pages could be !P, but right now that's only for file backed VMRs
 	 * undergoing page removal, which isn't the caller of copy_pages. */
-	if (pte_is_present(pte)) {
+	if (pte_is_mapped(pte)) {
 		/* TODO: check for jumbos */
 		if (upage_alloc(new_p, &pp, 0))
 			return -ENOMEM;
@@ -459,7 +459,7 @@
 	return FALSE;
 }
 
-/* Helper, maps in page at addr, but only if nothing is present there.  Returns
+/* Helper, maps in page at addr, but only if nothing is mapped there.  Returns
  * 0 on success.  If this is called by non-PM code, we'll store your ref in the
  * PTE. */
 static int map_page_at_addr(struct proc *p, struct page *page, uintptr_t addr,
@@ -483,10 +483,19 @@
 		page_decref(page);
 		return 0;
 	}
+	if (pte_is_mapped(pte)) {
+		/* we're clobbering an old entry.  if we're just updating the prot,
+		 * then it's no big deal.  o/w, there might be an issue. */
+		if (page2pa(page) != pte_get_paddr(pte)) {
+			warn_once("Clobbered a PTE mapping (%p -> %p)\n", pte_print(pte),
+			          page2pa(page) | prot);
+		}
+		page_decref(pa2page(pte_get_paddr(pte)));
+	}
 	/* preserve the dirty bit - pm removal could be looking concurrently */
 	prot |= (pte_is_dirty(pte) ? PTE_D : 0);
 	/* We have a ref to page, which we are storing in the PTE */
-	pte_write(pte, page2pa(page), PTE_P | prot);
+	pte_write(pte, page2pa(page), prot);
 	spin_unlock(&p->pte_lock);
 	return 0;
 }
@@ -705,7 +714,7 @@
 	pte_t pte;
 	bool shootdown_needed = FALSE;
 	int pte_prot = (prot & PROT_WRITE) ? PTE_USER_RW :
-	               (prot & (PROT_READ|PROT_EXEC)) ? PTE_USER_RO : 0;
+	               (prot & (PROT_READ|PROT_EXEC)) ? PTE_USER_RO : PTE_NONE;
 	/* TODO: this is aggressively splitting, when we might not need to if the
 	 * prots are the same as the previous.  Plus, there are three excessive
 	 * scans.  Finally, we might be able to merge when we are done. */
@@ -720,10 +729,14 @@
 	}
 	vmr->vm_prot = prot;
 	spin_lock(&p->pte_lock);	/* walking and changing PTEs */
-	/* TODO: use a memwalk */
+	/* TODO: use a memwalk.  At a minimum, we need to change every existing
+	 * PTE that won't trigger a PF (meaning, present PTEs) to have the new
+	 * prot.  The others will fault on access, and we'll change the PTE
+	 * then.  In the off chance we have a mapped but not present PTE, we
+	 * might as well change it too, since we're already here. */
 	for (uintptr_t va = vmr->vm_base; va < vmr->vm_end; va += PGSIZE) {
 		pte = pgdir_walk(p->env_pgdir, (void*)va, 0);
-		if (pte_walk_okay(pte) && pte_is_present(pte)) {
+		if (pte_walk_okay(pte) && pte_is_mapped(pte)) {
 			pte_replace_perm(pte, pte_prot);
 			shootdown_needed = TRUE;
 		}
@@ -765,11 +778,9 @@
                            void *arg)
 {
 	bool *shootdown_needed = (bool*)arg;
-	struct page *page;
 	/* could put in some checks here for !P and also !0 */
 	if (!pte_is_present(pte))	/* unmapped (== 0) *ptes are also not PTE_P */
 		return 0;
-	page = pa2page(pte_get_paddr(pte));
 	pte_clear_present(pte);
 	*shootdown_needed = TRUE;
 	return 0;
@@ -1164,7 +1175,7 @@
 	/* it's not strictly necessary to drop paddr's pgoff, but it might save some
 	 * vmap heartache in the future. */
 	if (map_vmap_segment(vaddr, PG_ADDR(paddr), nr_pages,
-	                     PTE_P | PTE_KERN_RW | flags)) {
+	                     PTE_KERN_RW | flags)) {
 		warn("Unable to map a vmap segment");	/* probably a bug */
 		return 0;
 	}
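The PROT_* to PTE-perm mapping in mprotect now bottoms out at PTE_NONE rather than a bare 0 (same value on x86, but arch-independent in intent). Pulled out as a standalone helper for clarity; the name is hypothetical, the logic mirrors the ternary in the hunk above:

	/* hypothetical helper; same logic as the pte_prot computation above */
	static int prot_to_pte_perm(int prot)
	{
		if (prot & PROT_WRITE)
			return PTE_USER_RW;
		if (prot & (PROT_READ | PROT_EXEC))
			return PTE_USER_RO;
		return PTE_NONE;	/* mapped but not present: faults on access */
	}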
diff --git a/kern/src/pagemap.c b/kern/src/pagemap.c
index 8c7e5a2..6cf169c 100644
--- a/kern/src/pagemap.c
+++ b/kern/src/pagemap.c
@@ -306,7 +306,11 @@
 static int __pm_mark_not_present(struct proc *p, pte_t pte, void *va, void *arg)
 {
 	struct page *page;
-	if (!pte_is_present(pte))
+	/* mapped includes present.  Any PTE pointing to a page (mapped) will get
+	 * flagged for removal and have its access prots revoked.  We need to deal
+	 * with mapped-but-maybe-not-present in case of a dirtied file that was
+	 * mprotected to PROT_NONE (which is not present) */
+	if (pte_is_unmapped(pte))
 		return 0;
 	page = pa2page(pte_get_paddr(pte));
 	if (atomic_read(&page->pg_flags) & PG_REMOVAL)
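The corner case that comment guards against, as a hedged userspace sequence (fd setup and error handling omitted). It leaves a dirty, mapped, not-present PTE that the removal walker must still visit to write back the data:

	char *addr = mmap(0, PGSIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	addr[0] = 1;				/* dirties the file page; PTE present */
	mprotect(addr, PGSIZE, PROT_NONE);	/* PTE keeps its physaddr, loses PTE_P */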
diff --git a/kern/src/pmap.c b/kern/src/pmap.c
index cb8351f..b16020f 100644
--- a/kern/src/pmap.c
+++ b/kern/src/pmap.c
@@ -223,7 +223,7 @@
 	 * any other state. (TODO: review all other states, maybe rm only for P) */
 	if (pte_is_mapped(pte))
 		page_remove(pgdir, va);
-	pte_write(pte, page2pa(page), PTE_P | perm);
+	pte_write(pte, page2pa(page), perm);
 	return 0;
 }
 
@@ -247,7 +247,7 @@
 page_t *page_lookup(pgdir_t pgdir, void *va, pte_t *pte_store)
 {
 	pte_t pte = pgdir_walk(pgdir, va, 0);
-	if (!pte_walk_okay(pte) || !pte_is_present(pte))
+	if (!pte_walk_okay(pte) || !pte_is_mapped(pte))
 		return 0;
 	if (pte_store)
 		*pte_store = pte;
@@ -285,7 +285,7 @@
 	if (!pte_walk_okay(pte) || pte_is_unmapped(pte))
 		return;
-	if (pte_is_present(pte)) {
+	if (pte_is_mapped(pte)) {
 		/* TODO: (TLB) need to do a shootdown, inval sucks.  And might want to
 		 * manage the TLB / free pages differently. (like by the caller).
 		 * Careful about the proc/memory lock here. */
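With page_insert() no longer adding PTE_P, the caller's perm fully determines presence. A hedged usage sketch, assuming the usual page_insert(pgdir, page, va, perm) signature:

	/* kernel RW mapping: PTE_KERN_RW already includes PTE_P */
	if (page_insert(pgdir, page, va, PTE_KERN_RW))
		warn("insert failed");
	/* no-access mapping: mapped but not present, faults on any access */
	if (page_insert(pgdir, page, va, PTE_NONE))
		warn("insert failed");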
diff --git a/kern/src/umem.c b/kern/src/umem.c
index 38b4d92..87f4e3a 100644
--- a/kern/src/umem.c
+++ b/kern/src/umem.c
@@ -34,7 +34,7 @@
 	const void *start, *end;
 	size_t num_pages, i;
 	pte_t pte;
-	uintptr_t perm = PTE_P | PTE_USER_RO;
+	uintptr_t perm = PTE_USER_RO;
 	size_t bytes_copied = 0;
 
 	static_assert(ULIM % PGSIZE == 0 && ULIM != 0); // prevent wrap-around
@@ -100,7 +100,7 @@
 	const void *start, *end;
 	size_t num_pages, i;
 	pte_t pte;
-	uintptr_t perm = PTE_P | PTE_USER_RW;
+	uintptr_t perm = PTE_USER_RW;
 	size_t bytes_copied = 0;
 
 	static_assert(ULIM % PGSIZE == 0 && ULIM != 0); // prevent wrap-around