Redefines PTE present vs mapped
Mapped now means the PTE points to a physical page in all cases.
Present means mapped plus whatever bits the hardware needs for a valid
PTE walk; for instance, a user read on x86 requires PTE_U and PTE_P.
PTE_P should no longer be used from arch-independent code.
I might have missed something with this commit, like leaking memory or
dropping dirty bits. Feel free to check the usages of is_present,
is_mapped, and is_unmapped.
diff --git a/kern/arch/riscv/ros/mmu.h b/kern/arch/riscv/ros/mmu.h
index c1de48e..a393b91 100644
--- a/kern/arch/riscv/ros/mmu.h
+++ b/kern/arch/riscv/ros/mmu.h
@@ -110,11 +110,18 @@
#define PTE_NOCACHE 0 // PTE bits to turn off caching, if possible
// commly used access modes
+
+#warning "Review RISCV PTEs. Maybe want PTE_E/PTE_R?"
+ /* arch-indep code doesn't set PTE_P, it just sets a perm */
+
+
#define PTE_KERN_RW (PTE_SR | PTE_SW | PTE_SX)
#define PTE_KERN_RO (PTE_SR | PTE_SX)
#define PTE_USER_RW (PTE_SR | PTE_SW | PTE_UR | PTE_UW | PTE_UX)
#define PTE_USER_RO (PTE_SR | PTE_UR | PTE_UX)
+#define PTE_NONE 0
+#warning "probably remove this"
// x86 equivalencies
#define PTE_P PTE_E
diff --git a/kern/arch/x86/pmap64.c b/kern/arch/x86/pmap64.c
index 658df50..901a9af 100644
--- a/kern/arch/x86/pmap64.c
+++ b/kern/arch/x86/pmap64.c
@@ -163,7 +163,7 @@
pa += pgsize) {
kpte = get_next_pte(kpte, pgdir, va, PG_WALK_CREATE | pml_shift);
assert(kpte);
- *kpte = PTE_ADDR(pa) | PTE_P | perm |
+ *kpte = PTE_ADDR(pa) | perm |
(pml_shift != PML1_SHIFT ? PTE_PS : 0);
printd("Wrote *kpte %p, for va %p to pa %p tried to cover %p\n",
*kpte, va, pa, amt_mapped);
@@ -230,8 +230,8 @@
/* Map [va, va+size) of virtual (linear) address space to physical [pa, pa+size)
* in the page table rooted at pgdir. Size is a multiple of PGSIZE. Use
- * permission bits perm|PTE_P for the entries. Set pml_shift to the shift of
- * the largest page size you're willing to use.
+ * permission bits perm for the entries. Set pml_shift to the shift of the
+ * largest page size you're willing to use.
*
* Doesn't handle having pages currently mapped yet, and while supporting that
* is relatively easy, doing an insertion of small pages into an existing jumbo
@@ -383,7 +383,7 @@
kpte = &pml[PMLx(va, pml_shift)];
if (!(*kpte & PTE_P))
return 0;
- perms_here = *kpte & (PTE_PERM | PTE_P);
+ perms_here = *kpte & PTE_PERM;
if (walk_is_complete(kpte, pml_shift, PML1_SHIFT))
return perms_here;
return pml_perm_walk(kpte2pml(*kpte), va, pml_shift - BITS_PER_PML) &
@@ -450,9 +450,9 @@
/* For the LAPIC and IOAPIC, we use PAT (but not *the* PAT flag) to make
* these type UC */
map_segment(boot_pgdir, LAPIC_BASE, APIC_SIZE, LAPIC_PBASE,
- PTE_PCD | PTE_PWT | PTE_W | PTE_G, max_jumbo_shift);
+ PTE_PCD | PTE_PWT | PTE_KERN_RW | PTE_G, max_jumbo_shift);
map_segment(boot_pgdir, IOAPIC_BASE, APIC_SIZE, IOAPIC_PBASE,
- PTE_PCD | PTE_PWT | PTE_W | PTE_G, max_jumbo_shift);
+ PTE_PCD | PTE_PWT | PTE_KERN_RW | PTE_G, max_jumbo_shift);
/* VPT mapping: recursive PTE inserted at the VPT spot */
boot_kpt[PML4(VPT)] = PADDR(boot_kpt) | PTE_W | PTE_P;
/* same for UVPT, accessible by userspace (RO). */
@@ -594,8 +594,8 @@
memset(ept, 0, PGSIZE);
/* VPT and UVPT map the proc's page table, with different permissions. */
- kpt[PML4(VPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_P | PTE_KERN_RW);
- kpt[PML4(UVPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_P | PTE_USER_RO);
+ kpt[PML4(VPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_KERN_RW);
+ kpt[PML4(UVPT)] = PTE(LA2PPN(PADDR(kpt)), PTE_USER_RO);
new_pd->kpte = kpt;
/* Processes do not have EPTs by default, only once they are VMMs */
@@ -627,7 +627,7 @@
static int print_pte(kpte_t *kpte, uintptr_t kva, int shift, bool visited_subs,
void *data)
{
- if (!(*kpte & PTE_P))
+ if (kpte_is_unmapped(kpte))
return 0;
switch (shift) {
case (PML1_SHIFT):
diff --git a/kern/arch/x86/pmap_ops.h b/kern/arch/x86/pmap_ops.h
index 7a2293e..78c05fb 100644
--- a/kern/arch/x86/pmap_ops.h
+++ b/kern/arch/x86/pmap_ops.h
@@ -24,13 +24,21 @@
}
/* PTE states:
- * - present: the PTE is involved in a valid page table walk, with the physaddr
- * part pointing to a physical page.
+ * - present: the PTE is involved in a valid page table walk, can be used
+ * for some form of hardware access (read, write, user, etc), and with the
+ * physaddr part pointing to a physical page.
*
* - mapped: the PTE is involved in some sort of mapping, e.g. a VMR. We're
- * storing something in the PTE, but it is isn't necessarily present and
- * pointing to an actual physical page. All present are mapped, but not vice
- * versa. Mapped could also include paged-out, if we support that later.
+ *   storing something in the PTE, but it isn't necessarily present.
+ * Currently, all mapped pages should point to an actual physical page.
+ * All present are mapped, but not vice versa. Mapped pages can point to a
+ * real page, but with no access permissions, which is the main distinction
+ * between present and mapped.
+ *
+ * - paged_out: we don't actually use this yet. Since mapped vs present is
+ * based on the PTE present bits, we'd need to use reserved bits in the PTE to
+ * differentiate between other states. Right now, paged_out == mapped, as far
+ * as the code is concerned.
*
* - unmapped: completely unused. (0 value) */
static inline bool pte_is_present(pte_t pte)
diff --git a/kern/arch/x86/ros/mmu64.h b/kern/arch/x86/ros/mmu64.h
index ab97a02..38b8361 100644
--- a/kern/arch/x86/ros/mmu64.h
+++ b/kern/arch/x86/ros/mmu64.h
@@ -286,11 +286,12 @@
/* Permissions fields and common access modes. These should be read as 'just
* kernel or user too' and 'RO or RW'. USER_RO means read-only for everyone. */
-#define PTE_PERM (PTE_W | PTE_U)
-#define PTE_KERN_RW PTE_W // Kernel Read/Write
-#define PTE_KERN_RO 0 // Kernel Read-Only
-#define PTE_USER_RW (PTE_W | PTE_U) // Kernel/User Read/Write
-#define PTE_USER_RO PTE_U // Kernel/User Read-Only
+#define PTE_PERM (PTE_W | PTE_U | PTE_P)
+#define PTE_KERN_RW (PTE_W | PTE_P)
+#define PTE_KERN_RO PTE_P
+#define PTE_USER_RW (PTE_W | PTE_U | PTE_P)
+#define PTE_USER_RO (PTE_U | PTE_P)
+#define PTE_NONE 0
/* The PTE/translation part of a PTE/virtual(linear) address. It's used
* frequently to be the page address of a virtual address. Note this doesn't
@@ -303,7 +304,7 @@
/* we must guarantee that for any PTE, exactly one of the following is true */
#define PAGE_PRESENT(pte) ((pte) & PTE_P)
#define PAGE_UNMAPPED(pte) ((pte) == 0)
-#define PAGE_PAGED_OUT(pte) (!PAGE_PRESENT(pte) && !PAGE_UNMAPPED(pte))
+#define PAGE_PAGED_OUT(pte) (0) /* Need to use an OS reserved PTE bit */
/* **************************************** */
diff --git a/kern/src/env.c b/kern/src/env.c
index d7a58c7..f2c49c1 100644
--- a/kern/src/env.c
+++ b/kern/src/env.c
@@ -103,7 +103,7 @@
assert((uintptr_t)start + len <= UVPT); //since this keeps fucking happening
int user_page_free(env_t* e, pte_t pte, void* va, void* arg)
{
- if (!pte_is_present(pte))
+ if (!pte_is_mapped(pte))
return 0;
page_t *page = pa2page(pte_get_paddr(pte));
pte_clear(pte);
diff --git a/kern/src/mm.c b/kern/src/mm.c
index 94daa68..2b10536 100644
--- a/kern/src/mm.c
+++ b/kern/src/mm.c
@@ -288,7 +288,7 @@
return 0;
/* pages could be !P, but right now that's only for file backed VMRs
* undergoing page removal, which isn't the caller of copy_pages. */
- if (pte_is_present(pte)) {
+ if (pte_is_mapped(pte)) {
/* TODO: check for jumbos */
if (upage_alloc(new_p, &pp, 0))
return -ENOMEM;
@@ -459,7 +459,7 @@
return FALSE;
}
-/* Helper, maps in page at addr, but only if nothing is present there. Returns
+/* Helper, maps in page at addr, but only if nothing is mapped there. Returns
* 0 on success. If this is called by non-PM code, we'll store your ref in the
* PTE. */
static int map_page_at_addr(struct proc *p, struct page *page, uintptr_t addr,
@@ -483,10 +483,19 @@
page_decref(page);
return 0;
}
+ if (pte_is_mapped(pte)) {
+ /* we're clobbering an old entry. if we're just updating the prot, then
+ * it's no big deal. o/w, there might be an issue. */
+ if (page2pa(page) != pte_get_paddr(pte)) {
+ warn_once("Clobbered a PTE mapping (%p -> %p)\n", pte_print(pte),
+ page2pa(page) | prot);
+ }
+ page_decref(pa2page(pte_get_paddr(pte)));
+ }
/* preserve the dirty bit - pm removal could be looking concurrently */
prot |= (pte_is_dirty(pte) ? PTE_D : 0);
/* We have a ref to page, which we are storing in the PTE */
- pte_write(pte, page2pa(page), PTE_P | prot);
+ pte_write(pte, page2pa(page), prot);
spin_unlock(&p->pte_lock);
return 0;
}
@@ -705,7 +714,7 @@
pte_t pte;
bool shootdown_needed = FALSE;
int pte_prot = (prot & PROT_WRITE) ? PTE_USER_RW :
- (prot & (PROT_READ|PROT_EXEC)) ? PTE_USER_RO : 0;
+ (prot & (PROT_READ|PROT_EXEC)) ? PTE_USER_RO : PTE_NONE;
/* TODO: this is aggressively splitting, when we might not need to if the
* prots are the same as the previous. Plus, there are three excessive
* scans. Finally, we might be able to merge when we are done. */
@@ -720,10 +729,14 @@
}
vmr->vm_prot = prot;
spin_lock(&p->pte_lock); /* walking and changing PTEs */
- /* TODO: use a memwalk */
+ /* TODO: use a memwalk. At a minimum, we need to change every existing
+ * PTE that won't trigger a PF (meaning, present PTEs) to have the new
+ * prot. The others will fault on access, and we'll change the PTE
+ * then. In the off chance we have a mapped but not present PTE, we
+ * might as well change it too, since we're already here. */
for (uintptr_t va = vmr->vm_base; va < vmr->vm_end; va += PGSIZE) {
pte = pgdir_walk(p->env_pgdir, (void*)va, 0);
- if (pte_walk_okay(pte) && pte_is_present(pte)) {
+ if (pte_walk_okay(pte) && pte_is_mapped(pte)) {
pte_replace_perm(pte, pte_prot);
shootdown_needed = TRUE;
}
@@ -765,11 +778,9 @@
void *arg)
{
bool *shootdown_needed = (bool*)arg;
- struct page *page;
/* could put in some checks here for !P and also !0 */
if (!pte_is_present(pte)) /* unmapped (== 0) *ptes are also not PTE_P */
return 0;
- page = pa2page(pte_get_paddr(pte));
pte_clear_present(pte);
*shootdown_needed = TRUE;
return 0;
@@ -1164,7 +1175,7 @@
/* it's not strictly necessary to drop paddr's pgoff, but it might save some
* vmap heartache in the future. */
if (map_vmap_segment(vaddr, PG_ADDR(paddr), nr_pages,
- PTE_P | PTE_KERN_RW | flags)) {
+ PTE_KERN_RW | flags)) {
warn("Unable to map a vmap segment"); /* probably a bug */
return 0;
}
diff --git a/kern/src/pagemap.c b/kern/src/pagemap.c
index 8c7e5a2..6cf169c 100644
--- a/kern/src/pagemap.c
+++ b/kern/src/pagemap.c
@@ -306,7 +306,11 @@
static int __pm_mark_not_present(struct proc *p, pte_t pte, void *va, void *arg)
{
struct page *page;
- if (!pte_is_present(pte))
+ /* mapped includes present. Any PTE pointing to a page (mapped) will get
+ * flagged for removal and have its access prots revoked. We need to deal
+ * with mapped-but-maybe-not-present in case of a dirtied file that was
+ * mprotected to PROT_NONE (which is not present) */
+ if (pte_is_unmapped(pte))
return 0;
page = pa2page(pte_get_paddr(pte));
if (atomic_read(&page->pg_flags) & PG_REMOVAL)
diff --git a/kern/src/pmap.c b/kern/src/pmap.c
index cb8351f..b16020f 100644
--- a/kern/src/pmap.c
+++ b/kern/src/pmap.c
@@ -223,7 +223,7 @@
* any other state. (TODO: review all other states, maybe rm only for P) */
if (pte_is_mapped(pte))
page_remove(pgdir, va);
- pte_write(pte, page2pa(page), PTE_P | perm);
+ pte_write(pte, page2pa(page), perm);
return 0;
}
@@ -247,7 +247,7 @@
page_t *page_lookup(pgdir_t pgdir, void *va, pte_t *pte_store)
{
pte_t pte = pgdir_walk(pgdir, va, 0);
- if (!pte_walk_okay(pte) || !pte_is_present(pte))
+ if (!pte_walk_okay(pte) || !pte_is_mapped(pte))
return 0;
if (pte_store)
*pte_store = pte;
@@ -285,7 +285,7 @@
if (!pte_walk_okay(pte) || pte_is_unmapped(pte))
return;
- if (pte_is_present(pte)) {
+ if (pte_is_mapped(pte)) {
/* TODO: (TLB) need to do a shootdown, inval sucks. And might want to
* manage the TLB / free pages differently. (like by the caller).
* Careful about the proc/memory lock here. */
diff --git a/kern/src/umem.c b/kern/src/umem.c
index 38b4d92..87f4e3a 100644
--- a/kern/src/umem.c
+++ b/kern/src/umem.c
@@ -34,7 +34,7 @@
const void *start, *end;
size_t num_pages, i;
pte_t pte;
- uintptr_t perm = PTE_P | PTE_USER_RO;
+ uintptr_t perm = PTE_USER_RO;
size_t bytes_copied = 0;
static_assert(ULIM % PGSIZE == 0 && ULIM != 0); // prevent wrap-around
@@ -100,7 +100,7 @@
const void *start, *end;
size_t num_pages, i;
pte_t pte;
- uintptr_t perm = PTE_P | PTE_USER_RW;
+ uintptr_t perm = PTE_USER_RW;
size_t bytes_copied = 0;
static_assert(ULIM % PGSIZE == 0 && ULIM != 0); // prevent wrap-around