/* Copyright (c) 2009 The Regents of the University of California.
 * See the COPYRIGHT files at the top of this source tree for full
 * license information.
 *
 * Barret Rhoden <brho@cs.berkeley.edu>
 * Kevin Klues <klueska@cs.berkeley.edu> */

#include <sys/queue.h>
#include <page_alloc.h>
#include <pmap.h>
#include <kmalloc.h>
#include <multiboot.h>

/* Protects the per-color free lists below. */
spinlock_t colored_page_free_list_lock = SPINLOCK_INITIALIZER_IRQSAVE;

page_list_t *colored_page_free_list = NULL;

static void page_alloc_bootstrap(void)
{
    /* Allocate space for the array required to manage the free lists */
    size_t list_size = llc_cache->num_colors * sizeof(page_list_t);
    page_list_t *tmp = (page_list_t*)boot_alloc(list_size, PGSIZE);

    colored_page_free_list = tmp;
    for (int i = 0; i < llc_cache->num_colors; i++)
        BSD_LIST_INIT(&colored_page_free_list[i]);
}
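
/* Worked example of the sizing above (illustrative numbers, not a claim about
 * any particular machine): an 8 MB, 16-way LLC with 4 KB pages has
 * 8 MB / 16 = 512 KB per way, i.e. 512 KB / 4 KB = 128 page colors, so this
 * would boot-allocate 128 list heads. */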

/* Can do whatever here.  For now, our page allocator just works with colors,
 * not NUMA zones or anything. */
static void track_free_page(struct page *page)
{
    BSD_LIST_INSERT_HEAD(&colored_page_free_list[get_page_color(page2ppn(page),
                                                                llc_cache)],
                         page, pg_link);
    nr_free_pages++;
    /* Page was previously marked as busy; we need to set it free explicitly */
    page_setref(page, 0);
}
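
/* A minimal sketch of the allocation-side counterpart, for illustration only;
 * this is not the allocator's real entry point.  It assumes BSD_LIST_FIRST and
 * BSD_LIST_REMOVE (the standard sys/queue.h LIST ops under the BSD_ prefix)
 * and a color index below llc_cache->num_colors. */
static inline struct page *__example_pop_free_page(size_t color)
{
    struct page *page;

    spin_lock_irqsave(&colored_page_free_list_lock);
    page = BSD_LIST_FIRST(&colored_page_free_list[color]);
    if (page) {
        BSD_LIST_REMOVE(page, pg_link);
        nr_free_pages--;
        /* mirror track_free_page(): a nonzero refcnt means busy */
        page_setref(page, 1);
    }
    spin_unlock_irqsave(&colored_page_free_list_lock);
    return page;
}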

/* 'pages' is the per-page metadata array, indexed by PPN, so a physical
 * address maps to its struct page by dropping the page offset. */
static struct page *pa64_to_page(uint64_t paddr)
{
    return &pages[paddr >> PGSHIFT];
}

static bool pa64_is_in_kernel(uint64_t paddr)
{
    extern char end[];
    /* kernel is linked and loaded here (in kernel{32,64}.ld) */
    return (EXTPHYSMEM <= paddr) && (paddr < PADDR(end));
}

/* Helper.  For every page in the entry, this will determine whether or not
 * the page is free, and handle accordingly.  All pages are marked as busy by
 * default, and we're just determining which of them could be free. */
static void parse_mboot_region(struct multiboot_mmap_entry *entry, void *data)
{
    physaddr_t boot_freemem_paddr = (physaddr_t)data;
    bool in_bootzone = (entry->addr <= boot_freemem_paddr) &&
                       (boot_freemem_paddr < entry->addr + entry->len);

    if (entry->type != MULTIBOOT_MEMORY_AVAILABLE)
        return;
    /* TODO: we'll have some issues with jumbo allocation */
    /* Most entries are page aligned, though on some machines below EXTPHYSMEM
     * we may have some that aren't.  If two regions collide on the same page
     * (one of them starts unaligned), we need to handle the page only once,
     * and err on the side of being busy.
     *
     * Since these regions happen below EXTPHYSMEM, they are all marked busy
     * (or else we'll panic).  I'll probably rewrite this for jumbos before I
     * find a machine with unaligned mboot entries in higher memory. */
    if (PGOFF(entry->addr))
        assert(entry->addr < EXTPHYSMEM);
    for (uint64_t i = ROUNDDOWN(entry->addr, PGSIZE);
         i < entry->addr + entry->len;
         i += PGSIZE) {
        /* Skip pages we'll never map (above KERNBASE).  Once we hit one of
         * them, we know the rest are too (for this entry). */
        if (i >= max_paddr)
            return;
        /* Mark low mem as busy (multiboot stuff is usually there, too).
         * Since that memory may be freed later (like the smp_boot page),
         * we'll treat it like it is busy/allocated. */
        if (i < EXTPHYSMEM)
            continue;
        /* Mark as busy pages already allocated in boot_alloc() */
        if (in_bootzone && (i < boot_freemem_paddr))
            continue;
        /* Need to double check for the kernel, in case it wasn't in the
         * bootzone.  If it was in the bootzone, we already skipped it. */
        if (pa64_is_in_kernel(i))
            continue;
        track_free_page(pa64_to_page(i));
    }
}
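
/* Worked example (hypothetical numbers): given an available entry covering
 * [0x100000, 0x8000000) with boot_freemem_paddr = 0x400000 inside it, the
 * loop above skips [0x100000, 0x400000) as boot-allocated - assuming the
 * kernel image ends below 0x400000, that skip also covers it, so
 * pa64_is_in_kernel() never fires - and calls track_free_page() on every
 * page in [0x400000, 0x8000000). */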

static void check_range(uint64_t start, uint64_t end, int expect)
{
    int ref;

    if (PGOFF(start))
        printk("Warning: check_range given unaligned addr 0x%016llx\n", start);
    for (uint64_t i = start; i < end; i += PGSIZE) {
        ref = kref_refcnt(&pa64_to_page(i)->pg_kref);
        if (ref != expect) {
            printk("Error: while checking range [0x%016llx, 0x%016llx), "
                   "physaddr 0x%016llx refcnt was %d, expected %d\n", start,
                   end, i, ref, expect);
            panic("");
        }
    }
}
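
/* e.g. check_range(0, 0x100000, 1) would assert that every page of low
 * memory is still marked busy. */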

/* Note this doesn't check all of memory.  There are some chunks of 'memory'
 * that aren't reported by MB at all, like the VRAM sections at 0xa0000. */
static void check_mboot_region(struct multiboot_mmap_entry *entry, void *data)
{
    extern char end[];
    physaddr_t boot_freemem_paddr = (physaddr_t)data;
    bool in_bootzone = (entry->addr <= boot_freemem_paddr) &&
                       (boot_freemem_paddr < entry->addr + entry->len);
    /* Need to deal with 32b wrap-around */
    uint64_t zone_end = MIN(entry->addr + entry->len, (uint64_t)max_paddr);

    if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) {
        check_range(entry->addr, zone_end, 1);
        return;
    }
    if (zone_end <= EXTPHYSMEM) {
        check_range(entry->addr, zone_end, 1);
        return;
    }
    /* this may include the kernel */
    if (in_bootzone) {
        /* boot_freemem might not be page aligned.  If it's part-way through a
         * page, that page should be busy */
        check_range(entry->addr, ROUNDUP(PADDR(boot_freemem), PGSIZE), 1);
        check_range(ROUNDUP(PADDR(boot_freemem), PGSIZE), zone_end, 0);
        assert(zone_end == PADDR(boot_freelimit));
        return;
    }
    /* kernel's range (hardcoded in the linker script).  If we're checking
     * now, it means the kernel is not in the same entry as the bootzone. */
    if (entry->addr == EXTPHYSMEM) {
        check_range(EXTPHYSMEM, PADDR(end), 1);
        check_range(ROUNDUP(PADDR(end), PGSIZE), zone_end, 0);
        return;
    }
}

/* Since we can't parse multiboot mmap entries, we need to just guess at which
 * pages are free and which ones aren't.
 *
 * Despite the lack of info from the mbi, I know there is a magic hole in
 * physical memory that we can't use, from IOAPIC_PBASE on up [0xfec00000,
 * 0xffffffff] (I'm being pessimistic).  But that's not pessimistic enough!
 * Qemu still doesn't like that.  Using 0xe0000000 instead works on my
 * machine.  According to http://wiki.osdev.org/Memory_Map_(x86), some systems
 * could reserve from [0xc0000000, 0xffffffff].  Anyway, in lieu of real
 * memory detection, I'm just skipping that entire region.
 *
 * We may or may not have more free memory above this magic hole, depending on
 * both the amount of RAM we have as well as 32 vs 64 bit.
 *
 * So we'll go with two free memory regions:
 *
 * 		[ 0, ROUNDUP(boot_freemem_paddr, PGSIZE) ) = busy
 * 		[ ROUNDUP(boot_freemem_paddr, PGSIZE), TOP_OF_1 ) = free
 * 		[ MAGIC_HOLE, 0x0000000100000000 ) = busy
 * 		(and maybe this:)
 * 		[ 0x0000000100000000, max_paddr ) = free
 *
 * where MAGIC_HOLE starts at 0xc0000000 and TOP_OF_1 is the min of MAGIC_HOLE
 * and max_paddr (matching top_of_free_1 below).
 *
 * For the busy regions, I don't actually need to mark the pages as busy.
 * They were marked busy when the pages array was created (same as when we
 * parse multiboot info).  I'll just assert that they are properly marked as
 * busy.
 *
 * As with parsing mbi regions, this will ignore the hairy areas below
 * EXTPHYSMEM, and mark the entire kernel and anything we've boot alloc'd as
 * busy. */
static void account_for_pages(physaddr_t boot_freemem_paddr)
{
    physaddr_t top_of_busy = ROUNDUP(boot_freemem_paddr, PGSIZE);
    physaddr_t top_of_free_1 = MIN(0xc0000000, max_paddr);
    physaddr_t start_of_free_2;

    printk("Warning: poor memory detection (qemu?).  May lose 1GB of RAM\n");
    for (physaddr_t i = 0; i < top_of_busy; i += PGSIZE)
        assert(kref_refcnt(&pa64_to_page(i)->pg_kref) == 1);
    for (physaddr_t i = top_of_busy; i < top_of_free_1; i += PGSIZE)
        track_free_page(pa64_to_page(i));
    /* If max_paddr is less than the start of our potential second free mem
     * region, we can just leave.  We also don't want to poke around the pages
     * array (and accidentally run off the end of it).
     *
     * Additionally, 32 bit doesn't acknowledge pmem above the 4GB mark. */
    start_of_free_2 = 0x0000000100000000;
    if (max_paddr < start_of_free_2)
        return;
    for (physaddr_t i = top_of_free_1; i < start_of_free_2; i += PGSIZE)
        assert(kref_refcnt(&pa64_to_page(i)->pg_kref) == 1);
    for (physaddr_t i = start_of_free_2; i < max_paddr; i += PGSIZE)
        track_free_page(pa64_to_page(i));
}
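
/* Worked example: with max_paddr = 0x200000000 (8 GB), the regions become
 *
 * 		[ 0, top_of_busy )              asserted busy
 * 		[ top_of_busy, 0xc0000000 )     freed
 * 		[ 0xc0000000, 0x100000000 )     asserted busy (the possibly-lost 1GB)
 * 		[ 0x100000000, 0x200000000 )    freed
 *
 * With max_paddr below 4GB, only the first two regions exist. */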

/* Initialize the memory free lists.  After this, do not use boot_alloc. */
void page_alloc_init(struct multiboot_info *mbi)
{
    page_alloc_bootstrap();

    /* First, we need to initialize the pages array such that all memory is
     * busy by default.
     *
     * To init the free list(s), each page that is already allocated/busy will
     * remain increfed.  All other pages that were reported as 'free' will be
     * added to a free list, with their refcnts set to 0.
     *
     * To avoid a variety of headaches, any memory below 1MB is considered
     * busy.  Likewise, everything in the kernel, up to _end, is also busy, as
     * is everything we've already boot_alloc'd - even though multiboot
     * reports those chunks as 'free'.
     *
     * We'll also abort the mapping for any addresses over max_paddr, since
     * we'll never use them.  'pages' does not track them either.
     *
     * One special note: we actually use the memory at 0x1000 for smp_boot.
     * It'll get set to 'used' like the others; just FYI.
     *
     * Finally, if we want to use actual jumbo page allocation (not just
     * mapping), we need to round up _end, and make sure all of multiboot's
     * sections are jumbo-aligned. */
    physaddr_t boot_freemem_paddr = PADDR(ROUNDUP(boot_freemem, PGSIZE));

    for (long i = 0; i < max_nr_pages; i++)
        page_setref(&pages[i], 1);
    if (mboot_has_mmaps(mbi)) {
        mboot_foreach_mmap(mbi, parse_mboot_region, (void*)boot_freemem_paddr);
        /* Test the page alloc - if this gets slow, we can CONFIG it */
        mboot_foreach_mmap(mbi, check_mboot_region, (void*)boot_freemem_paddr);
    } else {
        /* No multiboot mmap regions (we probably ran from qemu with -kernel) */
        account_for_pages(boot_freemem_paddr);
    }
    printk("Number of free pages: %lu\n", nr_free_pages);
    printk("Page alloc init successful\n");
}
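
/* Hypothetical boot-time usage, as a sketch only: the actual caller lives in
 * the arch init path, and 'pmem_init' here is an assumed name.
 *
 *	void pmem_init(struct multiboot_info *mbi)
 *	{
 *		// ... set up 'pages', max_paddr, llc_cache, and boot_alloc ...
 *		page_alloc_init(mbi);
 *		// boot_alloc must not be used from this point on
 *	}
 */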