| /* Copyright (c) 2009 The Regents of the University of California. |
| * See the COPYRIGHT files at the top of this source tree for full |
| * license information. |
| * |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * Kevin Klues <klueska@cs.berkeley.edu> */ |
| |
| #ifdef __SHARC__ |
| #pragma nosharc |
| #define SINIT(x) x |
| #endif |
| |
| #include <sys/queue.h> |
| #include <page_alloc.h> |
| #include <pmap.h> |
| #include <kmalloc.h> |
| #include <multiboot.h> |
| |
| spinlock_t colored_page_free_list_lock = SPINLOCK_INITIALIZER_IRQSAVE; |
| |
| page_list_t LCKD(&colored_page_free_list_lock) * CT(llc_cache->num_colors) RO |
| colored_page_free_list = NULL; |
| |
| static void page_alloc_bootstrap(void)
| {
| 	/* Allocate space for the array required to manage the free lists */
| 	size_t list_size = llc_cache->num_colors * sizeof(page_list_t);
| 	page_list_t LCKD(&colored_page_free_list_lock) *tmp =
| 		(page_list_t*)boot_alloc(list_size, PGSIZE);
| colored_page_free_list = SINIT(tmp); |
| for (int i = 0; i < llc_cache->num_colors; i++) |
| BSD_LIST_INIT(&colored_page_free_list[i]); |
| } |
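| 
| /* boot_alloc() above comes from the early boot-time bump allocator, which is
|  * defined outside this file.  Purely for illustration, here is a rough,
|  * hypothetical sketch of the idea (not the real implementation), assuming
|  * boot_freemem and boot_freelimit are char* bounds of the boot zone (both are
|  * referenced later in this file): */
| static void *example_boot_alloc(size_t amt, size_t align)
| {
| 	void *ret;
| 
| 	/* Bump-allocate: align the cursor, hand out amt bytes, and never run
| 	 * past the end of the boot zone. */
| 	boot_freemem = (char*)ROUNDUP((uintptr_t)boot_freemem, align);
| 	ret = boot_freemem;
| 	boot_freemem += amt;
| 	assert(boot_freemem <= boot_freelimit);
| 	return ret;
| }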
| |
| /* Can do whatever here. For now, our page allocator just works with colors, |
| * not NUMA zones or anything. */ |
| static void track_free_page(struct page *page) |
| { |
| BSD_LIST_INSERT_HEAD(&colored_page_free_list[get_page_color(page2ppn(page), |
| llc_cache)], |
| page, pg_link); |
| nr_free_pages++; |
| 	/* Page was previously marked as busy; we need to set it free explicitly */
| page_setref(page, 0); |
| } |
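| 
| /* For illustration, the reverse of track_free_page(): a minimal, hypothetical
|  * sketch of how an allocator could pull a free page of a given color back off
|  * these lists (the real allocation paths live elsewhere).  Assumes the
|  * BSD_LIST_FIRST/BSD_LIST_REMOVE macros from sys/queue.h and that the caller
|  * holds colored_page_free_list_lock: */
| static struct page *example_alloc_page_of_color(size_t color)
| {
| 	struct page *page = BSD_LIST_FIRST(&colored_page_free_list[color]);
| 
| 	if (!page)
| 		return NULL;
| 	BSD_LIST_REMOVE(page, pg_link);
| 	nr_free_pages--;
| 	/* Mirror track_free_page(): pages on the free list have refcnt 0, so
| 	 * hand the new owner its reference. */
| 	page_setref(page, 1);
| 	return page;
| }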
| |
| static struct page *pa64_to_page(uint64_t paddr) |
| { |
| return &pages[paddr >> PGSHIFT]; |
| } |
| |
| static bool pa64_is_in_kernel(uint64_t paddr) |
| { |
| extern char end[]; |
| 	/* The kernel is linked and loaded here (see kernel{32,64}.ld) */
| return (EXTPHYSMEM <= paddr) && (paddr < PADDR(end)); |
| } |
| |
| /* Helper. For every page in the entry, this determines whether or not the
|  * page is free and handles it accordingly. All pages are marked as busy by
|  * default; we're just determining which of them could be free. */
| static void parse_mboot_region(struct multiboot_mmap_entry *entry, void *data) |
| { |
| physaddr_t boot_freemem_paddr = (physaddr_t)data; |
| bool in_bootzone = (entry->addr <= boot_freemem_paddr) && |
| (boot_freemem_paddr < entry->addr + entry->len); |
| |
| if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) |
| return; |
| /* TODO: we'll have some issues with jumbo allocation */ |
| /* Most entries are page aligned, though on some machines below EXTPHYSMEM |
| * we may have some that aren't. If two regions collide on the same page |
| * (one of them starts unaligned), we need to only handle the page once, and |
| * err on the side of being busy. |
| * |
| * Since these regions happen below EXTPHYSMEM, they are all marked busy (or |
| * else we'll panic). I'll probably rewrite this for jumbos before I find a |
| * machine with unaligned mboot entries in higher memory. */ |
| if (PGOFF(entry->addr)) |
| assert(entry->addr < EXTPHYSMEM); |
| for (uint64_t i = ROUNDDOWN(entry->addr, PGSIZE); |
| i < entry->addr + entry->len; |
| i += PGSIZE) { |
| 		/* Skip pages we'll never map (at or above max_paddr). Once we hit
| 		 * one of them, we know the rest are too (for this entry). */
| if (i >= max_paddr) |
| return; |
| /* Mark low mem as busy (multiboot stuff is there, usually, too). Since |
| * that memory may be freed later (like the smp_boot page), we'll treat |
| * it like it is busy/allocated. */ |
| if (i < EXTPHYSMEM) |
| continue; |
| /* Mark as busy pages already allocated in boot_alloc() */ |
| if (in_bootzone && (i < boot_freemem_paddr)) |
| continue; |
| /* Need to double check for the kernel, in case it wasn't in the |
| * bootzone. If it was in the bootzone, we already skipped it. */ |
| if (pa64_is_in_kernel(i)) |
| continue; |
| track_free_page(pa64_to_page(i)); |
| } |
| } |
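| 
| /* For reference, mboot_foreach_mmap() (declared in multiboot.h) is what feeds
|  * entries to the callback above.  A bare-bones, hypothetical sketch of such a
|  * walk, assuming the entries follow the multiboot spec (each entry's 'size'
|  * field excludes the size field itself) and that mmap_addr is directly usable
|  * as a pointer (the real code would need a KADDR-style translation): */
| static void example_foreach_mmap(struct multiboot_info *mbi,
|                                  void (*func)(struct multiboot_mmap_entry*,
|                                               void*),
|                                  void *data)
| {
| 	struct multiboot_mmap_entry *entry =
| 	        (struct multiboot_mmap_entry*)(uintptr_t)mbi->mmap_addr;
| 	char *mmap_end = (char*)entry + mbi->mmap_length;
| 
| 	while ((char*)entry < mmap_end) {
| 		func(entry, data);
| 		/* 'size' does not count the size field itself */
| 		entry = (struct multiboot_mmap_entry*)
| 		        ((char*)entry + entry->size + sizeof(entry->size));
| 	}
| }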
| |
| static void check_range(uint64_t start, uint64_t end, int expect) |
| { |
| int ref; |
| if (PGOFF(start)) |
| printk("Warning: check_range given unaligned addr 0x%016llx\n", start); |
| for (uint64_t i = start; i < end; i += PGSIZE) { |
| ref = kref_refcnt(&pa64_to_page(i)->pg_kref); |
| if (ref != expect) { |
| printk("Error: while checking range [0x%016llx, 0x%016llx), " |
| "physaddr 0x%016llx refcnt was %d, expected %d\n", start, |
| end, i, ref, expect); |
| 			panic("Page refcnt check failed!");
| } |
| } |
| } |
| |
| /* Note this doesn't check all of memory. There are some chunks of 'memory' |
| * that aren't reported by MB at all, like the VRAM sections at 0xa0000. */ |
| static void check_mboot_region(struct multiboot_mmap_entry *entry, void *data) |
| { |
| extern char end[]; |
| physaddr_t boot_freemem_paddr = (physaddr_t)data; |
| bool in_bootzone = (entry->addr <= boot_freemem_paddr) && |
| (boot_freemem_paddr < entry->addr + entry->len); |
| /* Need to deal with 32b wrap-around */ |
| uint64_t zone_end = MIN(entry->addr + entry->len, (uint64_t)max_paddr); |
| |
| if (entry->type != MULTIBOOT_MEMORY_AVAILABLE) { |
| check_range(entry->addr, zone_end, 1); |
| return; |
| } |
| if (zone_end <= EXTPHYSMEM) { |
| check_range(entry->addr, zone_end, 1); |
| return; |
| } |
| /* this may include the kernel */ |
| if (in_bootzone) { |
| /* boot_freemem might not be page aligned. If it's part-way through a |
| * page, that page should be busy */ |
| check_range(entry->addr, ROUNDUP(PADDR(boot_freemem), PGSIZE), 1); |
| check_range(ROUNDUP(PADDR(boot_freemem), PGSIZE), zone_end, 0); |
| assert(zone_end == PADDR(boot_freelimit)); |
| return; |
| } |
| /* kernel's range (hardcoded in the linker script). If we're checking now, |
| * it means the kernel is not in the same entry as the bootzone. */ |
| if (entry->addr == EXTPHYSMEM) { |
| check_range(EXTPHYSMEM, PADDR(end), 1); |
| check_range(ROUNDUP(PADDR(end), PGSIZE), zone_end, 0); |
| return; |
| } |
| } |
| |
| /* Without multiboot mmap entries to parse, we have to guess which pages are
|  * free and which ones aren't.
| * |
| * Despite the lack of info from mbi, I know there is a magic hole in physical |
| * memory that we can't use, from the IOAPIC_PBASE on up [0xfec00000, |
|  * 0xffffffff] (I'm being pessimistic). But that's not pessimistic enough:
|  * qemu still doesn't like that, while using 0xe0000000 instead works on my
|  * machine.
| * According to http://wiki.osdev.org/Memory_Map_(x86), some systems could |
| * reserve from [0xc0000000, 0xffffffff]. Anyway, in lieu of real memory |
| * detection, I'm just skipping that entire region. |
| * |
| * We may or may not have more free memory above this magic hole, depending on |
| * both the amount of RAM we have as well as 32 vs 64 bit. |
| * |
| * So we'll go with two free memory regions: |
| * |
| * [ 0, ROUNDUP(boot_freemem_paddr, PGSIZE) ) = busy |
| * [ ROUNDUP(boot_freemem_paddr, PGSIZE), TOP_OF_1 ) = free |
| * [ MAGIC_HOLE, 0x0000000100000000 ) = busy |
| * (and maybe this:) |
| * [ 0x0000000100000000, max_paddr ) = free |
| * |
|  * where TOP_OF_1 is the min of MAGIC_HOLE (0xc0000000 in the code below) and
|  * max_paddr.
| * |
| * For the busy regions, I don't actually need to mark the pages as busy. They |
| * were marked busy when the pages array was created (same as when we parse |
| * multiboot info). I'll just assert that they are properly marked as busy. |
| * |
| * As with parsing mbi regions, this will ignore the hairy areas below |
| * EXTPHYSMEM, and mark the entire kernel and anything we've boot alloc'd as |
| * busy. */ |
| static void account_for_pages(physaddr_t boot_freemem_paddr) |
| { |
| physaddr_t top_of_busy = ROUNDUP(boot_freemem_paddr, PGSIZE); |
| 	/* 0xc0000000 is MAGIC_HOLE, the pessimistic start of the reserved area */
| 	physaddr_t top_of_free_1 = MIN(0xc0000000, max_paddr);
| physaddr_t start_of_free_2; |
| |
| printk("Warning: poor memory detection (qemu?). May lose 1GB of RAM\n"); |
| for (physaddr_t i = 0; i < top_of_busy; i += PGSIZE) |
| assert(kref_refcnt(&pa64_to_page(i)->pg_kref) == 1); |
| for (physaddr_t i = top_of_busy; i < top_of_free_1; i += PGSIZE) |
| track_free_page(pa64_to_page(i)); |
| /* If max_paddr is less than the start of our potential second free mem |
| * region, we can just leave. We also don't want to poke around the pages |
| * array either (and accidentally run off the end of the array). |
| * |
| * Additionally, 32 bit doesn't acknowledge pmem above the 4GB mark. */ |
| start_of_free_2 = 0x0000000100000000; |
| if (max_paddr < start_of_free_2) |
| return; |
| for (physaddr_t i = top_of_free_1; i < start_of_free_2; i += PGSIZE) |
| assert(kref_refcnt(&pa64_to_page(i)->pg_kref) == 1); |
| for (physaddr_t i = start_of_free_2; i < max_paddr; i += PGSIZE) |
| track_free_page(pa64_to_page(i)); |
| } |
| |
| /* Initialize the memory free lists. After this, do not use boot_alloc. */ |
| void page_alloc_init(struct multiboot_info *mbi) |
| { |
| page_alloc_bootstrap(); |
| /* First, we need to initialize the pages array such that all memory is busy |
| * by default. |
| * |
| * To init the free list(s), each page that is already allocated/busy will |
| * remain increfed. All other pages that were reported as 'free' will be |
| * added to a free list. Their refcnts are set to 0. |
| * |
| 	 * To avoid a variety of headaches, any memory below 1MB is considered
| 	 * busy. Likewise, everything in the kernel up to _end is busy, as is
| 	 * everything we've already boot_alloc'd, even though multiboot reports
| 	 * those chunks as 'free'.
| * |
| * We'll also abort the mapping for any addresses over max_paddr, since |
| * we'll never use them. 'pages' does not track them either. |
| * |
| * One special note: we actually use the memory at 0x1000 for smp_boot. |
| * It'll get set to 'used' like the others; just FYI. |
| * |
| * Finally, if we want to use actual jumbo page allocation (not just |
| * mapping), we need to round up _end, and make sure all of multiboot's |
| * sections are jumbo-aligned. */ |
| physaddr_t boot_freemem_paddr = PADDR(ROUNDUP(boot_freemem, PGSIZE)); |
| |
| for (long i = 0; i < max_nr_pages; i++) |
| page_setref(&pages[i], 1); |
| if (mboot_has_mmaps(mbi)) { |
| mboot_foreach_mmap(mbi, parse_mboot_region, (void*)boot_freemem_paddr); |
| /* Test the page alloc - if this gets slow, we can CONFIG it */ |
| mboot_foreach_mmap(mbi, check_mboot_region, (void*)boot_freemem_paddr); |
| } else { |
| /* No multiboot mmap regions (probably run from qemu with -kernel) */ |
| account_for_pages(boot_freemem_paddr); |
| } |
| printk("Number of free pages: %lu\n", nr_free_pages); |
| printk("Page alloc init successful\n"); |
| } |
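| 
| /* A cheap, hypothetical post-init sanity check, for illustration: every page
|  * strung on the color free lists should be accounted for in nr_free_pages.
|  * Assumes the BSD_LIST_FOREACH iterator from sys/queue.h: */
| static void example_check_free_lists(void)
| {
| 	size_t counted = 0;
| 	struct page *page;
| 
| 	for (int i = 0; i < llc_cache->num_colors; i++)
| 		BSD_LIST_FOREACH(page, &colored_page_free_list[i], pg_link)
| 			counted++;
| 	assert(counted == nr_free_pages);
| }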