| /* |
| * Copyright (c) 2009 The Regents of the University of California |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * Kevin Klues <klueska@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * Slab allocator, based on the SunOS 5.4 allocator paper. |
| * |
 * Note that we don't have a hash table to map a buf back to its bufctl for the
 * large buffer objects, so we use the same trick as for small objects: a
 * pointer to the controlling bufctl is stored just past the end of each buffer
 * (at buf + obj_size).  The affected spots are tagged TODO (BUF).
| * |
| * Ported directly from the kernel's slab allocator. */ |
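
/* Rough layout of a small-object slab (an illustrative sketch; the exact
 * counts depend on the object size, the alignment, and
 * sizeof(struct kmem_slab)): the slab is a single page, the objects are packed
 * from the start of the page, each object is followed by one pointer-sized
 * word that threads the free list, and the struct kmem_slab itself sits at the
 * very end of the page:
 *
 *	| obj 0 | next | obj 1 | next | ... | obj N-1 | next | struct kmem_slab |
 *	^ page start                                              page end ^
 */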
| |
| #include <parlib/slab.h> |
| #include <stdio.h> |
| #include <parlib/assert.h> |
| #include <parlib/parlib.h> |
| #include <parlib/stdio.h> |
| #include <sys/mman.h> |
| #include <sys/param.h> |
| |
| struct kmem_cache_list kmem_caches; |
| struct spin_pdr_lock kmem_caches_lock; |
| |
| /* Backend/internal functions, defined later. Grab the lock before calling |
| * these. */ |
| static void kmem_cache_grow(struct kmem_cache *cp); |
| |
| /* Cache of the kmem_cache objects, needed for bootstrapping */ |
| struct kmem_cache kmem_cache_cache; |
| struct kmem_cache *kmem_slab_cache, *kmem_bufctl_cache; |
| |
| static void __kmem_cache_create(struct kmem_cache *kc, const char *name, |
| size_t obj_size, int align, int flags, |
| int (*ctor)(void *, void *, int), |
| void (*dtor)(void *, void *), |
| void *priv) |
| { |
| assert(kc); |
| assert(align); |
| spin_pdr_init(&kc->cache_lock); |
| kc->name = name; |
| kc->obj_size = obj_size; |
| kc->align = align; |
| kc->flags = flags; |
| TAILQ_INIT(&kc->full_slab_list); |
| TAILQ_INIT(&kc->partial_slab_list); |
| TAILQ_INIT(&kc->empty_slab_list); |
| kc->ctor = ctor; |
| kc->dtor = dtor; |
| kc->priv = priv; |
| kc->nr_cur_alloc = 0; |
| |
	/* put it in the cache list, sorted by object size */
| struct kmem_cache *i, *prev = NULL; |
| spin_pdr_lock(&kmem_caches_lock); |
| /* find the kmem_cache before us in the list. yes, this is O(n). */ |
| SLIST_FOREACH(i, &kmem_caches, link) { |
| if (i->obj_size < kc->obj_size) |
| prev = i; |
| else |
| break; |
| } |
| if (prev) |
| SLIST_INSERT_AFTER(prev, kc, link); |
| else |
| SLIST_INSERT_HEAD(&kmem_caches, kc, link); |
| spin_pdr_unlock(&kmem_caches_lock); |
| } |
| |
| static void kmem_cache_init(void *arg) |
| { |
| spin_pdr_init(&kmem_caches_lock); |
| SLIST_INIT(&kmem_caches); |
| /* We need to call the __ version directly to bootstrap the global |
| * kmem_cache_cache. */ |
| __kmem_cache_create(&kmem_cache_cache, "kmem_cache", |
| sizeof(struct kmem_cache), |
| __alignof__(struct kmem_cache), 0, NULL, NULL, |
| NULL); |
| /* Build the slab and bufctl caches */ |
| kmem_slab_cache = kmem_cache_alloc(&kmem_cache_cache, 0); |
| __kmem_cache_create(kmem_slab_cache, "kmem_slab", |
| sizeof(struct kmem_slab), |
| __alignof__(struct kmem_slab), 0, NULL, NULL, NULL); |
| kmem_bufctl_cache = kmem_cache_alloc(&kmem_cache_cache, 0); |
| __kmem_cache_create(kmem_bufctl_cache, "kmem_bufctl", |
| sizeof(struct kmem_bufctl), |
| __alignof__(struct kmem_bufctl), 0, NULL, NULL, |
| NULL); |
| } |
| |
| /* Cache management */ |
| struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size, |
| int align, int flags, |
| int (*ctor)(void *, void *, int), |
| void (*dtor)(void *, void *), |
| void *priv) |
| { |
| struct kmem_cache *kc; |
| static parlib_once_t once = PARLIB_ONCE_INIT; |
| |
| parlib_run_once(&once, kmem_cache_init, NULL); |
| kc = kmem_cache_alloc(&kmem_cache_cache, 0); |
| __kmem_cache_create(kc, name, obj_size, align, flags, ctor, dtor, priv); |
| return kc; |
| } |
| |
| static void kmem_slab_destroy(struct kmem_cache *cp, struct kmem_slab *a_slab) |
| { |
| if (cp->obj_size <= SLAB_LARGE_CUTOFF) { |
| /* Deconstruct all the objects, if necessary */ |
| if (cp->dtor) { |
| void *buf = a_slab->free_small_obj; |
| for (int i = 0; i < a_slab->num_total_obj; i++) { |
| cp->dtor(buf, cp->priv); |
| buf += a_slab->obj_size; |
| } |
| } |
| munmap((void*)ROUNDDOWN((uintptr_t)a_slab, PGSIZE), PGSIZE); |
| } else { |
| struct kmem_bufctl *i; |
| void *page_start = (void*)-1; |
| /* compute how many pages are allocated, same as in grow */ |
| size_t nr_pgs = ROUNDUP(NUM_BUF_PER_SLAB * a_slab->obj_size, |
| PGSIZE) / PGSIZE; |
| TAILQ_FOREACH(i, &a_slab->bufctl_freelist, link) { |
			// Track the lowest buffer address, which is the start
			// of the slab's backing buffer
| page_start = MIN(page_start, i->buf_addr); |
| /* Deconstruct all the objects, if necessary */ |
| if (cp->dtor) // TODO: (BUF) |
| cp->dtor(i->buf_addr, cp->priv); |
| kmem_cache_free(kmem_bufctl_cache, i); |
| } |
| // free the pages for the slab's buffer |
| munmap(page_start, nr_pgs * PGSIZE); |
| // free the slab object |
| kmem_cache_free(kmem_slab_cache, a_slab); |
| } |
| } |
| |
/* Once you call destroy, never use this cache again; otherwise there may be
 * weird races and other serious issues. */
| void kmem_cache_destroy(struct kmem_cache *cp) |
| { |
| struct kmem_slab *a_slab, *next; |
| |
| spin_pdr_lock(&cp->cache_lock); |
| assert(TAILQ_EMPTY(&cp->full_slab_list)); |
| assert(TAILQ_EMPTY(&cp->partial_slab_list)); |
| /* Clean out the empty list. We can't use a regular FOREACH here, since |
| * the link element is stored in the slab struct, which is stored on the |
| * page that we are freeing. */ |
| a_slab = TAILQ_FIRST(&cp->empty_slab_list); |
| while (a_slab) { |
| next = TAILQ_NEXT(a_slab, link); |
| kmem_slab_destroy(cp, a_slab); |
| a_slab = next; |
| } |
	spin_pdr_lock(&kmem_caches_lock);
	SLIST_REMOVE(&kmem_caches, cp, kmem_cache, link);
	spin_pdr_unlock(&kmem_caches_lock);
	/* Unlock before freeing cp; the lock lives inside the object we're
	 * about to free. */
	spin_pdr_unlock(&cp->cache_lock);
	kmem_cache_free(&kmem_cache_cache, cp);
| } |
| |
| /* Front end: clients of caches use these */ |
| void *kmem_cache_alloc(struct kmem_cache *cp, int flags) |
| { |
| void *retval = NULL; |
| spin_pdr_lock(&cp->cache_lock); |
| // look at partial list |
| struct kmem_slab *a_slab = TAILQ_FIRST(&cp->partial_slab_list); |
| |
| // if none, go to empty list and get an empty and make it partial |
| if (!a_slab) { |
| if (TAILQ_EMPTY(&cp->empty_slab_list)) |
| // TODO: think about non-sleeping flags |
| kmem_cache_grow(cp); |
| // move to partial list |
| a_slab = TAILQ_FIRST(&cp->empty_slab_list); |
| TAILQ_REMOVE(&cp->empty_slab_list, a_slab, link); |
| TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link); |
| } |
| // have a partial now (a_slab), get an item, return item |
| if (cp->obj_size <= SLAB_LARGE_CUTOFF) { |
| retval = a_slab->free_small_obj; |
		/* The pointer to the next free object is stored at the end of
		 * this buffer (obj_size bytes in); follow it to pop the head
		 * of the free list. */
| a_slab->free_small_obj = *(uintptr_t**)(a_slab->free_small_obj + |
| cp->obj_size); |
| } else { |
| // rip the first bufctl out of the partial slab's buf list |
| struct kmem_bufctl *a_bufctl = |
| TAILQ_FIRST(&a_slab->bufctl_freelist); |
| TAILQ_REMOVE(&a_slab->bufctl_freelist, a_bufctl, link); |
| retval = a_bufctl->buf_addr; |
| } |
| a_slab->num_busy_obj++; |
| // Check if we are full, if so, move to the full list |
| if (a_slab->num_busy_obj == a_slab->num_total_obj) { |
| TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link); |
| TAILQ_INSERT_HEAD(&cp->full_slab_list, a_slab, link); |
| } |
| cp->nr_cur_alloc++; |
| spin_pdr_unlock(&cp->cache_lock); |
| return retval; |
| } |
| |
| static inline struct kmem_bufctl *buf2bufctl(void *buf, size_t offset) |
| { |
| // TODO: hash table for back reference (BUF) |
| return *((struct kmem_bufctl**)(buf + offset)); |
| } |
| |
| void kmem_cache_free(struct kmem_cache *cp, void *buf) |
| { |
| struct kmem_slab *a_slab; |
| struct kmem_bufctl *a_bufctl; |
| |
| spin_pdr_lock(&cp->cache_lock); |
| if (cp->obj_size <= SLAB_LARGE_CUTOFF) { |
| // find its slab |
| a_slab = (struct kmem_slab*)(ROUNDDOWN((uintptr_t)buf, PGSIZE) + |
| PGSIZE - sizeof(struct kmem_slab)); |
| /* write location of next free small obj to the space at the end |
| * of the buffer, then list buf as the next free small obj */ |
| *(uintptr_t**)(buf + cp->obj_size) = a_slab->free_small_obj; |
| a_slab->free_small_obj = buf; |
| } else { |
| /* Give the bufctl back to the parent slab */ |
| // TODO: (BUF) change the interface to not take an offset |
| a_bufctl = buf2bufctl(buf, cp->obj_size); |
| a_slab = a_bufctl->my_slab; |
| TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, link); |
| } |
| a_slab->num_busy_obj--; |
| cp->nr_cur_alloc--; |
| // if it was full, move it to partial |
| if (a_slab->num_busy_obj + 1 == a_slab->num_total_obj) { |
| TAILQ_REMOVE(&cp->full_slab_list, a_slab, link); |
| TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link); |
| } else if (!a_slab->num_busy_obj) { |
		// if none are busy, move it from partial to empty
| TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link); |
| TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link); |
| } |
| spin_pdr_unlock(&cp->cache_lock); |
| } |
| |
| /* Back end: internal functions */ |
/* When this returns, the cache has at least one slab in the empty list. If the
 * mmap fails, there are some serious issues. This only grows by one slab at a
 * time.
| * |
| * Grab the cache lock before calling this. |
| * |
| * TODO: think about page colouring issues with kernel memory allocation. */ |
| static void kmem_cache_grow(struct kmem_cache *cp) |
| { |
| struct kmem_slab *a_slab; |
| struct kmem_bufctl *a_bufctl; |
| void *a_page; |
| int ctor_ret; |
| |
| if (cp->obj_size <= SLAB_LARGE_CUTOFF) { |
| // Just get a single page for small slabs |
| a_page = mmap(0, PGSIZE, PROT_READ | PROT_WRITE | PROT_EXEC, |
| MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, |
| 0); |
| assert(a_page != MAP_FAILED); |
| // the slab struct is stored at the end of the page |
| a_slab = (struct kmem_slab*)(a_page + PGSIZE - |
| sizeof(struct kmem_slab)); |
| // Need to add room for the next free item pointer in the object |
| // buffer. |
| a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), |
| cp->align); |
| a_slab->num_busy_obj = 0; |
| a_slab->num_total_obj = (PGSIZE - sizeof(struct kmem_slab)) / |
| a_slab->obj_size; |
| // TODO: consider staggering this IAW section 4.3 |
| a_slab->free_small_obj = a_page; |
		/* Walk and create the free list. Each object stores the
		 * location of the next free one at the end of its block; the
		 * last one points to NULL. */
| void *buf = a_slab->free_small_obj; |
| |
| for (int i = 0; i < a_slab->num_total_obj - 1; i++) { |
| // Initialize the object, if necessary |
| if (cp->ctor) { |
| ctor_ret = cp->ctor(buf, cp->priv, 0); |
| assert(!ctor_ret); |
| } |
| *(uintptr_t**)(buf + cp->obj_size) = buf + |
| a_slab->obj_size; |
| buf += a_slab->obj_size; |
| } |
| /* Initialize the final object (note the -1 in the for loop). */ |
| if (cp->ctor) { |
| ctor_ret = cp->ctor(buf, cp->priv, 0); |
| assert(!ctor_ret); |
| } |
| *((uintptr_t**)(buf + cp->obj_size)) = NULL; |
| } else { |
| a_slab = kmem_cache_alloc(kmem_slab_cache, 0); |
| // TODO: hash table for back reference (BUF) |
| a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), |
| cp->align); |
		/* Need nr_pgs pages to hold NUM_BUF_PER_SLAB objects. Note we
		 * don't round up to the next higher order (power of 2) number
		 * of pages, like we do in the kernel. */
| size_t nr_pgs = ROUNDUP(NUM_BUF_PER_SLAB * a_slab->obj_size, |
| PGSIZE) / PGSIZE; |
| void *buf = mmap(0, nr_pgs * PGSIZE, PROT_READ | PROT_WRITE | |
| PROT_EXEC, MAP_POPULATE | MAP_ANONYMOUS | |
| MAP_PRIVATE, -1, 0); |
| |
| assert(buf != MAP_FAILED); |
| a_slab->num_busy_obj = 0; |
| a_slab->num_total_obj = nr_pgs * PGSIZE / a_slab->obj_size; |
| TAILQ_INIT(&a_slab->bufctl_freelist); |
| /* for each buffer, set up a bufctl and point to the buffer */ |
| for (int i = 0; i < a_slab->num_total_obj; i++) { |
| // Initialize the object, if necessary |
| if (cp->ctor) { |
| ctor_ret = cp->ctor(buf, cp->priv, 0); |
| assert(!ctor_ret); |
| } |
| a_bufctl = kmem_cache_alloc(kmem_bufctl_cache, 0); |
| TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, |
| link); |
| a_bufctl->buf_addr = buf; |
| a_bufctl->my_slab = a_slab; |
| // TODO: (BUF) write the bufctl reference at the bottom |
| // of the buffer. |
| *(struct kmem_bufctl**)(buf + cp->obj_size) = a_bufctl; |
| buf += a_slab->obj_size; |
| } |
| } |
| // add a_slab to the empty_list |
| TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link); |
| } |
| |
/* This deallocates every slab on the empty list. TODO: think a bit more about
 * this. We could keep some of the empty slabs around to prevent thrashing. See
 * 3.4 in the paper. */
| void kmem_cache_reap(struct kmem_cache *cp) |
| { |
| struct kmem_slab *a_slab, *next; |
| |
	// Destroy all empty slabs; see the note in kmem_cache_destroy() about
	// why we can't use a regular FOREACH here
| spin_pdr_lock(&cp->cache_lock); |
| a_slab = TAILQ_FIRST(&cp->empty_slab_list); |
| while (a_slab) { |
| next = TAILQ_NEXT(a_slab, link); |
| kmem_slab_destroy(cp, a_slab); |
| a_slab = next; |
| } |
| spin_pdr_unlock(&cp->cache_lock); |
| } |
| |
| void print_kmem_cache(struct kmem_cache *cp) |
| { |
| spin_pdr_lock(&cp->cache_lock); |
| printf("\nPrinting kmem_cache:\n---------------------\n"); |
| printf("Name: %s\n", cp->name); |
| printf("Objsize: %d\n", cp->obj_size); |
| printf("Align: %d\n", cp->align); |
| printf("Flags: 0x%08x\n", cp->flags); |
| printf("Constructor: 0x%08x\n", cp->ctor); |
| printf("Destructor: 0x%08x\n", cp->dtor); |
| printf("Slab Full: 0x%08x\n", cp->full_slab_list); |
| printf("Slab Partial: 0x%08x\n", cp->partial_slab_list); |
| printf("Slab Empty: 0x%08x\n", cp->empty_slab_list); |
| printf("Current Allocations: %d\n", cp->nr_cur_alloc); |
| spin_pdr_unlock(&cp->cache_lock); |
| } |
| |
| void print_kmem_slab(struct kmem_slab *slab) |
| { |
| printf("\nPrinting kmem_slab:\n---------------------\n"); |
| printf("Objsize: %d (%p)\n", slab->obj_size, slab->obj_size); |
| printf("NumBusy: %d\n", slab->num_busy_obj); |
| printf("Num_total: %d\n", slab->num_total_obj); |
| if (slab->obj_size + sizeof(uintptr_t) < SLAB_LARGE_CUTOFF) { |
| printf("Free Small obj: 0x%08x\n", slab->free_small_obj); |
| void *buf = slab->free_small_obj; |
| for (int i = 0; i < slab->num_total_obj; i++) { |
| printf("Addr of buf: 0x%08x, Addr of next: 0x%08x\n", |
| buf, *((uintptr_t**)buf)); |
| buf += slab->obj_size; |
| } |
| } else { |
| printf("This is a big slab!\n"); |
| } |
| } |
| |