arena: add arena tests

And some details about the difference between qcaches and regular slab
allocators.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/src/arena.c b/kern/src/arena.c
index 3dd0084..5a006d9 100644
--- a/kern/src/arena.c
+++ b/kern/src/arena.c
@@ -51,6 +51,9 @@
  *   poke the CV if the first waiter is likely to succeed.
  * - Reclaim: have a ktask that sleeps on a rendez.  We poke it, even from IRQ
  *   context.  It qlocks arenas_and_slabs_lock, then does the reclaim.
+ * - There's an issue with when slab objects get deconstructed, and how that
+ *   interacts with what I wanted to do with kstacks and TLB shootdowns.  I
+ *   think right now (2019-09) there is a problem with it.
  *
  * FAQ:
  * - Does allocating memory from an arena require it to take a btag?  Yes -
@@ -69,6 +72,28 @@
  *   will have their own stats, and it'd be a minor pain to sync up with them
  *   all the time.  Also, the important stat is when the base arena starts to
  *   run out of memory, and base arenas don't have qcaches, so it's moot.
+ * - What's the difference between a kmem_cache (slab / KC) that is used as a
+ *   qcache and one that just sources from the arena?  Two things.  First,
+ *   qcaches are set up to be KMC_NOTOUCH, meaning the objects we allocate
+ *   cannot be treated as memory at any point.  When a KC is "pro-touch", it
+ *   can use the memory of unallocated objects - allocated from the arena to
+ *   the KC, but not yet handed to the user - for its own bookkeeping.
+ *   NOTOUCH means we cannot do that.  E.g. an integer allocator: don't just
+ *   write to random addresses!  In general, all arenas are NOTOUCH by
+ *   default, to avoid these sorts of disasters.  The only time you'd want
+ *   "pro-touch" is for small-size KCs that are actually backed by memory.
+ *   In those cases, you just make KCs that source from an arena but are not
+ *   qcaches; these are the kmalloc slabs.  Second, qcaches have a different
+ *   import_amt - just a performance decision recommended by the vmem paper.
+ *   import_amt is only used with large or no-touch objects, which is always
+ *   the case with qcaches.  Small, pro-touch KCs just grab a page at a time
+ *   and use that for the slab struct and linked list.  Finally, all
+ *   qcaches, being NOTOUCH, use bufctls in the slab.  These are basically
+ *   the same as the arena btags.  So ultimately, all objects allocated
+ *   from an arena, even those from qcaches, have some sort of btag.  Each
+ *   slab also has an *additional* BT in the arena, representing the
+ *   source's alloc.  (Actually, that alloc can be from another, larger
+ *   qcache of the arena!).
  */
 
 #include <arena.h>
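To make the qcache-vs-sourcing distinction above concrete, here is a minimal
C sketch built only from the interfaces this patch's tests exercise
(arena_create's trailing qcache_max argument, arena_add, kmem_cache_create).
The names, sizes, 'mem_arena', and the 0 flags value for a pro-touch cache
are illustrative assumptions, not code from this commit:

	#include <arena.h>
	#include <slab.h>

	/* 'mem_arena' would be a page-backed arena (kpages-style source);
	 * that, and everything named here, is an assumption for the sketch. */
	static void qcache_vs_sourcing_sketch(struct arena *mem_arena)
	{
		struct arena *ids;
		struct kmem_cache *kc;
		void *id, *buf;

		/* NOTOUCH (the default): an ID arena whose "addresses" are
		 * not memory.  qcache_max = 4 requests qcaches for the
		 * smallest object sizes; those qcaches are KMC_NOTOUCH and
		 * track free objects with bufctls, never by writing into the
		 * objects themselves. */
		ids = arena_create("ids", NULL, 0, 1, NULL, NULL, NULL, 4,
				   MEM_WAIT);
		arena_add(ids, (void*)1, 1000, MEM_WAIT);
		id = arena_alloc(ids, 2, MEM_WAIT);	/* via a qcache */

		/* Pro-touch: a KC that sources from a memory-backed arena but
		 * is not a qcache - the kmalloc-slab pattern.  Passing 0
		 * instead of KMC_NOTOUCH (an assumption here) lets the slab
		 * use the memory of unallocated objects for bookkeeping. */
		kc = kmem_cache_create("buf-32", 32, 1, 0, mem_arena, NULL,
				       NULL, NULL);
		buf = kmem_cache_alloc(kc, MEM_WAIT);

		kmem_cache_free(kc, buf);
		kmem_cache_destroy(kc);
		arena_free(ids, id, 2);
		arena_destroy(ids);
	}

Either way, per the last point in the FAQ entry, the span backing the qcache
or KC shows up as a single allocated BT in the source arena; what differs is
the per-object bookkeeping (bufctls vs. the in-object linked list).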
diff --git a/kern/src/ktest/Kbuild b/kern/src/ktest/Kbuild
index 4498f30..5b74f7c 100644
--- a/kern/src/ktest/Kbuild
+++ b/kern/src/ktest/Kbuild
@@ -1,3 +1,4 @@
 obj-y += ktest.o
+obj-$(CONFIG_KTEST_ARENA) += kt_arena.o
 obj-$(CONFIG_PB_KTESTS) += pb_ktests.o
 obj-$(CONFIG_NET_KTESTS) += net_ktests.o
diff --git a/kern/src/ktest/Kconfig.kernel b/kern/src/ktest/Kconfig.kernel
index 0aa2a21..ebbc874 100644
--- a/kern/src/ktest/Kconfig.kernel
+++ b/kern/src/ktest/Kconfig.kernel
@@ -6,3 +6,10 @@
 source "kern/src/ktest/Kconfig.postboot"
 
 source "kern/src/ktest/Kconfig.net"
+
+config KTEST_ARENA
+	depends on KERNEL_TESTING
+	bool "Arena kernel test"
+	default y
+	help
+	  Run the arena tests.
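For reference (not part of the patch): since the option is "default y",
standard Kconfig semantics mean any build with its dependency enabled should
end up with the lines below in its generated config, which is what the Kbuild
rule earlier keys off:

	CONFIG_KERNEL_TESTING=y
	CONFIG_KTEST_ARENA=y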
diff --git a/kern/src/ktest/kt_arena.c b/kern/src/ktest/kt_arena.c
new file mode 100644
index 0000000..29c3978
--- /dev/null
+++ b/kern/src/ktest/kt_arena.c
@@ -0,0 +1,458 @@
+#include <arena.h>
+#include <slab.h>
+#include <ktest.h>
+#include <linker_func.h>
+
+KTEST_SUITE("ARENA")
+
+static bool test_nextfit(void)
+{
+	struct arena *a;
+	void *o1, *o2, *o3;
+
+	a = arena_create(__func__, (void*)1, 30, 1, NULL, NULL, NULL, 0,
+			 MEM_WAIT);
+	o1 = arena_alloc(a, 1, MEM_WAIT | ARENA_NEXTFIT);
+	o2 = arena_alloc(a, 1, MEM_WAIT | ARENA_NEXTFIT);
+	/* If we didn't NEXTFIT, the allocator would likely give us '1' back */
+	arena_free(a, o1, 1);
+	o3 = arena_alloc(a, 1, MEM_WAIT | ARENA_NEXTFIT);
+	KT_ASSERT(o3 == o2 + 1);
+	arena_free(a, o2, 1);
+	arena_free(a, o3, 1);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_bestfit(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	/* Each span will be an independent chunk in the allocator.  Their base
+	 * values don't matter; they just identify the spans.
+	 *
+	 * BESTFIT for 65 should be 67.  INSTANTFIT should be 128.  The (128-1)
+	 * objects around 67 are to make sure we check all objects on the 2^6
+	 * list. */
+	arena_add(a, (void*)1000, 64, MEM_WAIT);
+	arena_add(a, (void*)3000, 128 - 1, MEM_WAIT);
+	arena_add(a, (void*)2000, 67, MEM_WAIT);
+	arena_add(a, (void*)4000, 128 - 1, MEM_WAIT);
+	arena_add(a, (void*)5000, 128, MEM_WAIT);
+	o1 = arena_alloc(a, 65, MEM_WAIT | ARENA_BESTFIT);
+	KT_ASSERT(o1 == (void*)2000);
+	arena_free(a, o1, 65);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_instantfit(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)1000, 64, MEM_WAIT);
+	arena_add(a, (void*)2000, 67, MEM_WAIT);
+	arena_add(a, (void*)3000, 64, MEM_WAIT);
+	arena_add(a, (void*)4000, 128, MEM_WAIT);
+	o1 = arena_alloc(a, 65, MEM_WAIT | ARENA_INSTANTFIT);
+	KT_ASSERT(o1 == (void*)4000);
+	arena_free(a, o1, 65);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_quantum_align(void)
+{
+	struct arena *a;
+	void *o1, *o2;
+
+	a = arena_create(__func__, NULL, 0, 32, NULL, NULL, NULL, 0, MEM_WAIT);
+	/* This should give us one object only: */
+	arena_add(a, (void*)(4096 + 1), 64, MEM_WAIT);
+	/* 1 gets rounded up to quantum, so we're really asking for 32 */
+	o1 = arena_alloc(a, 1, MEM_WAIT);
+	KT_ASSERT(o1 == ROUNDUP((void*)(4096 + 1), a->quantum));
+	/* Should be nothing quantum-sized left */
+	o2 = arena_alloc(a, 1, MEM_ATOMIC);
+	KT_ASSERT(o2 == NULL);
+	arena_free(a, o1, 1);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_odd_quantum(void)
+{
+	struct arena *a;
+	void *o1, *o2;
+
+	a = arena_create(__func__, NULL, 0, 7, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)7, 49, MEM_WAIT);
+	o1 = arena_alloc(a, 7, MEM_WAIT);
+	KT_ASSERT(o1 == (void*)7);
+	o2 = arena_alloc(a, 7, MEM_WAIT);
+	KT_ASSERT(o2 == (void*)14);
+	arena_free(a, o1, 7);
+	arena_free(a, o2, 7);
+
+	/* In older arena code, this would fragment such that it could hand out
+	 * non-quantum-aligned objects. */
+	o1 = arena_xalloc(a, 7, 4, 0, 0, NULL, NULL, MEM_WAIT);
+	o2 = arena_alloc(a, 7, MEM_WAIT);
+	KT_ASSERT(!((uintptr_t)o2 % 7));
+	arena_xfree(a, o1, 7);
+	arena_free(a, o2, 7);
+	arena_destroy(a);
+
+	return true;
+}
+
+/* The nocross fallback hops over the first nocross boundary in a segment we
+ * try, in the hopes that the rest of the segment can satisfy the constraints.
+ */
+static bool test_nocross_fallback(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 3, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)3, 20, MEM_WAIT);
+	o1 = arena_xalloc(a, 3, 1, 0, 4, NULL, NULL, MEM_WAIT);
+	/* 6 would be wrong: we hopped over 4, but then didn't check that
+	 * segment either (it crosses 8). */
+	KT_ASSERT(o1 == (void*)9);
+	arena_xfree(a, o1, 3);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_xalloc_from_freelist(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	/* One object on the order 3 list: size [8, 15].  It also starts at 15,
+	 * which will satisfy align=8 phase=7. */
+	arena_add(a, (void*)15, 15, MEM_WAIT);
+	/* Adding phase + ALIGN(align) would have us look on the order 4 list,
+	 * which is what older code did. */
+	o1 = arena_xalloc(a, 15, 8, 7, 0, NULL, NULL,
+			  MEM_ATOMIC | ARENA_BESTFIT);
+	KT_ASSERT(o1 == (void*)15);
+	arena_xfree(a, o1, 15);
+	arena_destroy(a);
+
+	return true;
+}
+
+/* Right now, instantfit failures do *not* fall back to bestfit.  If we ever do
+ * that, we can turn on this test.  {,x}alloc with a source will fall back to
+ * bestfit *after* it went to the source. */
+static bool test_alloc_instantfit_fallback(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	/* One object on the order 3 list: size [8, 15], at 1. */
+	arena_add(a, (void*)1, 15, MEM_WAIT);
+	o1 = arena_alloc(a, 15, MEM_ATOMIC);
+	KT_ASSERT(o1 == (void*)1);
+	arena_free(a, o1, 15);
+	o1 = arena_xalloc(a, 15, 1, 0, 0, NULL, NULL, MEM_ATOMIC);
+	KT_ASSERT(o1 == (void*)1);
+	arena_xfree(a, o1, 15);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_qcache(void)
+{
+	struct arena *a;
+	void *o1, *o2, *o3, *o4;
+
+	/* 3 qcaches */
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 3, MEM_WAIT);
+	arena_add(a, (void*)1, 10000, MEM_WAIT);
+	/* Alloc from each qc, plus the arena. */
+	o1 = arena_alloc(a, 1, MEM_WAIT);
+	o2 = arena_alloc(a, 2, MEM_WAIT);
+	o3 = arena_alloc(a, 3, MEM_WAIT);
+	o4 = arena_alloc(a, 4, MEM_WAIT);
+
+	arena_free(a, o1, 1);
+	arena_free(a, o2, 2);
+	arena_free(a, o3, 3);
+	arena_free(a, o4, 4);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_qc_odd_quantum(void)
+{
+	struct arena *a;
+	void *o[4];
+
+	/* 3 qcaches, non-power-of-two quantum.  This checks the slab guarantee
+	 * that if slab objects (qcaches) are a multiple of source->quantum,
+	 * then all allocations are multiples of quantum. */
+	a = arena_create(__func__, NULL, 0, 7, NULL, NULL, NULL, 21, MEM_WAIT);
+	arena_add(a, (void*)7, 10000, MEM_WAIT);
+	/* Alloc from each qc, plus the arena; ensure quantum alignment. */
+	for (int i = 1; i < 4; i++) {
+		size_t amt = 7 * i;
+
+		/* Get a few before checking them all */
+		for (int j = 0; j < ARRAY_SIZE(o); j++)
+			o[j] = arena_alloc(a, amt, MEM_WAIT);
+		for (int j = 0; j < ARRAY_SIZE(o); j++)
+			KT_ASSERT(!((uintptr_t)o[j] % 7));
+		for (int j = 0; j < ARRAY_SIZE(o); j++)
+			arena_free(a, o[j], amt);
+	}
+	arena_destroy(a);
+
+	return true;
+}
+
+/* Slab code had an issue with align > PGSIZE.  QCs are quantum-aligned, so
+ * quantum > PGSIZE with a QC caused trouble.
+ */
+static bool test_qc_large_quantum(void)
+{
+	struct arena *a;
+	void *o1;
+
+	a = arena_create(__func__, NULL, 0, 8192, NULL, NULL, NULL, 8192,
+			 MEM_WAIT);
+	arena_add(a, (void*)8192, 8192 * 4, MEM_WAIT);
+	o1 = arena_alloc(a, 8192, MEM_WAIT);
+	arena_free(a, o1, 8192);
+	arena_destroy(a);
+
+	return true;
+}
+
+/* Just examples of stuff you can do with import functions. */
+static void *tiaf(struct arena *a, size_t amt, int flags)
+{
+	void *obj = arena_alloc(a, amt, flags);
+
+	return (void*)((uintptr_t)obj << 15);
+}
+
+static void tiff(struct arena *a, void *obj, size_t amt)
+{
+	arena_free(a, (void*)((uintptr_t)obj >> 15), amt);
+}
+
+static bool test_import(void)
+{
+	struct arena *a, *s;
+	void *o1, *o2;
+
+	s = arena_create("test_import-source", NULL, 0, 4096, NULL, NULL, NULL,
+			 0, MEM_WAIT);
+	arena_add(s, (void*)4096, 4096 * 4, MEM_WAIT);
+	a = arena_create("test_import-actual", NULL, 0, 1, tiaf, tiff, s, 2,
+			 MEM_WAIT);
+
+	o1 = arena_alloc(a, 1, MEM_WAIT);
+	o2 = arena_alloc(a, 2, MEM_WAIT);
+	/* Make sure our handlers run.  The source gives 'a' addresses around
+	 * 4096, which the import funcs translate to above 1 << 15. */
+	KT_ASSERT((uintptr_t)o1 >= (1 << 15));
+	KT_ASSERT((uintptr_t)o2 >= (1 << 15));
+
+	arena_free(a, o1, 1);
+	arena_free(a, o2, 2);
+	arena_destroy(a);
+	arena_destroy(s);
+
+	return true;
+}
+
+static bool test_import_slab(void)
+{
+	struct arena *s;
+	struct kmem_cache *kc;
+	void *o[3];
+
+	s = arena_create(__func__, NULL, 0, 7, NULL, NULL, NULL,
+			 0, MEM_WAIT);
+	/* We need to have a sizable amount here, since the KCs will pull a lot
+	 * of resources when growing.  7000 isn't enough. */
+	arena_add(s, (void*)7, 70000, MEM_WAIT);
+
+	/* Quantum-preserving guarantee */
+	kc = kmem_cache_create("test_import_slab-QP", 14, 1, KMC_NOTOUCH, s,
+			       NULL, NULL, NULL);
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		o[i] = kmem_cache_alloc(kc, MEM_WAIT);
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		KT_ASSERT(!((uintptr_t)o[i] % 7));
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		kmem_cache_free(kc, o[i]);
+	kmem_cache_destroy(kc);
+
+	/* Listen to slab's alignment guarantee */
+	kc = kmem_cache_create("test_import_slab-AG", 1, 16, KMC_NOTOUCH, NULL,
+			       NULL, NULL, NULL);
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		o[i] = kmem_cache_alloc(kc, MEM_WAIT);
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		KT_ASSERT(ALIGNED(o[i], 16));
+	for (int i = 0; i < ARRAY_SIZE(o); i++)
+		kmem_cache_free(kc, o[i]);
+	kmem_cache_destroy(kc);
+
+	arena_destroy(s);
+
+	return true;
+}
+
+/* Arena import code wasn't grabbing enough, such that when we aligned the
+ * source object to a's np2sb (which happened to be a power of 2), we had
+ * nothing left to actually put in the arena.
+ *
+ * Additionally, arenas weren't freeing segments back to their sources.
+ */
+static bool test_import_alignment(void)
+{
+	struct arena *s, *a;
+	void *o1;
+
+	s = arena_create("test_import_alignment-s", NULL, 0, 1,
+			 NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(s, (void*)1, 1000, MEM_WAIT);
+	a = arena_create("test_import_alignment-a", NULL, 0, 16,
+			 arena_alloc, arena_free, s,
+			 0, MEM_WAIT);
+	o1 = arena_alloc(a, 16, MEM_WAIT);
+	KT_ASSERT(o1);
+	arena_free(a, o1, 16);
+	arena_destroy(a);
+	arena_destroy(s);
+
+	return true;
+}
+
+static bool test_xalloc(void)
+{
+	struct arena *a;
+	void *o1, *o2, *o3, *o4;
+
+	a = arena_create(__func__, NULL, 0, 3, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)3, 4096, MEM_WAIT);
+
+	/* align 16, phase 6 */
+	o1 = arena_xalloc(a, 3, 16, 6, 0, NULL, NULL, MEM_WAIT);
+	KT_ASSERT(ALIGNED((uintptr_t)o1 - 6, 16));
+	KT_ASSERT(!((uintptr_t)o1 % 3));
+
+	/* nocross 16 */
+	o2 = arena_xalloc(a, 15, 1, 0, 16, NULL, NULL, MEM_WAIT);
+	KT_ASSERT(!((uintptr_t)o2 % 3));
+	KT_ASSERT(ROUNDUP(o2 + 1, 16) >= o2 + 15);
+
+	/* min 81, max 252; should be available. */
+	o3 = arena_xalloc(a, 3, 1, 0, 0, (void*)81, (void*)252, MEM_WAIT);
+	KT_ASSERT(!((uintptr_t)o3 % 3));
+	KT_ASSERT(81 <= (uintptr_t)o3 && (uintptr_t)o3 < 252);
+
+	/* Older xalloc code could hand out non-free segments! */
+	o4 = arena_xalloc(a, 3, 1, 0, 0, (void*)81, (void*)252, MEM_WAIT);
+	KT_ASSERT(!((uintptr_t)o4 % 3));
+	KT_ASSERT(81 <= (uintptr_t)o4 && (uintptr_t)o4 < 252);
+	KT_ASSERT(o4 != o3);
+
+	arena_xfree(a, o1, 3);
+	arena_xfree(a, o2, 15);
+	arena_xfree(a, o3, 3);
+	arena_xfree(a, o4, 3);
+	arena_destroy(a);
+
+	return true;
+}
+
+static bool test_xalloc_minmax(void)
+{
+	struct arena *a;
+	void *o1, *o2;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)1, 100, MEM_WAIT);
+	o1 = arena_xalloc(a, 20, 1, 0, 0, (void*)10, (void*)30, MEM_ATOMIC);
+	KT_ASSERT((uintptr_t)o1 == 10);
+	o2 = arena_xalloc(a, 20, 1, 0, 0, (void*)30, (void*)50, MEM_ATOMIC);
+	KT_ASSERT((uintptr_t)o2 == 30);
+	arena_xfree(a, o1, 20);
+	arena_xfree(a, o2, 20);
+	arena_destroy(a);
+
+	return true;
+}
+
+/* Note we don't use qcaches here - they throw off the measurements, since all
+ * qcache objects (free or not) are counted as allocated from the arena's
+ * perspective.
+ */
+static bool test_accounting(void)
+{
+	struct arena *a;
+	void *o1, *o2;
+
+	a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0, MEM_WAIT);
+	arena_add(a, (void*)1, 100, MEM_WAIT);
+	KT_ASSERT(arena_amt_free(a) == 100);
+	KT_ASSERT(arena_amt_total(a) == 100);
+
+	/* Ensure some fragmentation */
+	o1 = arena_xalloc(a, 15, 1, 0, 0, (void*)10, (void*)40, MEM_WAIT);
+	o2 = arena_xalloc(a, 15, 1, 0, 0, (void*)50, (void*)90, MEM_WAIT);
+
+	KT_ASSERT(arena_amt_free(a) == 70);
+	KT_ASSERT(arena_amt_total(a) == 100);
+
+	arena_free(a, o1, 15);
+	arena_free(a, o2, 15);
+	arena_destroy(a);
+
+	return true;
+}
+
+static struct ktest ktests[] = {
+	KTEST_REG(nextfit, CONFIG_KTEST_ARENA),
+	KTEST_REG(bestfit, CONFIG_KTEST_ARENA),
+	KTEST_REG(instantfit, CONFIG_KTEST_ARENA),
+	KTEST_REG(quantum_align, CONFIG_KTEST_ARENA),
+	KTEST_REG(odd_quantum, CONFIG_KTEST_ARENA),
+	KTEST_REG(nocross_fallback, CONFIG_KTEST_ARENA),
+	KTEST_REG(xalloc_from_freelist, CONFIG_KTEST_ARENA),
+	KTEST_REG(qcache, CONFIG_KTEST_ARENA),
+	KTEST_REG(qc_odd_quantum, CONFIG_KTEST_ARENA),
+	KTEST_REG(qc_large_quantum, CONFIG_KTEST_ARENA),
+	KTEST_REG(import, CONFIG_KTEST_ARENA),
+	KTEST_REG(import_slab, CONFIG_KTEST_ARENA),
+	KTEST_REG(import_alignment, CONFIG_KTEST_ARENA),
+	KTEST_REG(xalloc, CONFIG_KTEST_ARENA),
+	KTEST_REG(xalloc_minmax, CONFIG_KTEST_ARENA),
+	KTEST_REG(accounting, CONFIG_KTEST_ARENA),
+};
+
+static int num_ktests = sizeof(ktests) / sizeof(struct ktest);
+
+static void __init register_arena_ktests(void)
+{
+	REGISTER_KTESTS(ktests, num_ktests);
+}
+init_func_1(register_arena_ktests);
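As a usage note, extending the suite follows the pattern above: write a
static bool test_foo(void) that returns true on success, and add a matching
KTEST_REG(foo, CONFIG_KTEST_ARENA) entry to ktests[].  A hypothetical extra
case (not part of this commit), built only from behaviors the existing tests
already rely on - first alloc of a whole span returns the span base, and a
failed MEM_ATOMIC alloc returns NULL instead of blocking:

	static bool test_full_span(void)
	{
		struct arena *a;
		void *o1;

		a = arena_create(__func__, NULL, 0, 1, NULL, NULL, NULL, 0,
				 MEM_WAIT);
		arena_add(a, (void*)1, 100, MEM_WAIT);
		/* The whole span is free, so this hands back its base. */
		o1 = arena_alloc(a, 100, MEM_WAIT);
		KT_ASSERT(o1 == (void*)1);
		/* Nothing left; MEM_ATOMIC must fail rather than block. */
		KT_ASSERT(arena_alloc(a, 1, MEM_ATOMIC) == NULL);
		arena_free(a, o1, 100);
		arena_destroy(a);

		return true;
	}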