| /* Copyright (c) 2010 The Regents of the University of California |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * Radix Trees! Just the basics, doesn't do tagging or anything fancy. */ |
| |
| #include <ros/errno.h> |
| #include <radix.h> |
| #include <slab.h> |
| #include <kmalloc.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <rcu.h> |
| |
| struct kmem_cache *radix_kcache; |
| static struct radix_node *__radix_lookup_node(struct radix_tree *tree, |
| unsigned long key, |
| bool extend); |
| static void __radix_remove_slot(struct radix_node *r_node, struct radix_node **slot); |
| |
| /* Initializes the radix tree system, mostly just builds the kcache */ |
| void radix_init(void) |
| { |
| radix_kcache = kmem_cache_create("radix_nodes", |
| sizeof(struct radix_node), |
| __alignof__(struct radix_node), 0, |
| NULL, 0, 0, NULL); |
| } |
| |
| /* Initializes a tree dynamically */ |
| void radix_tree_init(struct radix_tree *tree) |
| { |
| tree->seq = SEQCTR_INITIALIZER; |
| tree->root = 0; |
| tree->depth = 0; |
| tree->upper_bound = 0; |
| } |
| |
/* for_each callback used by radix_tree_destroy(): the tree is required to be
 * empty by then, so any populated slot we visit is a caller bug.  Panics
 * (never returns), which is why there is no return statement. */
static bool __should_not_run_cb(void **slot, unsigned long tree_idx, void *a)
{
	panic("Tried destroying a non-empty tree! (slot %p, idx %lu)",
	      *slot, tree_idx);
}
| |
| /* Will clean up all the memory associated with a tree. Shouldn't be necessary |
| * if you delete all of the items, which you should do anyways since they are |
| * usually void*. Might expand this to have a function to call on every leaf |
| * slot. */ |
| void radix_tree_destroy(struct radix_tree *tree) |
| { |
| /* Currently, we may have a root node, even if all the elements were |
| * removed */ |
| radix_for_each_slot(tree, __should_not_run_cb, NULL); |
| if (tree->root) { |
| kmem_cache_free(radix_kcache, tree->root); |
| tree->root = NULL; |
| } |
| } |
| |
| /* Attempts to insert an item in the tree at the given key. ENOMEM if we ran |
| * out of memory, EEXIST if an item is already in the tree. On success, will |
| * also return the slot pointer, if requested. |
| * |
| * Caller must maintain mutual exclusion (qlock) */ |
int radix_insert(struct radix_tree *tree, unsigned long key, void *item,
		 void ***slot_p)
{
	struct radix_node *r_node;
	void **slot;

	/* Is the tree tall enough?  if not, it needs to grow a level.  This
	 * will also create the initial node (upper bound starts at 0). */
	while (key >= tree->upper_bound) {
		r_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
		memset(r_node, 0, sizeof(struct radix_node));
		if (tree->root) {
			/* tree->root is the old root, now a child of the future
			 * root */
			r_node->items[0] = tree->root;
			tree->root->parent = r_node;
			tree->root->my_slot =
				(struct radix_node**)&r_node->items[0];
			r_node->num_items = 1;
		} else {
			/* if there was no root before, we're both the root and
			 * a leaf */
			r_node->leaf = TRUE;
			r_node->parent = 0;
		}
		/* Need to atomically change root, depth, and upper_bound for
		 * our readers, who will check the seq ctr.  Readers snapshot
		 * all three in __radix_lookup_node()'s retry loop. */
		__seq_start_write(&tree->seq);
		tree->root = r_node;
		r_node->my_slot = &tree->root;
		tree->depth++;
		tree->upper_bound = 1ULL << (LOG_RNODE_SLOTS * tree->depth);
		__seq_end_write(&tree->seq);
	}
	assert(tree->root);
	/* the tree now thinks it is tall enough, so find the last node, insert
	 * in it, etc */
	/* This gives us an rcu-protected pointer, though we hold the lock. */
	r_node = __radix_lookup_node(tree, key, TRUE);
	assert(r_node);	/* we want an ENOMEM actually, but i want to see this */
	/* Low LOG_RNODE_SLOTS bits of the key index into the leaf node. */
	slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
	if (*slot)
		return -EEXIST;
	/* Publish the item for concurrent RCU readers. */
	rcu_assign_pointer(*slot, item);
	r_node->num_items++;
	if (slot_p)
		*slot_p = slot;	/* passing back an rcu-protected pointer */
	return 0;
}
| |
| static void __rnode_free_rcu(struct rcu_head *head) |
| { |
| struct radix_node *r_node = container_of(head, struct radix_node, rcu); |
| |
| kmem_cache_free(radix_kcache, r_node); |
| } |
| |
/* Removes an item from its parent's structure, freeing the parent if there is
 * nothing left, potentially recursively. */
| static void __radix_remove_slot(struct radix_node *r_node, |
| struct radix_node **slot) |
| { |
| assert(*slot); /* make sure there is something there */ |
| rcu_assign_pointer(*slot, NULL); |
| r_node->num_items--; |
| /* this check excludes the root, but the if else handles it. For now, |
| * once we have a root, we'll always keep it (will need some changing in |
| * radix_insert() */ |
| if (!r_node->num_items && r_node->parent) { |
| if (r_node->parent) |
| __radix_remove_slot(r_node->parent, r_node->my_slot); |
| else /* we're the last node, attached to the actual tree */ |
| *(r_node->my_slot) = 0; |
| call_rcu(&r_node->rcu, __rnode_free_rcu); |
| } |
| } |
| |
| /* Removes a key/item from the tree, returning that item (the void*). If it |
| * detects a radix_node is now unused, it will dealloc that node. Though the |
| * tree will still think it is tall enough to handle its old upper_bound. It |
| * won't "shrink". |
| * |
| * Caller must maintain mutual exclusion (qlock) */ |
| void *radix_delete(struct radix_tree *tree, unsigned long key) |
| { |
| void **slot; |
| void *retval; |
| struct radix_node *r_node; |
| |
| /* This is an rcu-protected pointer, though the caller holds a lock. */ |
| r_node = __radix_lookup_node(tree, key, false); |
| if (!r_node) |
| return 0; |
| slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)]; |
| retval = rcu_dereference(*slot); |
| if (retval) { |
| __radix_remove_slot(r_node, (struct radix_node**)slot); |
| } else { |
| /* it's okay to delete an empty, but i want to know about it for |
| * now */ |
| warn("Tried to remove a non-existant item from a radix tree!"); |
| } |
| return retval; |
| } |
| |
| /* Returns the item for a given key. 0 means no item, etc. */ |
/* Returns the item stored at the given key, or 0 if there is none.
 *
 * The slot points into an rnode's memory, and both the slot and the item we
 * read out of it ("void *item") are rcu-protected pointers. */
void *radix_lookup(struct radix_tree *tree, unsigned long key)
{
	void **slot = radix_lookup_slot(tree, key);

	return slot ? rcu_dereference(*slot) : 0;
}
| |
| /* Returns a pointer to the radix_node holding a given key. 0 if there is no |
| * such node, due to the tree being too small or something. |
| * |
| * If the depth is greater than one, we need to walk down the tree a level. The |
| * key is 'partitioned' among the levels of the tree, like so: |
| * ......444444333333222222111111 |
| * |
| * If an interior node of the tree is missing, this will add one if it was |
| * directed to extend the tree. |
| * |
| * If we might extend, the caller must maintain mutual exclusion (qlock) */ |
| static struct radix_node *__radix_lookup_node(struct radix_tree *tree, |
| unsigned long key, bool extend) |
| { |
| unsigned long idx, upper_bound; |
| unsigned int depth; |
| seq_ctr_t seq; |
| struct radix_node *child_node, *r_node; |
| |
| do { |
| seq = ACCESS_ONCE(tree->seq); |
| rmb(); |
| r_node = rcu_dereference(tree->root); |
| depth = tree->depth; |
| upper_bound = tree->upper_bound; |
| } while (seqctr_retry(tree->seq, seq)); |
| |
| if (key >= upper_bound) { |
| if (extend) |
| warn("Bound (%d) not set for key %d!\n", upper_bound, |
| key); |
| return 0; |
| } |
| for (int i = depth; i > 1; i--) { /* i = ..., 4, 3, 2 */ |
| idx = (key >> (LOG_RNODE_SLOTS * (i - 1))) |
| & (NR_RNODE_SLOTS - 1); |
| /* There might not be a node at this part of the tree */ |
| if (!r_node->items[idx]) { |
| if (!extend) |
| return 0; |
| child_node = kmem_cache_alloc(radix_kcache, MEM_WAIT); |
| memset(child_node, 0, sizeof(struct radix_node)); |
| /* when we are on the last iteration (i == 2), the child |
| * will be a leaf. */ |
| child_node->leaf = (i == 2) ? TRUE : FALSE; |
| child_node->parent = r_node; |
| child_node->my_slot = |
| (struct radix_node**)&r_node->items[idx]; |
| r_node->num_items++; |
| rcu_assign_pointer(r_node->items[idx], child_node); |
| } |
| r_node = |
| (struct radix_node*)rcu_dereference(r_node->items[idx]); |
| } |
| return r_node; |
| } |
| |
| /* Returns a pointer to the slot for the given key. 0 if there is no such slot, |
| * etc */ |
| void **radix_lookup_slot(struct radix_tree *tree, unsigned long key) |
| { |
| struct radix_node *r_node = __radix_lookup_node(tree, key, FALSE); |
| |
| if (!r_node) |
| return 0; |
| key = key & (NR_RNODE_SLOTS - 1); |
| /* r_node is rcu-protected. Our retval is too, since it's a pointer |
| * into the same object as r_node. */ |
| return &r_node->items[key]; |
| } |
| |
| /* [x_left, x_right) and [y_left, y_right). */ |
/* [x_left, x_right) and [y_left, y_right): true iff the half-open intervals
 * share at least one point.  Whichever interval starts first must reach past
 * the other's start. */
static bool ranges_overlap(unsigned long x_left, unsigned long x_right,
			   unsigned long y_left, unsigned long y_right)
{
	if (x_left <= y_left)
		return y_left < x_right;
	return x_left < y_right;
}
| |
| /* Given an index at a depth for a child, returns whether part of it is in the |
| * global range. |
| * |
| * Recall the key is partitioned like so: ....444444333333222222111111. The |
| * depth is 1 when we're in the last rnode and our children are items. When |
| * we're an intermediate node, our depth is higher, and our start/end is the |
| * entire reach of us + our children. */ |
| static bool child_overlaps_range(unsigned long idx, int depth, |
| unsigned long glb_start_idx, |
| unsigned long glb_end_idx) |
| { |
| unsigned long start = idx << (LOG_RNODE_SLOTS * (depth - 1)); |
| unsigned long end = (idx + 1) << (LOG_RNODE_SLOTS * (depth - 1)); |
| |
| return ranges_overlap(start, end, glb_start_idx, glb_end_idx); |
| } |
| |
| /* Helper for walking down a tree. |
| * - depth == 1 means we're on the last radix_node, whose items are all the |
| * actual items. |
| * - tree_idx is the index for our item/node. It encodes the path we took |
| * through the radix tree to get to where we are. For leaves (items), it is |
| * their index in the address space. For internal rnodes, it is a temporary |
| * number, but combined with the depth, you can determine the range of the |
| * rnode's descendents. |
| * - glb_start_idx and glb_end_idx is the global start and end for the entire |
| * for_each operation. |
| * |
| * Returns true if our r_node *was already deleted*. When we call |
| * __radix_remove_slot(), if we removed the last item for r_node, the removal |
| * code will recurse *up* the tree, such that r_node might already be freed. |
| * Same goes for our parent! Hence, we're careful to only access r_node when we |
| * know we have children (once we enter the loop and might remove a slot). */ |
static bool rnode_for_each(struct radix_node *r_node, int depth,
                           unsigned long tree_idx, unsigned long glb_start_idx,
                           unsigned long glb_end_idx,
                           radix_cb_t cb, void *arg)
{
	/* Local count of live children; once this hits 0, r_node itself may
	 * have been freed by __radix_remove_slot() recursing up, so we must
	 * not touch r_node again (see the header comment). */
	unsigned int num_children = ACCESS_ONCE(r_node->num_items);

	/* The tree_idx we were passed was from our parent's perspective.  We
	 * need shift it over each time we walk down to put it in terms of our
	 * level/depth.  Or think of it as making room for our bits (the values
	 * of i). */
	tree_idx <<= LOG_RNODE_SLOTS;
	for (int i = 0; num_children && (i < NR_RNODE_SLOTS); i++) {
		if (r_node->items[i]) {
			/* If we really care, we can try to abort the rest of
			 * the loop.  Not a big deal */
			if (!child_overlaps_range(tree_idx + i, depth,
						  glb_start_idx, glb_end_idx))
				continue;
			if (depth > 1) {
				/* Interior node: recurse.  TRUE means the
				 * child rnode lost its last item and was
				 * queued for freeing. */
				if (rnode_for_each(r_node->items[i], depth - 1,
						   tree_idx + i, glb_start_idx,
						   glb_end_idx, cb, arg))
					num_children--;
			} else {
				/* Leaf level: items[i] is the actual item.  A
				 * TRUE return from cb asks us to remove the
				 * slot, which may recursively free r_node. */
				if (cb(&r_node->items[i], tree_idx + i, arg)) {
					__radix_remove_slot(r_node,
						(struct radix_node**)
						&r_node->items[i]);
					num_children--;
				}
			}
		}
	}
	/* TRUE iff we were emptied out (and thus possibly already freed). */
	return num_children ? false : true;
}
| |
| /* [start_idx, end_idx). |
| * |
| * Caller must maintain mutual exclusion (qlock) */ |
| void radix_for_each_slot_in_range(struct radix_tree *tree, |
| unsigned long start_idx, |
| unsigned long end_idx, |
| radix_cb_t cb, void *arg) |
| { |
| if (!tree->root) |
| return; |
| rnode_for_each(tree->root, tree->depth, 0, start_idx, end_idx, cb, arg); |
| } |
| |
| /* Caller must maintain mutual exclusion (qlock) */ |
/* Full-range wrapper: invokes cb on every occupied slot in the tree. */
void radix_for_each_slot(struct radix_tree *tree, radix_cb_t cb, void *arg)
{
	radix_for_each_slot_in_range(tree, 0, ULONG_MAX, cb, arg);
}
| |
/* Unimplemented: would gather up to max_items items starting at index 'first'
 * into results.  Always panics. */
int radix_gang_lookup(struct radix_tree *tree, void **results,
                      unsigned long first, unsigned int max_items)
{
	panic("Not implemented");
	return -1;	/* TODO! */
}
| |
| |
/* Unimplemented: would pre-grow the tree to cover keys up to 'max'.  Always
 * panics. */
int radix_grow(struct radix_tree *tree, unsigned long max)
{
	panic("Not implemented");
	return -1;	/* TODO! */
}
| |
/* Unimplemented: would preallocate nodes so a later insert cannot fail.
 * Always panics. */
int radix_preload(struct radix_tree *tree, int flags)
{
	panic("Not implemented");
	return -1;	/* TODO! */
}
| |
| |
/* Tagging is not supported (see the file header comment).  Always panics. */
void *radix_tag_set(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return (void*)-1;	/* TODO! */
}
| |
/* Tagging is not supported (see the file header comment).  Always panics. */
void *radix_tag_clear(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return (void*)-1;	/* TODO! */
}
| |
/* Tagging is not supported (see the file header comment).  Always panics. */
int radix_tag_get(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return -1;	/* TODO! */
}
| |
/* Tagging is not supported (see the file header comment).  Always panics. */
int radix_tree_tagged(struct radix_tree *tree, int tag)
{
	panic("Tagging not implemented!");
	return -1;	/* TODO! */
}
| |
/* Tagging is not supported (see the file header comment).  Always panics. */
int radix_tag_gang_lookup(struct radix_tree *tree, void **results,
                          unsigned long first, unsigned int max_items, int tag)
{
	panic("Tagging not implemented!");
	return -1;	/* TODO! */
}
| |
| void print_radix_tree(struct radix_tree *tree) |
| { |
| printk("Tree %p, Depth: %d, Bound: %d\n", tree, tree->depth, |
| tree->upper_bound); |
| |
| void print_rnode(struct radix_node *r_node, int depth) |
| { |
| if (!r_node) |
| return; |
| char buf[32] = {0}; |
| for (int i = 0; i < depth; i++) |
| buf[i] = '\t'; |
| printk("%sRnode %p, parent %p, myslot %p, %d items, leaf? %d\n", |
| buf, r_node, r_node->parent, r_node->my_slot, |
| r_node->num_items, r_node->leaf); |
| for (int i = 0; i < NR_RNODE_SLOTS; i++) { |
| if (!r_node->items[i]) |
| continue; |
| if (r_node->leaf) |
| printk("\t%sRnode Item %d: %p\n", buf, i, |
| r_node->items[i]); |
| else |
| print_rnode(r_node->items[i], depth + 1); |
| } |
| } |
| print_rnode(tree->root, 0); |
| } |