blob: 4c881d2733b8edf3a98981cfe33f95557ebb40d0 [file] [log] [blame]
/* Copyright (c) 2010 The Regents of the University of California
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* Radix Trees! Just the basics, doesn't do tagging or anything fancy. */
#include <ros/errno.h>
#include <radix.h>
#include <slab.h>
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <rcu.h>
struct kmem_cache *radix_kcache;
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
unsigned long key,
bool extend);
static void __radix_remove_slot(struct radix_node *r_node, struct radix_node **slot);
/* Initializes the radix tree system, mostly just builds the kcache */
void radix_init(void)
{
radix_kcache = kmem_cache_create("radix_nodes",
sizeof(struct radix_node),
__alignof__(struct radix_node), 0,
NULL, 0, 0, NULL);
}
/* Initializes a tree dynamically */
void radix_tree_init(struct radix_tree *tree)
{
tree->seq = SEQCTR_INITIALIZER;
tree->root = 0;
tree->depth = 0;
tree->upper_bound = 0;
}
static bool __should_not_run_cb(void **slot, unsigned long tree_idx, void *a)
{
panic("Tried destroying a non-empty tree! (slot %p, idx %lu)",
*slot, tree_idx);
}
/* Will clean up all the memory associated with a tree. Shouldn't be necessary
* if you delete all of the items, which you should do anyways since they are
* usually void*. Might expand this to have a function to call on every leaf
* slot. */
void radix_tree_destroy(struct radix_tree *tree)
{
/* Currently, we may have a root node, even if all the elements were
* removed */
radix_for_each_slot(tree, __should_not_run_cb, NULL);
if (tree->root) {
kmem_cache_free(radix_kcache, tree->root);
tree->root = NULL;
}
}
/* Attempts to insert an item in the tree at the given key. ENOMEM if we ran
* out of memory, EEXIST if an item is already in the tree. On success, will
* also return the slot pointer, if requested.
*
* Caller must maintain mutual exclusion (qlock) */
int radix_insert(struct radix_tree *tree, unsigned long key, void *item,
void ***slot_p)
{
struct radix_node *r_node;
void **slot;
/* Is the tree tall enough? if not, it needs to grow a level. This
* will also create the initial node (upper bound starts at 0). */
while (key >= tree->upper_bound) {
r_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
memset(r_node, 0, sizeof(struct radix_node));
if (tree->root) {
/* tree->root is the old root, now a child of the future
* root */
r_node->items[0] = tree->root;
tree->root->parent = r_node;
tree->root->my_slot =
(struct radix_node**)&r_node->items[0];
r_node->num_items = 1;
} else {
/* if there was no root before, we're both the root and
* a leaf */
r_node->leaf = TRUE;
r_node->parent = 0;
}
/* Need to atomically change root, depth, and upper_bound for
* our readers, who will check the seq ctr. */
__seq_start_write(&tree->seq);
tree->root = r_node;
r_node->my_slot = &tree->root;
tree->depth++;
tree->upper_bound = 1ULL << (LOG_RNODE_SLOTS * tree->depth);
__seq_end_write(&tree->seq);
}
assert(tree->root);
/* the tree now thinks it is tall enough, so find the last node, insert
* in it, etc */
/* This gives us an rcu-protected pointer, though we hold the lock. */
r_node = __radix_lookup_node(tree, key, TRUE);
assert(r_node); /* we want an ENOMEM actually, but i want to see this */
slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
if (*slot)
return -EEXIST;
rcu_assign_pointer(*slot, item);
r_node->num_items++;
if (slot_p)
*slot_p = slot; /* passing back an rcu-protected pointer */
return 0;
}
static void __rnode_free_rcu(struct rcu_head *head)
{
struct radix_node *r_node = container_of(head, struct radix_node, rcu);
kmem_cache_free(radix_kcache, r_node);
}
/* Removes an item from it's parent's structure, freeing the parent if there is
* nothing left, potentially recursively. */
static void __radix_remove_slot(struct radix_node *r_node,
struct radix_node **slot)
{
assert(*slot); /* make sure there is something there */
rcu_assign_pointer(*slot, NULL);
r_node->num_items--;
/* this check excludes the root, but the if else handles it. For now,
* once we have a root, we'll always keep it (will need some changing in
* radix_insert() */
if (!r_node->num_items && r_node->parent) {
if (r_node->parent)
__radix_remove_slot(r_node->parent, r_node->my_slot);
else /* we're the last node, attached to the actual tree */
*(r_node->my_slot) = 0;
call_rcu(&r_node->rcu, __rnode_free_rcu);
}
}
/* Removes a key/item from the tree, returning that item (the void*). If it
* detects a radix_node is now unused, it will dealloc that node. Though the
* tree will still think it is tall enough to handle its old upper_bound. It
* won't "shrink".
*
* Caller must maintain mutual exclusion (qlock) */
void *radix_delete(struct radix_tree *tree, unsigned long key)
{
void **slot;
void *retval;
struct radix_node *r_node;
/* This is an rcu-protected pointer, though the caller holds a lock. */
r_node = __radix_lookup_node(tree, key, false);
if (!r_node)
return 0;
slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
retval = rcu_dereference(*slot);
if (retval) {
__radix_remove_slot(r_node, (struct radix_node**)slot);
} else {
/* it's okay to delete an empty, but i want to know about it for
* now */
warn("Tried to remove a non-existant item from a radix tree!");
}
return retval;
}
/* Returns the item for a given key. 0 means no item, etc. */
void *radix_lookup(struct radix_tree *tree, unsigned long key)
{
void **slot = radix_lookup_slot(tree, key);
if (!slot)
return 0;
/* slot was rcu-protected, pointing into the memory of an r_node. we
* also want *slot, which is "void *item" to be an rcu-protected
* pointer. */
return rcu_dereference(*slot);
}
/* Returns a pointer to the radix_node holding a given key. 0 if there is no
* such node, due to the tree being too small or something.
*
* If the depth is greater than one, we need to walk down the tree a level. The
* key is 'partitioned' among the levels of the tree, like so:
* ......444444333333222222111111
*
* If an interior node of the tree is missing, this will add one if it was
* directed to extend the tree.
*
* If we might extend, the caller must maintain mutual exclusion (qlock) */
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
unsigned long key, bool extend)
{
unsigned long idx, upper_bound;
unsigned int depth;
seq_ctr_t seq;
struct radix_node *child_node, *r_node;
do {
seq = ACCESS_ONCE(tree->seq);
rmb();
r_node = rcu_dereference(tree->root);
depth = tree->depth;
upper_bound = tree->upper_bound;
} while (seqctr_retry(tree->seq, seq));
if (key >= upper_bound) {
if (extend)
warn("Bound (%d) not set for key %d!\n", upper_bound,
key);
return 0;
}
for (int i = depth; i > 1; i--) { /* i = ..., 4, 3, 2 */
idx = (key >> (LOG_RNODE_SLOTS * (i - 1)))
& (NR_RNODE_SLOTS - 1);
/* There might not be a node at this part of the tree */
if (!r_node->items[idx]) {
if (!extend)
return 0;
child_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
memset(child_node, 0, sizeof(struct radix_node));
/* when we are on the last iteration (i == 2), the child
* will be a leaf. */
child_node->leaf = (i == 2) ? TRUE : FALSE;
child_node->parent = r_node;
child_node->my_slot =
(struct radix_node**)&r_node->items[idx];
r_node->num_items++;
rcu_assign_pointer(r_node->items[idx], child_node);
}
r_node =
(struct radix_node*)rcu_dereference(r_node->items[idx]);
}
return r_node;
}
/* Returns a pointer to the slot for the given key. 0 if there is no such slot,
* etc */
void **radix_lookup_slot(struct radix_tree *tree, unsigned long key)
{
struct radix_node *r_node = __radix_lookup_node(tree, key, FALSE);
if (!r_node)
return 0;
key = key & (NR_RNODE_SLOTS - 1);
/* r_node is rcu-protected. Our retval is too, since it's a pointer
* into the same object as r_node. */
return &r_node->items[key];
}
/* [x_left, x_right) and [y_left, y_right). */
static bool ranges_overlap(unsigned long x_left, unsigned long x_right,
unsigned long y_left, unsigned long y_right)
{
return ((x_left <= y_left) && (y_left < x_right)) ||
((y_left <= x_left) && (x_left < y_right));
}
/* Given an index at a depth for a child, returns whether part of it is in the
* global range.
*
* Recall the key is partitioned like so: ....444444333333222222111111. The
* depth is 1 when we're in the last rnode and our children are items. When
* we're an intermediate node, our depth is higher, and our start/end is the
* entire reach of us + our children. */
static bool child_overlaps_range(unsigned long idx, int depth,
unsigned long glb_start_idx,
unsigned long glb_end_idx)
{
unsigned long start = idx << (LOG_RNODE_SLOTS * (depth - 1));
unsigned long end = (idx + 1) << (LOG_RNODE_SLOTS * (depth - 1));
return ranges_overlap(start, end, glb_start_idx, glb_end_idx);
}
/* Helper for walking down a tree.
* - depth == 1 means we're on the last radix_node, whose items are all the
* actual items.
* - tree_idx is the index for our item/node. It encodes the path we took
* through the radix tree to get to where we are. For leaves (items), it is
* their index in the address space. For internal rnodes, it is a temporary
* number, but combined with the depth, you can determine the range of the
* rnode's descendents.
* - glb_start_idx and glb_end_idx is the global start and end for the entire
* for_each operation.
*
* Returns true if our r_node *was already deleted*. When we call
* __radix_remove_slot(), if we removed the last item for r_node, the removal
* code will recurse *up* the tree, such that r_node might already be freed.
* Same goes for our parent! Hence, we're careful to only access r_node when we
* know we have children (once we enter the loop and might remove a slot). */
static bool rnode_for_each(struct radix_node *r_node, int depth,
unsigned long tree_idx, unsigned long glb_start_idx,
unsigned long glb_end_idx,
radix_cb_t cb, void *arg)
{
unsigned int num_children = ACCESS_ONCE(r_node->num_items);
/* The tree_idx we were passed was from our parent's perspective. We
* need shift it over each time we walk down to put it in terms of our
* level/depth. Or think of it as making room for our bits (the values
* of i). */
tree_idx <<= LOG_RNODE_SLOTS;
for (int i = 0; num_children && (i < NR_RNODE_SLOTS); i++) {
if (r_node->items[i]) {
/* If we really care, we can try to abort the rest of
* the loop. Not a big deal */
if (!child_overlaps_range(tree_idx + i, depth,
glb_start_idx, glb_end_idx))
continue;
if (depth > 1) {
if (rnode_for_each(r_node->items[i], depth - 1,
tree_idx + i, glb_start_idx,
glb_end_idx, cb, arg))
num_children--;
} else {
if (cb(&r_node->items[i], tree_idx + i, arg)) {
__radix_remove_slot(r_node,
(struct radix_node**)
&r_node->items[i]);
num_children--;
}
}
}
}
return num_children ? false : true;
}
/* [start_idx, end_idx).
*
* Caller must maintain mutual exclusion (qlock) */
void radix_for_each_slot_in_range(struct radix_tree *tree,
unsigned long start_idx,
unsigned long end_idx,
radix_cb_t cb, void *arg)
{
if (!tree->root)
return;
rnode_for_each(tree->root, tree->depth, 0, start_idx, end_idx, cb, arg);
}
/* Caller must maintain mutual exclusion (qlock) */
void radix_for_each_slot(struct radix_tree *tree, radix_cb_t cb, void *arg)
{
radix_for_each_slot_in_range(tree, 0, ULONG_MAX, cb, arg);
}
int radix_gang_lookup(struct radix_tree *tree, void **results,
unsigned long first, unsigned int max_items)
{
panic("Not implemented");
return -1; /* TODO! */
}
int radix_grow(struct radix_tree *tree, unsigned long max)
{
panic("Not implemented");
return -1; /* TODO! */
}
int radix_preload(struct radix_tree *tree, int flags)
{
panic("Not implemented");
return -1; /* TODO! */
}
void *radix_tag_set(struct radix_tree *tree, unsigned long key, int tag)
{
panic("Tagging not implemented!");
return (void*)-1; /* TODO! */
}
void *radix_tag_clear(struct radix_tree *tree, unsigned long key, int tag)
{
panic("Tagging not implemented!");
return (void*)-1; /* TODO! */
}
int radix_tag_get(struct radix_tree *tree, unsigned long key, int tag)
{
panic("Tagging not implemented!");
return -1; /* TODO! */
}
int radix_tree_tagged(struct radix_tree *tree, int tag)
{
panic("Tagging not implemented!");
return -1; /* TODO! */
}
int radix_tag_gang_lookup(struct radix_tree *tree, void **results,
unsigned long first, unsigned int max_items, int tag)
{
panic("Tagging not implemented!");
return -1; /* TODO! */
}
void print_radix_tree(struct radix_tree *tree)
{
printk("Tree %p, Depth: %d, Bound: %d\n", tree, tree->depth,
tree->upper_bound);
void print_rnode(struct radix_node *r_node, int depth)
{
if (!r_node)
return;
char buf[32] = {0};
for (int i = 0; i < depth; i++)
buf[i] = '\t';
printk("%sRnode %p, parent %p, myslot %p, %d items, leaf? %d\n",
buf, r_node, r_node->parent, r_node->my_slot,
r_node->num_items, r_node->leaf);
for (int i = 0; i < NR_RNODE_SLOTS; i++) {
if (!r_node->items[i])
continue;
if (r_node->leaf)
printk("\t%sRnode Item %d: %p\n", buf, i,
r_node->items[i]);
else
print_rnode(r_node->items[i], depth + 1);
}
}
print_rnode(tree->root, 0);
}