/* Copyright (c) 2010 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Radix Trees!  Just the basics, doesn't do tagging or anything fancy. */

#include <ros/errno.h>
#include <radix.h>
#include <slab.h>
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <rcu.h>

struct kmem_cache *radix_kcache;
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
                                              unsigned long key,
                                              bool extend);
static void __radix_remove_slot(struct radix_node *r_node,
                                struct radix_node **slot);

/* Initializes the radix tree system, mostly just builds the kcache */
void radix_init(void)
{
	radix_kcache = kmem_cache_create("radix_nodes",
					 sizeof(struct radix_node),
					 __alignof__(struct radix_node), 0,
					 NULL, 0, 0, NULL);
}

/* Initializes a tree dynamically */
void radix_tree_init(struct radix_tree *tree)
{
	tree->seq = SEQCTR_INITIALIZER;
	tree->root = 0;
	tree->depth = 0;
	tree->upper_bound = 0;
}

static bool __should_not_run_cb(void **slot, unsigned long tree_idx, void *a)
{
	panic("Tried destroying a non-empty tree! (slot %p, idx %lu)",
	      *slot, tree_idx);
}

/* Cleans up all the memory associated with a tree.  This shouldn't be
 * necessary if you delete all of the items first, which you should do anyway,
 * since the tree only stores void*s and can't free the items itself.  Might
 * expand this to take a function to call on every leaf slot. */
void radix_tree_destroy(struct radix_tree *tree)
{
	/* Currently, we may have a root node, even if all the elements were
	 * removed */
	radix_for_each_slot(tree, __should_not_run_cb, NULL);
	if (tree->root) {
		kmem_cache_free(radix_kcache, tree->root);
		tree->root = NULL;
	}
}

/* Attempts to insert an item in the tree at the given key.  Returns -ENOMEM if
 * we ran out of memory and -EEXIST if an item is already in the tree at that
 * key.  On success, will also return the slot pointer via slot_p, if
 * requested.
 *
 * Caller must maintain mutual exclusion (qlock) */
int radix_insert(struct radix_tree *tree, unsigned long key, void *item,
                 void ***slot_p)
{
	struct radix_node *r_node;
	void **slot;

	/* Is the tree tall enough?  If not, it needs to grow a level.  This will
	 * also create the initial node (upper bound starts at 0). */
	while (key >= tree->upper_bound) {
		r_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
		memset(r_node, 0, sizeof(struct radix_node));
		if (tree->root) {
			/* tree->root is the old root, now a child of the future root */
			r_node->items[0] = tree->root;
			tree->root->parent = r_node;
			tree->root->my_slot = (struct radix_node**)&r_node->items[0];
			r_node->num_items = 1;
		} else {
			/* if there was no root before, we're both the root and a leaf */
			r_node->leaf = TRUE;
			r_node->parent = 0;
		}
		/* Need to atomically change root, depth, and upper_bound for our
		 * readers, who will check the seq ctr. */
		__seq_start_write(&tree->seq);
		tree->root = r_node;
		r_node->my_slot = &tree->root;
		tree->depth++;
		tree->upper_bound = 1ULL << (LOG_RNODE_SLOTS * tree->depth);
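		/* For example, assuming LOG_RNODE_SLOTS is 6 (64 slots per
		 * node), upper_bound grows as 64, 4096, 262144, ... each time
		 * we add a level. */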
		__seq_end_write(&tree->seq);
	}
	assert(tree->root);
	/* the tree now thinks it is tall enough, so find the last node, insert in
	 * it, etc */
	/* This gives us an rcu-protected pointer, though we hold the lock. */
	r_node = __radix_lookup_node(tree, key, TRUE);
	/* We'd rather return -ENOMEM here, but for now we want to see this */
	assert(r_node);
	slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	r_node->num_items++;
	if (slot_p)
		*slot_p = slot;	/* passing back an rcu-protected pointer */
	return 0;
}
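
/* A minimal usage sketch (with hypothetical names): writers serialize with
 * whatever qlock protects the tree, e.g.
 *
 *	qlock(&pm->qlock);
 *	ret = radix_insert(&pm->tree, key, item, NULL);
 *	qunlock(&pm->qlock);
 *
 * where 'pm' is whatever structure embeds the tree and its qlock. */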

static void __rnode_free_rcu(struct rcu_head *head)
{
	struct radix_node *r_node = container_of(head, struct radix_node, rcu);

	kmem_cache_free(radix_kcache, r_node);
}

/* Removes an item from its parent's structure, freeing the parent if there is
 * nothing left, potentially recursively. */
static void __radix_remove_slot(struct radix_node *r_node,
                                struct radix_node **slot)
{
	assert(*slot);		/* make sure there is something there */
	rcu_assign_pointer(*slot, NULL);
	r_node->num_items--;
	/* The parent check excludes the root: for now, once we have a root, we
	 * always keep it (changing that would need changes in radix_insert()).
	 * If we were our parent's last item, this recurses up the tree, so our
	 * parent might get freed as well. */
	if (!r_node->num_items && r_node->parent) {
		__radix_remove_slot(r_node->parent, r_node->my_slot);
		call_rcu(&r_node->rcu, __rnode_free_rcu);
	}
}

/* Removes a key/item from the tree, returning that item (the void*).  If it
 * detects a radix_node is now unused, it will dealloc that node.  Note that
 * the tree will still think it is tall enough to handle its old upper_bound;
 * it won't "shrink".
 *
 * Caller must maintain mutual exclusion (qlock) */
void *radix_delete(struct radix_tree *tree, unsigned long key)
{
	void **slot;
	void *retval;
	struct radix_node *r_node;

	/* This is an rcu-protected pointer, though the caller holds a lock. */
	r_node = __radix_lookup_node(tree, key, false);
	if (!r_node)
		return 0;
	slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
	retval = rcu_dereference(*slot);
	if (retval) {
		__radix_remove_slot(r_node, (struct radix_node**)slot);
	} else {
		/* It's okay to delete an empty slot, but for now we want to
		 * know about it */
		warn("Tried to remove a non-existent item from a radix tree!");
	}
	return retval;
}

/* Returns the item for a given key.  0 means no item, etc. */
void *radix_lookup(struct radix_tree *tree, unsigned long key)
{
	void **slot = radix_lookup_slot(tree, key);

	if (!slot)
		return 0;
	/* slot was rcu-protected, pointing into the memory of an r_node.  we also
	 * want *slot, which is "void *item" to be an rcu-protected pointer. */
	return rcu_dereference(*slot);
}
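
/* Reader-side sketch: assuming the usual RCU read-side API, lookups can run
 * concurrently with a writer holding the qlock, as long as the reader stays
 * in an RCU read-side critical section while it uses the item:
 *
 *	rcu_read_lock();
 *	item = radix_lookup(&pm->tree, key);
 *	if (item)
 *		use_item(item);		(hypothetical consumer)
 *	rcu_read_unlock();
 */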

/* Returns a pointer to the radix_node holding a given key.  0 if there is no
 * such node, e.g. because the tree isn't tall enough to cover the key.
 *
 * If the depth is greater than one, we need to walk down the tree a level.  The
 * key is 'partitioned' among the levels of the tree, like so:
 * ......444444333333222222111111
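 *
 * For example, assuming LOG_RNODE_SLOTS is 6 (64 slots per rnode), the key
 * (18 << 12) | (13 << 6) | 5 in a depth-3 tree resolves to slot 18 of the
 * root, slot 13 of that child, and slot 5 of the returned leaf rnode.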
 *
 * If an interior node of the tree is missing, this will add one if it was
 * directed to extend the tree.
 *
 * If we might extend, the caller must maintain mutual exclusion (qlock) */
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
                                              unsigned long key, bool extend)
{
	unsigned long idx, upper_bound;
	unsigned int depth;
	seq_ctr_t seq;
	struct radix_node *child_node, *r_node;

	do {
		seq = ACCESS_ONCE(tree->seq);
		rmb();
		r_node = rcu_dereference(tree->root);
		depth = tree->depth;
		upper_bound = tree->upper_bound;
	} while (seqctr_retry(tree->seq, seq));

	if (key >= upper_bound) {
		if (extend)
			warn("Bound (%lu) not set for key %lu!\n", upper_bound, key);
		return 0;
	}
	for (int i = depth; i > 1; i--) {	/* i = ..., 4, 3, 2 */
		idx = (key >> (LOG_RNODE_SLOTS * (i - 1))) & (NR_RNODE_SLOTS - 1);
		/* There might not be a node at this part of the tree */
		if (!r_node->items[idx]) {
			if (!extend)
				return 0;
			child_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
			memset(child_node, 0, sizeof(struct radix_node));
			/* when we are on the last iteration (i == 2), the child will be
			 * a leaf. */
			child_node->leaf = (i == 2) ? TRUE : FALSE;
			child_node->parent = r_node;
			child_node->my_slot = (struct radix_node**)&r_node->items[idx];
			r_node->num_items++;
			rcu_assign_pointer(r_node->items[idx], child_node);
		}
		r_node = (struct radix_node*)rcu_dereference(r_node->items[idx]);
	}
	return r_node;
}

/* Returns a pointer to the slot for the given key.  0 if there is no such slot,
 * etc */
void **radix_lookup_slot(struct radix_tree *tree, unsigned long key)
{
	struct radix_node *r_node = __radix_lookup_node(tree, key, FALSE);

	if (!r_node)
		return 0;
	key = key & (NR_RNODE_SLOTS - 1);
	/* r_node is rcu-protected.  Our retval is too, since it's a pointer into
	 * the same object as r_node. */
	return &r_node->items[key];
}

/* [x_left, x_right) and [y_left, y_right). */
static bool ranges_overlap(unsigned long x_left, unsigned long x_right,
                           unsigned long y_left, unsigned long y_right)
{
	return ((x_left <= y_left) && (y_left < x_right)) ||
	       ((y_left <= x_left) && (x_left < y_right));
}
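
/* e.g. [2, 5) and [4, 9) overlap, while [2, 4) and [4, 9) do not, since the
 * ranges are half-open. */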

/* Given a child's index at a given depth, returns whether any part of the
 * child's range falls within the global range.
 *
 * Recall the key is partitioned like so: ....444444333333222222111111.  The
 * depth is 1 when we're in the last rnode and our children are items.  When
 * we're an intermediate node, our depth is higher, and our start/end is the
 * entire reach of us + our children. */
static bool child_overlaps_range(unsigned long idx, int depth,
                                 unsigned long glb_start_idx,
                                 unsigned long glb_end_idx)
{
	unsigned long start = idx << (LOG_RNODE_SLOTS * (depth - 1));
	unsigned long end = (idx + 1) << (LOG_RNODE_SLOTS * (depth - 1));

	return ranges_overlap(start, end, glb_start_idx, glb_end_idx);
}
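
/* For instance, assuming LOG_RNODE_SLOTS is 6, a child at idx 3 of a depth-2
 * rnode covers keys [192, 256), so it overlaps a global range of [200, 300)
 * but not one of [0, 100). */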

/* Helper for walking down a tree.
 * - depth == 1 means we're on the last radix_node, whose items are all the
 *   actual items.
 * - tree_idx is the index for our item/node.  It encodes the path we took
 *   through the radix tree to get to where we are.  For leaves (items), it is
 *   their index in the address space.  For internal rnodes, it is a temporary
 *   number, but combined with the depth, you can determine the range of the
 *   rnode's descendants.
 * - glb_start_idx and glb_end_idx are the global start and end for the entire
 *   for_each operation.
 *
 * Returns true if our r_node *was already deleted*.  When we call
 * __radix_remove_slot(), if we removed the last item for r_node, the removal
 * code will recurse *up* the tree, such that r_node might already be freed.
 * Same goes for our parent!  Hence, we're careful to only access r_node when we
 * know we have children (once we enter the loop and might remove a slot). */
static bool rnode_for_each(struct radix_node *r_node, int depth,
                           unsigned long tree_idx, unsigned long glb_start_idx,
                           unsigned long glb_end_idx,
                           radix_cb_t cb, void *arg)
{
	unsigned int num_children = ACCESS_ONCE(r_node->num_items);

	/* The tree_idx we were passed was from our parent's perspective.  We
	 * need to shift it over each time we walk down to put it in terms of our
	 * level/depth.  Or think of it as making room for our bits (the values of
	 * i). */
	tree_idx <<= LOG_RNODE_SLOTS;
	for (int i = 0; num_children && (i < NR_RNODE_SLOTS); i++) {
		if (r_node->items[i]) {
			/* If we really cared, we could abort the rest of the
			 * loop early; not a big deal */
			if (!child_overlaps_range(tree_idx + i, depth, glb_start_idx,
			                          glb_end_idx))
				continue;
			if (depth > 1) {
				if (rnode_for_each(r_node->items[i], depth - 1, tree_idx + i,
				                   glb_start_idx, glb_end_idx, cb, arg))
					num_children--;
			} else {
				if (cb(&r_node->items[i], tree_idx + i, arg)) {
					__radix_remove_slot(r_node,
					                    (struct radix_node**)&r_node->items[i]);
					num_children--;
				}
			}
		}
	}
	return !num_children;
}

/* [start_idx, end_idx).
 *
 * Caller must maintain mutual exclusion (qlock) */
void radix_for_each_slot_in_range(struct radix_tree *tree,
                                  unsigned long start_idx,
                                  unsigned long end_idx,
                                  radix_cb_t cb, void *arg)
{
	if (!tree->root)
		return;
	rnode_for_each(tree->root, tree->depth, 0, start_idx, end_idx, cb, arg);
}

/* Caller must maintain mutual exclusion (qlock) */
void radix_for_each_slot(struct radix_tree *tree, radix_cb_t cb, void *arg)
{
	radix_for_each_slot_in_range(tree, 0, ULONG_MAX, cb, arg);
}
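
/* Sketch of a callback (hypothetical, assuming the items were kmalloc'd) that
 * frees every item, e.g. before calling radix_tree_destroy().  Returning true
 * tells the walker to clear the slot and free any rnodes that become empty:
 *
 *	static bool __free_item_cb(void **slot, unsigned long tree_idx, void *arg)
 *	{
 *		kfree(*slot);
 *		return true;
 *	}
 *
 * Run it under the same qlock that writers use:
 *	radix_for_each_slot(&pm->tree, __free_item_cb, NULL);
 */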

int radix_gang_lookup(struct radix_tree *tree, void **results,
                      unsigned long first, unsigned int max_items)
{
	panic("Not implemented");
	return -1; /* TODO! */
}

int radix_grow(struct radix_tree *tree, unsigned long max)
{
	panic("Not implemented");
	return -1; /* TODO! */
}

int radix_preload(struct radix_tree *tree, int flags)
{
	panic("Not implemented");
	return -1; /* TODO! */
}

void *radix_tag_set(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return (void*)-1; /* TODO! */
}

void *radix_tag_clear(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return (void*)-1; /* TODO! */
}

int radix_tag_get(struct radix_tree *tree, unsigned long key, int tag)
{
	panic("Tagging not implemented!");
	return -1; /* TODO! */
}

int radix_tree_tagged(struct radix_tree *tree, int tag)
{
	panic("Tagging not implemented!");
	return -1; /* TODO! */
}

int radix_tag_gang_lookup(struct radix_tree *tree, void **results,
                          unsigned long first, unsigned int max_items, int tag)
{
	panic("Tagging not implemented!");
	return -1; /* TODO! */
}

void print_radix_tree(struct radix_tree *tree)
{
	printk("Tree %p, Depth: %d, Bound: %lu\n", tree, tree->depth,
	       tree->upper_bound);

	void print_rnode(struct radix_node *r_node, int depth)
	{
		if (!r_node)
			return;
		char buf[32] = {0};
		for (int i = 0; i < depth; i++)
			buf[i] = '\t';
		printk("%sRnode %p, parent %p, myslot %p, %d items, leaf? %d\n",
		       buf, r_node, r_node->parent, r_node->my_slot, r_node->num_items,
		       r_node->leaf);
		for (int i = 0; i < NR_RNODE_SLOTS; i++) {
			if (!r_node->items[i])
				continue;
			if (r_node->leaf)
				printk("\t%sRnode Item %d: %p\n", buf, i, r_node->items[i]);
			else
				print_rnode(r_node->items[i], depth + 1);
		}
	}
	print_rnode(tree->root, 0);
}