rcu: Panic if a call_rcu() CB blocks

RCU callbacks are not allowed to block.  Here's what Linux has to say:

	... the callback function must be written to be called from softirq
	context.  In particular, it cannot block.

Same goes for us.  If a CB blocks, then all further CBs will be blocked
too, and the code that will wake up the first CB could also be blocked on a
later CB.

Note the distinction between RCU callbacks and RCU threads.  The threads
are critical parts of implementing RCU; they are never allowed to block on
RCU primitives.  Yes, any RCU callback will be run by an RCU thread, but
the problem is slightly different.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/include/trap.h b/kern/include/trap.h
index ab26759..2a7f081 100644
--- a/kern/include/trap.h
+++ b/kern/include/trap.h
@@ -160,7 +160,8 @@
 #define __CTX_IRQ_D_MASK			((1 << 8) - 1)
 #define __CTX_KTRAP_D_MASK			((1 << 8) - 1)
 #define __CTX_NESTED_CTX_MASK		((1 << 16) - 1)
-#define __CTX_EARLY_RKM 			(1 << __CTX_FLAG_SHIFT)
+#define __CTX_EARLY_RKM				(1 << (__CTX_FLAG_SHIFT + 0))
+#define __CTX_RCU_CB				(1 << (__CTX_FLAG_SHIFT + 1))
 
 /* Basic functions to get or change depths */
 
@@ -188,6 +189,12 @@
 #define clear_rkmsg(pcpui)                                                     \
 	((pcpui)->__ctx_depth &= ~__CTX_EARLY_RKM)
 
+#define set_rcu_cb(pcpui)                                                      \
+	((pcpui)->__ctx_depth |= __CTX_RCU_CB)
+
+#define clear_rcu_cb(pcpui)                                                    \
+	((pcpui)->__ctx_depth &= ~__CTX_RCU_CB)
+
 /* Functions to query the kernel context depth/state.  I haven't fully decided
  * on whether or not 'default' context includes RKMs or not.  Will depend on
  * how we use it.  Check the code below to see what the latest is. */
@@ -198,14 +205,17 @@
 #define in_early_rkmsg_ctx(pcpui)                                              \
 	((pcpui)->__ctx_depth & __CTX_EARLY_RKM)
 
+#define in_rcu_cb_ctx(pcpui)                                                   \
+	((pcpui)->__ctx_depth & __CTX_RCU_CB)
+
 /* Right now, anything (KTRAP, IRQ, or RKM) makes us not 'default' */
 #define in_default_ctx(pcpui)                                                  \
 	(!(pcpui)->__ctx_depth)
 
 /* Can block only if we have no nested contexts (ktraps or irqs, (which are
- * potentially nested contexts)) */
+ * potentially nested contexts)) and not in an RCU CB */
 #define can_block(pcpui)                                                       \
-	(!((pcpui)->__ctx_depth & __CTX_NESTED_CTX_MASK))
+	(!((pcpui)->__ctx_depth & (__CTX_NESTED_CTX_MASK | __CTX_RCU_CB)))
 
 /* TRUE if we are allowed to spin, given that the 'lock' was declared as not
  * grabbable from IRQ context.  Meaning, we can't grab the lock from any nested
diff --git a/kern/src/rcu.c b/kern/src/rcu.c
index 2aec4ac..8481025 100644
--- a/kern/src/rcu.c
+++ b/kern/src/rcu.c
@@ -116,8 +116,10 @@
 	struct sync_cb_blob b[1];
 	struct semaphore sem[1];
 
-	if (is_rcu_ktask(current_kthread))
+	if (in_rcu_cb_ctx(this_pcpui_ptr()))
 		panic("Attempted synchronize_rcu() from an RCU callback!");
+	if (is_rcu_ktask(current_kthread))
+		panic("Attempted synchronize_rcu() from an RCU thread!");
 	sem_init(sem, 0);
 	init_rcu_head_on_stack(&b->h);
 	b->sem = sem;
@@ -229,8 +231,10 @@
 	struct sync_cb_blob *b;
 	int nr_sent = 0;
 
-	if (is_rcu_ktask(current_kthread))
+	if (in_rcu_cb_ctx(this_pcpui_ptr()))
 		panic("Attempted rcu_barrier() from an RCU callback!");
+	if (is_rcu_ktask(current_kthread))
+		panic("Attempted rcu_barrier() from an RCU thread!");
 	/* TODO: if we have concurrent rcu_barriers, we might be able to share the
 	 * CBs.  Say we have 1 CB on a core, then N rcu_barriers.  We'll have N
 	 * call_rcus in flight, though we could share.  Linux does this with a mtx
@@ -514,10 +518,16 @@
 		assert(list_empty(&work));
 		return;
 	}
+	/* When we're in an RCU callback, we can't block.  In our non-preemptive
+	 * world, not blocking also means our kthread won't migrate from this core,
+	 * such that the pcpui pointer (and thus the specific __ctx_depth) won't
+	 * change. */
+	set_rcu_cb(this_pcpui_ptr());
 	list_for_each_entry_safe(head, temp, &work, link) {
 		list_del(&head->link);
 		rcu_exec_cb(head);
 	}
+	clear_rcu_cb(this_pcpui_ptr());
 
 	/* We kept nr_cbs in place until the CBs, which could block, completed.
 	 * This allows other readers (rcu_barrier()) of our pcpui to tell if we have