| /* Copyright (c) 2018 Google Inc |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * RCU. We borrow a few things from Linux - mostly the header bits and the |
| * tree-rcu structure. |
| * |
| * Acronyms/definitions: |
| * - CB: RCU callbacks (call_rcu) |
| * - QS: quiescent state - a time when we know a core isn't in an RCU read-side |
| * critical section. |
| * - GP: grace period. Some quotes from Linux/Paul: |
| * - "A time period during which all such pre-existing readers complete is |
| * called a 'grace period'." |
| * - "Anything outside of an RCU read-side critical section is a quiescent |
| * state, and a grace period is any time period in which every CPU (or task, |
| * for |
| * - gpnum: number of the current grace period we are working on |
| * - completed: number of the grace periods completed |
| * |
| * We differ in a few ways from Linux's implementation: |
| * |
| * - Callbacks run on management cores (a.k.a. LL cores, e.g. core 0). This way |
| * we don't have to kick idle or user space cores to run their CBs, and those |
| * CBs don't interfere with a possibly unrelated process. |
| * |
| * - Our RCU is most similar to rcu_sched (classic RCU), and not the preemptible |
| * RCU. Our kthreads don't get preempted, so we don't need to worry about |
| * read side critical sections being interrupted. |
| * |
| * - There is no softirq processing to note the passing of GPs or to run CBs. |
| * |
| * - Our tree uses atomic ops to trace grace periods within the rcu_nodes. |
| * Linux's tree-rcu uses locks. They need the locks since under some |
| * circumstances, a QS would be marked during a read-side critical section, |
| * and the QS marking needed to track the gpnum to keep the QS matched to the |
| * GP. See |
| * https://www.kernel.org/doc/Documentation/RCU/Design/Data-Structures/Data-Structures.html |
| * and grep "Come on". We don't need to worry about this since we only mark a |
| * QS under two situations: |
| * |
| * - The core knows it does not hold an rcu_read_lock, so we can always |
| * mark a QS. |
| * - The GP kthread saw the core either idle or in userspace after the gp |
| * started. That means we know that core had a QS after the GP started. |
| * |
| * So any time we mark a QS, it actually is a QS. I think Linux has times where |
| * they note a QS for an older GP, and set a note to mark that QS *for that |
| * GP* in the future. Their locks make sure they are marking for the right |
| * gpnum. There might be some element of the rnps not knowing about the |
| * latest GP yet too. |
| * |
| * - We do use locking at the per-core level to decide whether or not to mark |
| * a QS for a given GP (lock, compare gp_acked to gpnum, etc.). This ensures |
| * only one thread (the core or the GP kthread) marks the core for a given |
| * GP. We actually could handle it if they both did (make the trickle-up |
| * idempotent, which we do for the interior nodes), but we could run into |
| * situations where a core checks in for a GP before the global gpnum was set. |
| * This could happen when the GP kth is resetting the tree for the next GP. |
| * I think it'd be OK, but not worth the hassle and confusion. |
| * |
| * - We have a kthread for GP management, like Linux. Callbacks are enqueued |
| * locally (on the core that calls call_rcu), like Linux. We have a kthread |
| * per management core to process the callbacks, and these threads will handle |
| * the callbacks of *all* cores. Each core has a specific mgmt kthread that |
| * will run its callbacks. It is important that a particular core's callbacks |
| * are processed by the same thread - I rely on this to implement rcu_barrier |
| * easily. In that case, we just need to schedule a CB on every core that has |
| * CBs, and when those N CBs are done, our barrier passed. This relies on CBs |
| * being processed in order for a given core. We could do the barrier in |
| * other ways, but it doesn't seem like a big deal. |
| * |
| * - I kept around some seq counter and locking stuff in rcu_helper.h. We might |
| * use that in the future. |
| */ |
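| |
| /* Usage sketch (illustrative, not part of this file): the classic |
| * reader/updater pattern this implementation supports. rcu_read_lock(), |
| * rcu_dereference(), rcu_assign_pointer(), and call_rcu() are the standard |
| * Linux-style API assumed to come with the borrowed header bits; 'struct |
| * foo', 'foo_head', and the foo_* names are hypothetical. |
| * |
| *     struct foo { |
| *         struct rcu_head rcu; |
| *         int val; |
| *     }; |
| *     struct foo *foo_head; |
| * |
| *     // Reader: no locks, just a read-side critical section. |
| *     rcu_read_lock(); |
| *     f = rcu_dereference(foo_head); |
| *     if (f) |
| *         val = f->val; |
| *     rcu_read_unlock(); |
| * |
| *     // Updater: publish the new version, free the old one after a GP. |
| *     static void foo_free_rcu(struct rcu_head *head) |
| *     { |
| *         kfree(container_of(head, struct foo, rcu)); |
| *     } |
| * |
| *     rcu_assign_pointer(foo_head, new_foo); |
| *     call_rcu(&old_foo->rcu, foo_free_rcu); |
| */ |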
| |
| #include <rcu.h> |
| #include <kthread.h> |
| #include <smp.h> |
| #include <kmalloc.h> |
| |
| /* How many CBs to queue up before we trigger a GP */ |
| #define RCU_CB_THRESH 10 |
| /* How long (usec) we wait between running a GP if we weren't triggered. */ |
| #define RCU_GP_MIN_PERIOD 25000 |
| /* How long (usec) we wait for cores to check in. */ |
| #define RCU_GP_TARDY_PERIOD 1000 |
| |
| /* In rcu_tree_helper.c */ |
| extern int rcu_num_cores; |
| extern int rcu_num_lvls; |
| |
| /* Controls whether we skip cores when we expedite, which forces tardy cores. */ |
| static bool rcu_debug_tardy; |
| |
| /* Externed in rcu_tree_helper.c */ |
| struct rcu_state rcu_state; |
| |
| |
| DEFINE_PERCPU(struct rcu_pcpui, rcu_pcpui); |
| |
| struct sync_cb_blob { |
| struct rcu_head h; |
| struct semaphore *sem; |
| }; |
| |
| static void __sync_cb(struct rcu_head *head) |
| { |
| struct sync_cb_blob *b = container_of(head, struct sync_cb_blob, h); |
| |
| sem_up(b->sem); |
| } |
| |
| void synchronize_rcu(void) |
| { |
| struct sync_cb_blob b[1]; |
| struct semaphore sem[1]; |
| |
| if (!can_block(this_pcpui_ptr())) |
| panic("Attempted %s() from an unblockable context!", __func__); |
| if (is_rcu_ktask(current_kthread)) |
| panic("Attempted %s() from an RCU thread!", __func__); |
| sem_init(sem, 0); |
| init_rcu_head_on_stack(&b->h); |
| b->sem = sem; |
| call_rcu(&b->h, __sync_cb); |
| sem_down(sem); |
| } |
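| |
| /* Typical use of synchronize_rcu() (an illustrative sketch; 'foo_head' and |
| * 'old' are hypothetical): unpublish an object, wait out all pre-existing |
| * readers, then free it. |
| * |
| *     old = foo_head; |
| *     rcu_assign_pointer(foo_head, NULL); |
| *     synchronize_rcu();  // all readers that could still see 'old' are done |
| *     kfree(old); |
| * |
| * Since this blocks for at least a full GP, call_rcu() is preferred on hot |
| * paths; as seen above, synchronize_rcu() is just call_rcu() plus a |
| * semaphore. */ |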
| |
| static inline bool gp_in_progress(struct rcu_state *rsp) |
| { |
| unsigned long completed = READ_ONCE(rsp->completed); |
| unsigned long gpnum = READ_ONCE(rsp->gpnum); |
| |
| assert(gpnum - completed <= 1); |
| return completed != gpnum; |
| } |
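| |
| /* A worked picture of the gpnum/completed pairing (numbers illustrative): |
| *     gpnum == completed == 4:     no GP in progress |
| *     gpnum == 5, completed == 4:  GP 5 in progress (rcu_run_gp() bumped |
| *                                  gpnum) |
| *     completed advances to 5:     GP 5 is done; CBs waiting on it may run |
| * The assert above encodes that we never start a new GP before completing |
| * the previous one. */ |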
| |
| /* Wakes the kthread to run a grace period if it isn't already running. |
| * |
| * If 'force', we'll make sure it runs a fresh GP, which will catch all CBs |
| * registered before this call. That's not 100% true. It might be possible on |
| * some non-x86 architectures for the writes that wake the ktask to be |
| * reordered before the read of gpnum that our caller made. Thus the caller |
| * could have a CB in a later GP. Worst case, they'll wait an extra GP |
| * timeout. Not too concerned, though I probably should be. */ |
| static void wake_gp_ktask(struct rcu_state *rsp, bool force) |
| { |
| if (!force && gp_in_progress(rsp)) |
| return; |
| rsp->gp_ktask_ctl = 1; |
| rendez_wakeup(&rsp->gp_ktask_rv); |
| } |
| |
| static void rcu_exec_cb(struct rcu_head *head) |
| { |
| if (__is_kfree_rcu_offset((unsigned long)head->func)) |
| kfree((void*)head - (unsigned long)head->func); |
| else |
| head->func(head); |
| } |
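| |
| /* The kfree_rcu()-style offset trick handled above, sketched with a |
| * hypothetical struct: instead of a function pointer, ->func holds the |
| * offset of the rcu_head within its enclosing object. |
| * |
| *     struct foo { |
| *         int val; |
| *         struct rcu_head rcu; |
| *     }; |
| * |
| * Here ->func would be (rcu_callback_t)offsetof(struct foo, rcu); |
| * __is_kfree_rcu_offset() recognizes the small value, and the kfree() above |
| * recovers the base of 'struct foo' with (void*)head - offset. */ |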
| |
| void __early_call_rcu(struct rcu_head *head) |
| { |
| extern bool booting; |
| |
| assert(booting); |
| assert(core_id() == 0); |
| run_as_rkm(rcu_exec_cb, head); |
| } |
| |
| /* This could be called from a remote core, e.g. rcu_barrier(). Returns the |
| * number of enqueued CBs, including the one we pass in. */ |
| static int __call_rcu_rpi(struct rcu_state *rsp, struct rcu_pcpui *rpi, |
| struct rcu_head *head, rcu_callback_t func) |
| { |
| unsigned int nr_cbs; |
| |
| head->func = func; |
| |
| if (!rpi->booted) { |
| __early_call_rcu(head); |
| return 0; |
| } |
| /* rsp->gpnum is the one we're either working on (if > completed) or the |
| * one we already did. Either way, it's a GP that may have already been |
| * ACKed during a core's QS, and that core could have started a |
| * read-side critical section that must complete before the CB runs. That |
| * requires another GP. */ |
| head->gpnum = READ_ONCE(rsp->gpnum) + 1; |
| spin_lock_irqsave(&rpi->lock); |
| list_add_tail(&head->link, &rpi->cbs); |
| nr_cbs = ++rpi->nr_cbs; |
| spin_unlock_irqsave(&rpi->lock); |
| /* rcu_barrier requires that the write to ->nr_cbs be visible before any |
| * future writes. unlock orders the write inside, but doesn't prevent |
| * other writes from moving in. Technically, our lock implementations |
| * do that, but it's not part of our definition. Maybe it should be. |
| * Til then: */ |
| wmb(); |
| return nr_cbs; |
| } |
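| |
| /* Worked example of the ->gpnum rule above: a CB enqueued while rsp->gpnum |
| * == 4 (whether GP 4 is still running or already completed) gets ->gpnum = |
| * 5, and run_rcu_cbs() will not run it until rsp->completed >= 5, i.e. |
| * until a GP that started strictly after the CB was enqueued has finished. */ |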
| |
| /* Minus the kfree offset check */ |
| static void __call_rcu(struct rcu_head *head, rcu_callback_t func) |
| { |
| struct rcu_pcpui *rpi = PERCPU_VARPTR(rcu_pcpui); |
| struct rcu_state *rsp = rpi->rsp; |
| unsigned int nr_cbs; |
| |
| nr_cbs = __call_rcu_rpi(rsp, rpi, head, func); |
| if (nr_cbs > RCU_CB_THRESH) |
| wake_gp_ktask(rsp, false); |
| } |
| |
| void call_rcu(struct rcu_head *head, rcu_callback_t func) |
| { |
| assert(!__is_kfree_rcu_offset((unsigned long)func)); |
| __call_rcu(head, func); |
| } |
| |
| void rcu_barrier(void) |
| { |
| struct rcu_state *rsp = PERCPU_VAR(rcu_pcpui).rsp; |
| struct rcu_pcpui *rpi; |
| struct semaphore sem[1]; |
| struct sync_cb_blob *b; |
| int nr_sent = 0; |
| |
| if (!can_block(this_pcpui_ptr())) |
| panic("Attempted %s() from an unblockable context!", __func__); |
| if (is_rcu_ktask(current_kthread)) |
| panic("Attempted %s() from an RCU thread!", __func__); |
| /* TODO: if we have concurrent rcu_barriers, we might be able to share |
| * the CBs. Say we have 1 CB on a core, then N rcu_barriers. We'll |
| * have N call_rcus in flight, though we could share. Linux does this |
| * with a mtx and some accounting, I think. */ |
| b = kzmalloc(sizeof(struct sync_cb_blob) * num_cores, MEM_WAIT); |
| /* Remember, you block when sem is <= 0. We'll get nr_sent ups, and |
| * we'll down 1 for each. This is just like the synchronize_rcu() case; |
| * there, nr_sent == 1. */ |
| sem_init(sem, 0); |
| /* Order any signal we received from someone who called call_rcu() |
| * before our rpi->nr_cbs reads. */ |
| rmb(); |
| for_each_core(i) { |
| rpi = _PERCPU_VARPTR(rcu_pcpui, i); |
| /* Lockless peek at nr_cbs. Two things to note here: |
| * - We look at nr_cbs and not the list, since there could be |
| * CBs on the mgmt kthread's stack-local work list, still running. |
| * - The guarantee is that we wait for any CBs from call_rcus |
| * that can be proved to happen before rcu_barrier. That |
| * means call_rcu had to return, which means it had to set the |
| * nr_cbs. */ |
| if (!rpi->nr_cbs) |
| continue; |
| init_rcu_head_on_stack(&b[i].h); |
| b[i].sem = sem; |
| __call_rcu_rpi(rsp, rpi, &b[i].h, __sync_cb); |
| nr_sent++; |
| } |
| if (!nr_sent) { |
| kfree(b); |
| return; |
| } |
| wake_gp_ktask(rsp, true); |
| /* sem_down_bulk is currently slow. Even with some fixes, we actually |
| * want a barrier, which you could imagine doing with a tree. |
| * sem_down_bulk() doesn't have the info that we have: that the wakeups |
| * are coming from N cores on the leaves of the tree. */ |
| sem_down_bulk(sem, nr_sent); |
| kfree(b); |
| } |
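| |
| /* Typical use of rcu_barrier() (illustrative; the foo_* names are |
| * hypothetical): flush outstanding CBs before tearing down state they use. |
| * |
| *     foo_stop_issuing_call_rcus(); |
| *     rcu_barrier();        // every previously queued foo CB has run |
| *     foo_destroy_state();  // safe: no CB can reference it now |
| */ |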
| |
| void rcu_force_quiescent_state(void) |
| { |
| /* It's unclear if we want to block until the QS has passed */ |
| wake_gp_ktask(PERCPU_VAR(rcu_pcpui).rsp, true); |
| } |
| |
| void kfree_call_rcu(struct rcu_head *head, rcu_callback_t off) |
| { |
| __call_rcu(head, off); |
| } |
| |
| /* Clears the bits core(s) in grpmask present in rnp, trickling up to the root. |
| * Note that a 1 in qsmask means you haven't checked in - like a todo list. |
| * Last one out kicks the GP kthread. */ |
| static void __mark_qs(struct rcu_state *rsp, struct rcu_node *rnp, |
| unsigned long grpmask) |
| { |
| unsigned long new_qsm; |
| |
| new_qsm = __sync_and_and_fetch(&rnp->qsmask, ~grpmask); |
| /* I don't fully understand this, but we need some form of transitive |
| * barrier across the entire tree. Linux does this when they |
| * lock/unlock. Our equivalent is the atomic op. */ |
| smp_mb__after_unlock_lock(); |
| /* Only one thread will get 0 back - the last one to check in */ |
| if (new_qsm) |
| return; |
| if (rnp->parent) |
| __mark_qs(rsp, rnp->parent, rnp->grpmask); |
| else |
| rendez_wakeup(&rsp->gp_ktask_rv); |
| } |
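| |
| /* Worked example of the trickle-up above, for a hypothetical two-level tree |
| * with cores 0-1 under leaf A (parent bit 0) and cores 2-3 under leaf B |
| * (parent bit 1): |
| * |
| *     GP start:   A.qsmask = 0b11, B.qsmask = 0b11, root.qsmask = 0b11 |
| *     core 0 QS:  A.qsmask -> 0b10; nonzero, so stop |
| *     core 1 QS:  A.qsmask -> 0b00; recurse: root.qsmask -> 0b10 |
| *     cores 2, 3: B.qsmask -> 0b00; recurse: root.qsmask -> 0b00, and the |
| *                 last core to check in wakes the GP kthread |
| */ |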
| |
| static void rcu_report_qs_rpi(struct rcu_state *rsp, struct rcu_pcpui *rpi) |
| { |
| /* Note we don't check ->completed == ->gpnum (gp_in_progress()). We |
| * only care if our core hasn't reported in for a GP. That window is a |
| * subset of gp_in_progress(). */ |
| if (rpi->gp_acked == READ_ONCE(rsp->gpnum)) { |
| /* If a GP starts right afterwards, oh well. Catch it next |
| * time. */ |
| return; |
| } |
| /* Lock ensures we only report a QS once per GP. */ |
| spin_lock_irqsave(&rpi->lock); |
| if (rpi->gp_acked == READ_ONCE(rsp->gpnum)) { |
| spin_unlock_irqsave(&rpi->lock); |
| return; |
| } |
| /* A gp can start concurrently, but once started, we should never be |
| * behind by more than 1. */ |
| assert(rpi->gp_acked + 1 == READ_ONCE(rsp->gpnum)); |
| /* Up our gp_acked before actually marking it. I don't want to hold the |
| * lock too long (e.g. some debug code in rendez_wakeup() calls |
| * call_rcu). So we've unlocked, but haven't actually checked in yet - |
| * that's fine. No one else will attempt to check in until the next GP, |
| * which can't happen until after we check in for this GP. */ |
| rpi->gp_acked++; |
| spin_unlock_irqsave(&rpi->lock); |
| __mark_qs(rsp, rpi->my_node, rpi->grpmask); |
| } |
| |
| /* Cores advertise when they are in QSs. If the core already reported in, or if |
| * we're not in a GP, this is a quick check (given a global read of ->gpnum). */ |
| void rcu_report_qs(void) |
| { |
| rcu_report_qs_rpi(&rcu_state, PERCPU_VARPTR(rcu_pcpui)); |
| } |
| |
| /* For debugging checks on large trees. Keep this in sync with |
| * rcu_init_fake_cores(). */ |
| static void rcu_report_qs_fake_cores(struct rcu_state *rsp) |
| { |
| struct rcu_node *rnp; |
| |
| rnp = rsp->level[rcu_num_lvls - 1]; |
| for (int i = num_cores; i < rcu_num_cores; i++) { |
| while (i > rnp->grphi) |
| rnp++; |
| if (rcu_debug_tardy && (i % 2)) |
| continue; |
| __mark_qs(rsp, rnp, 1UL << (i - rnp->grplo)); |
| } |
| } |
| |
| static void rcu_report_qs_remote_core(struct rcu_state *rsp, int coreid) |
| { |
| int cpu_state = READ_ONCE(pcpui_var(coreid, cpu_state)); |
| struct rcu_pcpui *rpi = _PERCPU_VARPTR(rcu_pcpui, coreid); |
| |
| /* Lockless peek. If we ever saw them idle/user after a GP started, we |
| * know they had a QS, and we know we're still in the original GP. */ |
| if (cpu_state == CPU_STATE_IDLE || cpu_state == CPU_STATE_USER) |
| rcu_report_qs_rpi(rsp, rpi); |
| } |
| |
| /* Checks every core, remotely via the cpu state, to see if it is in a QS. |
| * This is like an expedited grace period. */ |
| static void rcu_report_qs_remote_cores(struct rcu_state *rsp) |
| { |
| for_each_core(i) { |
| if (rcu_debug_tardy && (i % 2)) |
| continue; |
| rcu_report_qs_remote_core(rsp, i); |
| } |
| } |
| |
| static void rcu_report_qs_tardy_cores(struct rcu_state *rsp) |
| { |
| struct rcu_node *rnp; |
| unsigned long qsmask; |
| int i; |
| |
| rcu_for_each_leaf_node(rsp, rnp) { |
| qsmask = READ_ONCE(rnp->qsmask); |
| if (!qsmask) |
| continue; |
| for_each_set_bit(i, &qsmask, BITS_PER_LONG) { |
| /* Fake cores */ |
| if (i + rnp->grplo >= num_cores) { |
| __mark_qs(rsp, rnp, 1UL << i); |
| continue; |
| } |
| rcu_report_qs_remote_core(rsp, i + rnp->grplo); |
| } |
| } |
| } |
| |
| static int root_qsmask_empty(void *arg) |
| { |
| struct rcu_state *rsp = arg; |
| |
| return READ_ONCE(rsp->node[0].qsmask) == 0 ? 1 : 0; |
| } |
| |
| static void rcu_run_gp(struct rcu_state *rsp) |
| { |
| struct rcu_node *rnp; |
| |
| assert(rsp->gpnum == rsp->completed); |
| /* Initialize the tree for accumulating QSs. We know there are no users |
| * on the tree. The only time a core looks at the tree is when |
| * reporting a QS for a GP. The previous GP is done, thus all cores already |
| * reported their QS (for the previous GP), and they won't try |
| * again until we advertise the next GP. */ |
| rcu_for_each_node_breadth_first(rsp, rnp) |
| rnp->qsmask = rnp->qsmaskinit; |
| /* Need the tree set for reporting QSs before advertising the GP */ |
| wmb(); |
| WRITE_ONCE(rsp->gpnum, rsp->gpnum + 1); |
| /* At this point, the cores can start reporting in. */ |
| /* Fake cores help test a tree larger than num_cores. */ |
| rcu_report_qs_fake_cores(rsp); |
| /* Expediting aggressively. We could also wait briefly and then check |
| * the tardy cores. */ |
| rcu_report_qs_remote_cores(rsp); |
| /* Note that even when we expedite the GP by checking remote cores, |
| * there's a race where a core halted but we didn't see it (they |
| * report QS, decide to halt, pause, we start GP, see they haven't |
| * halted, etc.). They could report the QS after setting the state, but I |
| * didn't want to require that. */ |
| do { |
| rendez_sleep_timeout(&rsp->gp_ktask_rv, root_qsmask_empty, rsp, |
| RCU_GP_TARDY_PERIOD); |
| rcu_report_qs_tardy_cores(rsp); |
| } while (!root_qsmask_empty(rsp)); |
| /* Not sure if we need any barriers here. Once we post 'completed', the |
| * CBs can start running. But no one should touch the tree til gpnum is |
| * incremented. */ |
| WRITE_ONCE(rsp->completed, rsp->gpnum); |
| } |
| |
| static int should_wake_ctl(void *arg) |
| { |
| int *ctl = arg; |
| |
| return *ctl != 0 ? 1 : 0; |
| } |
| |
| static void wake_mgmt_ktasks(struct rcu_state *rsp) |
| { |
| struct rcu_pcpui *rpi; |
| |
| /* TODO: For each mgmt core */ |
| rpi = _PERCPU_VARPTR(rcu_pcpui, 0); |
| rpi->mgmt_ktask_ctl = 1; |
| rendez_wakeup(&rpi->mgmt_ktask_rv); |
| } |
| |
| static void rcu_gp_ktask(void *arg) |
| { |
| struct rcu_state *rsp = arg; |
| |
| current_kthread->flags |= KTH_IS_RCU_KTASK; |
| while (1) { |
| rendez_sleep_timeout(&rsp->gp_ktask_rv, should_wake_ctl, |
| &rsp->gp_ktask_ctl, RCU_GP_MIN_PERIOD); |
| rsp->gp_ktask_ctl = 0; |
| /* Our write of 0 must happen before starting the GP. If |
| * rcu_barrier's CBs miss the start of the GP (and thus are in |
| * an unscheduled GP), their write of 1 must happen after our |
| * write of 0 so that we rerun. This is the post-and-poke |
| * pattern. It's not a huge deal, since we'll catch it after |
| * the GP period timeout. */ |
| wmb(); |
| rcu_run_gp(rsp); |
| wake_mgmt_ktasks(rsp); |
| } |
| } |
| |
| static void run_rcu_cbs(struct rcu_state *rsp, int coreid) |
| { |
| struct rcu_pcpui *rpi = _PERCPU_VARPTR(rcu_pcpui, coreid); |
| struct list_head work = LIST_HEAD_INIT(work); |
| struct rcu_head *head, *temp, *last_for_gp = NULL; |
| int nr_cbs = 0; |
| unsigned long completed; |
| |
| /* We'll run the CBs for any GP completed so far, but not any GP that |
| * could be completed concurrently. "CBs for a GP" means callbacks that |
| * must wait for that GP to complete. */ |
| completed = READ_ONCE(rsp->completed); |
| |
| /* This lockless peek is an optimization. We're guaranteed to not miss |
| * the CB for the given GP: If the core had a CB for this GP, it must |
| * have put it on the list before checking in, before the GP completes, |
| * and before we run. */ |
| if (list_empty(&rpi->cbs)) |
| return; |
| |
| spin_lock_irqsave(&rpi->lock); |
| list_for_each_entry(head, &rpi->cbs, link) { |
| if (ULONG_CMP_LT(completed, head->gpnum)) |
| break; |
| nr_cbs++; |
| last_for_gp = head; |
| } |
| if (last_for_gp) |
| list_cut_position(&work, &rpi->cbs, &last_for_gp->link); |
| spin_unlock_irqsave(&rpi->lock); |
| |
| if (!nr_cbs) { |
| assert(list_empty(&work)); |
| return; |
| } |
| /* When we're in an RCU callback, we can't block. In our non-preemptive |
| * world, not blocking also means our kthread won't migrate from this core, |
| * such that the pcpui pointer (and thus the specific __ctx_depth) won't |
| * change. */ |
| set_cannot_block(this_pcpui_ptr()); |
| list_for_each_entry_safe(head, temp, &work, link) { |
| list_del(&head->link); |
| rcu_exec_cb(head); |
| } |
| clear_cannot_block(this_pcpui_ptr()); |
| |
| /* We kept nr_cbs in place until the CBs completed. |
| * This allows other readers (rcu_barrier()) of our pcpui to tell if we |
| * have any CBs pending. This relies on us being the only |
| * consumer/runner of CBs for this core. */ |
| spin_lock_irqsave(&rpi->lock); |
| rpi->nr_cbs -= nr_cbs; |
| spin_unlock_irqsave(&rpi->lock); |
| } |
| |
| static void rcu_mgmt_ktask(void *arg) |
| { |
| struct rcu_pcpui *rpi = arg; |
| struct rcu_state *rsp = rpi->rsp; |
| |
| current_kthread->flags |= KTH_IS_RCU_KTASK; |
| while (1) { |
| rendez_sleep(&rpi->mgmt_ktask_rv, should_wake_ctl, |
| &rpi->mgmt_ktask_ctl); |
| rpi->mgmt_ktask_ctl = 0; |
| /* TODO: given the number of mgmt kthreads, we need to assign |
| * cores */ |
| for_each_core(i) |
| run_rcu_cbs(rsp, i); |
| } |
| } |
| |
| void rcu_init_pcpui(struct rcu_state *rsp, struct rcu_pcpui *rpi, int coreid) |
| { |
| struct rcu_node *rnp = rpi->my_node; |
| |
| rpi->rsp = rsp; |
| assert(rnp->grplo <= coreid); |
| assert(coreid <= rnp->grphi); |
| rpi->coreid = coreid; |
| rpi->grpnum = coreid - rnp->grplo; |
| rpi->grpmask = 1UL << rpi->grpnum; |
| rpi->booted = false; |
| |
| /* We're single threaded now, so this is OK. */ |
| rnp->qsmaskinit |= rpi->grpmask; |
| |
| spinlock_init_irqsave(&rpi->lock); |
| INIT_LIST_HEAD(&rpi->cbs); |
| rpi->nr_cbs = 0; |
| rpi->gp_acked = rsp->completed; |
| |
| /* TODO: For each mgmt core only */ |
| if (coreid == 0) { |
| rendez_init(&rpi->mgmt_ktask_rv); |
| rpi->mgmt_ktask_ctl = 0; |
| } |
| } |
| |
| /* Initializes the fake cores. Works with rcu_report_qs_fake_cores() */ |
| static void rcu_init_fake_cores(struct rcu_state *rsp) |
| { |
| struct rcu_node *rnp; |
| |
| rnp = rsp->level[rcu_num_lvls - 1]; |
| for (int i = num_cores; i < rcu_num_cores; i++) { |
| while (i > rnp->grphi) |
| rnp++; |
| rnp->qsmaskinit |= 1UL << (i - rnp->grplo); |
| } |
| } |
| |
| void rcu_init(void) |
| { |
| struct rcu_state *rsp = &rcu_state; |
| struct rcu_pcpui *rpi; |
| |
| rcu_init_geometry(); |
| rcu_init_one(rsp); |
| rcu_init_fake_cores(rsp); |
| rcu_dump_rcu_node_tree(rsp); |
| |
| ktask("rcu_gp", rcu_gp_ktask, rsp); |
| /* TODO: For each mgmt core */ |
| ktask("rcu_mgmt_0", rcu_mgmt_ktask, _PERCPU_VARPTR(rcu_pcpui, 0)); |
| |
| /* If we have a call_rcu before percpu_init, we might be using the spot |
| * in the actual __percpu .section. We'd be core 0, so that'd be OK, |
| * since all we're using it for is reading 'booted'. */ |
| for_each_core(i) { |
| rpi = _PERCPU_VARPTR(rcu_pcpui, i); |
| rpi->booted = true; |
| } |
| } |