| /* Copyright (c) 2011 The Regents of the University of California |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * Kernel utility functions for sending events and notifications (IPIs) to |
| * processes. */ |
| |
| #include <ucq.h> |
| #include <ceq.h> |
| #include <bitmask.h> |
| #include <event.h> |
| #include <atomic.h> |
| #include <process.h> |
| #include <smp.h> |
| #include <umem.h> |
| #include <stdio.h> |
| #include <assert.h> |
| #include <pmap.h> |
| #include <schedule.h> |
| |
| /* Userspace could give us a vcoreid that causes us to compute a vcpd that is |
| * outside procdata. If we hit UWLIM, then we've gone farther than we should. |
| * We check the vcoreid, instead of the resulting address, to avoid issues like |
| * address wrap-around. */ |
| static bool vcoreid_is_safe(uint32_t vcoreid) |
| { |
| /* MAX_NUM_VCORES == MAX_NUM_CORES (check procinfo/procdata) */ |
| return vcoreid < MAX_NUM_CORES; |
| } |
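| |
| /* Worked example of the wrap-around concern: with vcoreid near UINT32_MAX, |
| * &__procdata.vcore_preempt_data[vcoreid] is sizeof(struct preempt_data) * |
| * ~4 billion bytes past procdata, far beyond UWLIM and possibly wrapped |
| * back into a valid user mapping, which is why we bound the index instead |
| * of range-checking the computed address. */ |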
| |
| /* Note these three helpers return the user address of the mbox, not the KVA. |
| * Load current to access this, and it will work for any process. */ |
| static struct event_mbox *get_vcpd_mbox_priv(uint32_t vcoreid) |
| { |
| return &__procdata.vcore_preempt_data[vcoreid].ev_mbox_private; |
| } |
| |
| static struct event_mbox *get_vcpd_mbox_pub(uint32_t vcoreid) |
| { |
| return &__procdata.vcore_preempt_data[vcoreid].ev_mbox_public; |
| } |
| |
| static struct event_mbox *get_vcpd_mbox(uint32_t vcoreid, int ev_flags) |
| { |
| if (ev_flags & EVENT_VCORE_PRIVATE) |
| return get_vcpd_mbox_priv(vcoreid); |
| else |
| return get_vcpd_mbox_pub(vcoreid); |
| } |
| |
| /* Can we message the vcore? (Will it check its messages?) Note this checks |
| * procdata via the user pointer. */ |
| static bool can_msg_vcore(uint32_t vcoreid) |
| { |
| struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid]; |
| return atomic_read(&vcpd->flags) & VC_CAN_RCV_MSG; |
| } |
| |
| /* Says a vcore can be messaged. Only call this once you are sure this is true |
| * (holding the proc_lock, etc). */ |
| static void set_vcore_msgable(uint32_t vcoreid) |
| { |
| struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid]; |
| atomic_or(&vcpd->flags, VC_CAN_RCV_MSG); |
| } |
| |
| static void send_evbitmap_msg(struct evbitmap *evbm, struct event_msg *msg) |
| { |
| SET_BITMASK_BIT_ATOMIC(evbm->bitmap, msg->ev_type); |
| wmb(); |
| evbm->check_bits = TRUE; |
| } |
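| |
| /* The wmb() above pairs with the consumer, which should clear check_bits |
| * before scanning, so that a concurrently posted bit re-raises the flag |
| * for the next pass. A rough sketch of the reader side (the real consumer |
| * lives in userspace; handle() is a stand-in for the app's dispatch): |
| * |
| *         if (evbm->check_bits) { |
| *                 evbm->check_bits = FALSE; |
| *                 wrmb();        // clear-then-scan, not scan-then-clear |
| *                 for (int i = 0; i < MAX_NR_EVENT; i++) |
| *                         if (GET_BITMASK_BIT(evbm->bitmap, i)) { |
| *                                 CLR_BITMASK_BIT_ATOMIC(evbm->bitmap, i); |
| *                                 handle(i); // bitmaps carry only ev_type |
| *                         } |
| *         } |
| */ |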
| |
| /* Posts a message to the mbox. mbox is a pointer to user-accessible memory. |
| * If mbox is a user-provided pointer, make sure that you've checked it. |
| * Regardless make sure you have that process's address space loaded. */ |
| static void post_ev_msg(struct proc *p, struct event_mbox *mbox, |
| struct event_msg *msg, int ev_flags) |
| { |
| printd("[kernel] Sending event type %d to mbox %p\n", |
| msg->ev_type, mbox); |
| /* Sanity check */ |
| assert(p); |
| switch (mbox->type) { |
| case (EV_MBOX_UCQ): |
| send_ucq_msg(&mbox->ucq, p, msg); |
| break; |
| case (EV_MBOX_BITMAP): |
| send_evbitmap_msg(&mbox->evbm, msg); |
| break; |
| case (EV_MBOX_CEQ): |
| send_ceq_msg(&mbox->ceq, p, msg); |
| break; |
| default: |
| printk("[kernel] Unknown mbox type %d!\n", mbox->type); |
| } |
| } |
| |
| /* Helper: use this when sending a message to a VCPD mbox. It just posts to the |
| * ev_mbox and sets notif_pending. Note this uses a userspace address for the |
| * VCPD (a user address, though not a user-provided pointer). */ |
| static void post_vc_msg(struct proc *p, uint32_t vcoreid, |
| struct event_mbox *ev_mbox, struct event_msg *ev_msg, |
| int ev_flags) |
| { |
| struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid]; |
| post_ev_msg(p, ev_mbox, ev_msg, ev_flags); |
| /* Set notif pending so userspace doesn't miss the message while |
| * yielding */ |
| wmb(); /* Ensure ev_msg write is before notif_pending */ |
| /* proc_notify() also sets this, but the ev_q might not have requested |
| * an IPI, so we have to do it here too. */ |
| vcpd->notif_pending = TRUE; |
| } |
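| |
| /* notif_pending pairs with a clear-then-check loop on the user side. A |
| * rough sketch of the consumer (modeled on parlib's event handling, not |
| * verbatim): |
| * |
| *         vcpd->notif_pending = FALSE; |
| *         wrmb();        // clear before scanning, so a racing post re-raises |
| *         handle_mbox(&vcpd->ev_mbox_private); |
| *         handle_mbox(&vcpd->ev_mbox_public); |
| */ |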
| |
| /* Helper: will IPI / proc_notify if the flags say so. We also check to make |
| * sure it is mapped (slight optimization) */ |
| static void try_notify(struct proc *p, uint32_t vcoreid, int ev_flags) |
| { |
| /* Note this is an unlocked-peek at the vcoremap */ |
| if ((ev_flags & EVENT_IPI) && vcore_is_mapped(p, vcoreid)) |
| proc_notify(p, vcoreid); |
| } |
| |
| /* Helper: sends the message and an optional IPI to the vcore. Sends to the |
| * public mbox. */ |
| static void spam_vcore(struct proc *p, uint32_t vcoreid, |
| struct event_msg *ev_msg, int ev_flags) |
| { |
| post_vc_msg(p, vcoreid, get_vcpd_mbox_pub(vcoreid), ev_msg, ev_flags); |
| try_notify(p, vcoreid, ev_flags); |
| } |
| |
| /* Attempts to message a vcore that may or may not have VC_CAN_RCV_MSG set. If |
| * so, we'll post the message and the message will eventually get dealt with |
| * (when the vcore runs or when it is preempt-recovered). */ |
| static bool try_spam_vcore(struct proc *p, uint32_t vcoreid, |
| struct event_msg *ev_msg, int ev_flags) |
| { |
| /* Not sure if we can or not, so check before spamming. Technically, the |
| * only critical part is that we spam_vcore(), then check can_msg_vcore(). */ |
| if (can_msg_vcore(vcoreid)) { |
| spam_vcore(p, vcoreid, ev_msg, ev_flags); |
| /* prev write (notif_pending) must come before following reads */ |
| wrmb(); |
| if (can_msg_vcore(vcoreid)) |
| return TRUE; |
| } |
| return FALSE; |
| } |
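| |
| /* The check-signal-check above pairs with the vcore's yield path, which |
| * clears the flag and then looks for our post. A rough sketch of the |
| * userspace side (modeled on parlib's vcore_yield(), not verbatim): |
| * |
| *         atomic_and(&vcpd->flags, ~VC_CAN_RCV_MSG); |
| *         wrmb();        // the flag clear must precede the notif read |
| *         if (vcpd->notif_pending) { |
| *                 atomic_or(&vcpd->flags, VC_CAN_RCV_MSG); |
| *                 return;        // abort the yield; handle the message |
| *         } |
| *         // ... the actual yield syscall ... |
| * |
| * Either our post lands before their clear (their notif_pending check |
| * catches it), or our second can_msg_vcore() read sees the clear and we |
| * fail over to another vcore. */ |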
| |
| /* Helper: will try to message (INDIR/IPI) a list member (lists of vcores). We |
| * use this on the online and bulk_preempted vcore lists. If this succeeds in |
| * alerting a vcore on the list, it'll return TRUE. We need to be careful here, |
| * since we're reading a list that could be concurrently modified. The |
| * important thing is that we can always fail if we're unsure (such as with |
| * lists being temporarily empty). The caller will be able to deal with it via |
| * the ultimate fallback. */ |
| static bool spam_list_member(struct vcore_tailq *list, struct proc *p, |
| struct event_msg *ev_msg, int ev_flags) |
| { |
| struct vcore *vc, *vc_first; |
| uint32_t vcoreid; |
| int loops = 0; |
| vc = TAILQ_FIRST(list); |
| /* If the list appears empty, we'll bail out (failing) after the loop. */ |
| while (vc) { |
| vcoreid = vcore2vcoreid(p, vc); |
| /* post the alert. Not using the try_spam_vcore() helper since |
| * I want something more customized for the lists. */ |
| spam_vcore(p, vcoreid, ev_msg, ev_flags); |
| /* prev write (notif_pending) must come before following reads */ |
| wrmb(); |
| /* I used to check can_msg_vcore(vcoreid) here, but that would |
| * make spamming list members unusable for MUST_RUN scenarios. |
| * |
| * Regardless, if they are still the first on the list, then |
| * they are still going to get the message. For the online |
| * list, proc_yield() will return them to userspace (where they |
| * will get the message) because spam_vcore() set |
| * notif_pending. For the BP list, they will either be turned |
| * on later, or have a preempt message sent about their demise. |
| * |
| * We race on list membership, not just VC_CAN_RCV_MSG, so that |
| * when this fails we can get a new vcore to try (or know, with |
| * high probability, that there are none). */ |
| vc_first = TAILQ_FIRST(list); |
| if (vc == vc_first) |
| return TRUE; |
| /* At this point, the list has changed and the vcore we tried |
| * yielded, so we try the *new* list head. Track loops for |
| * sanity reasons. */ |
| if (loops++ > 10) { |
| warn("Too many (%d) attempts to find a vcore, failing!", |
| loops); |
| return FALSE; /* always safe to fail! */ |
| } |
| /* Get set up for your attack run! */ |
| vc = vc_first; |
| } |
| return FALSE; |
| } |
| |
| /* This makes sure ev_msg is sent to some vcore, preferring vcoreid. |
| * |
| * One of the goals of SPAM_INDIR (and this func) is to allow processes to yield |
| * cores without fear of losing messages. Even when yielding and getting |
| * preempted, if your message is spammed, it will get to some vcore. If |
| * MUST_RUN is set, it'll get to a running vcore. Messages that you send like |
| * this must be able to handle spurious reads, since more than one vcore is |
| * likely to get the message and handle it. |
| * |
| * We try the desired vcore, using VC_CAN_RCV_MSG. Failing that, we'll search |
| * the online and then the bulk_preempted lists. These lists serve as a way to |
| * find likely messageable vcores. spam_list_member() helps us with them, |
| * failing if anything seems to go wrong, at which point we just lock and try |
| * to deal with things. In that scenario, we most likely would need to lock |
| * anyway to wake up the process (if it was WAITING). |
| * |
| * One tricky thing with sending to the bulk_preempt list is that we may want to |
| * send a message about a (bulk) preemption to someone on that list. This works |
| * since a given vcore that was preempted will be removed from that list before |
| * we try to send_event() (in theory, there isn't code that can send that event |
| * yet). Someone else will get the event and wake up the preempted vcore. */ |
| static void spam_public_msg(struct proc *p, struct event_msg *ev_msg, |
| uint32_t vcoreid, int ev_flags) |
| { |
| struct vcore *vc; |
| if (!__proc_is_mcp(p)) { |
| spam_vcore(p, 0, ev_msg, ev_flags); |
| return; |
| } |
| if (ev_flags & EVENT_VCORE_MUST_RUN) { |
| /* Could check for waiting and skip these spams, which will |
| * fail. Could also skip trying for vcoreid, and just spam any |
| * old online VC. */ |
| if (vcore_is_mapped(p, vcoreid)) { |
| /* check, signal, check again */ |
| spam_vcore(p, vcoreid, ev_msg, ev_flags); |
| /* notif_pending write must come before the following read */ |
| wrmb(); |
| if (vcore_is_mapped(p, vcoreid)) |
| return; |
| } |
| if (spam_list_member(&p->online_vcs, p, ev_msg, ev_flags)) |
| return; |
| goto ultimate_fallback; |
| } |
| /* First, try posting to the desired vcore */ |
| if (try_spam_vcore(p, vcoreid, ev_msg, ev_flags)) |
| return; |
| /* If the process is WAITING, let's just jump to the fallback */ |
| if (p->state == PROC_WAITING) |
| goto ultimate_fallback; |
| /* If we're here, the desired vcore is unreachable, but the process is |
| * probably RUNNING_M (online_vcs) or RUNNABLE_M (bulk preempted or |
| * recently woken up), so we'll need to find another vcore. */ |
| if (spam_list_member(&p->online_vcs, p, ev_msg, ev_flags)) |
| return; |
| if (spam_list_member(&p->bulk_preempted_vcs, p, ev_msg, ev_flags)) |
| return; |
| /* Last chance, let's check the head of the inactives. It might be |
| * alertable (the kernel set it earlier due to an event, or it was a |
| * bulk_preempt that didn't restart), and we can avoid grabbing the |
| * proc_lock. */ |
| vc = TAILQ_FIRST(&p->inactive_vcs); |
| if (vc) { /* might be none in rare circumstances */ |
| if (try_spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags)) { |
| /* It's possible that we're WAITING here. EVENT_WAKEUP |
| * will handle it. One way for this to happen is if a |
| * normal vcore was preempted right as another vcore was |
| * yielding, and the preempted message was sent after |
| * the last vcore yielded (which caused us to be |
| * WAITING). */ |
| return; |
| } |
| } |
| ultimate_fallback: |
| /* At this point, we can't find one. This could be due to a (hopefully |
| * rare) weird yield/request storm, or more commonly because the lists |
| * were empty and the process is simply WAITING (yielded all of its |
| * vcores and is waiting on an event). Time for the ultimate fallback: |
| * locking. Note that when we spam_vcore(), there is a chance we |
| * need to mmap, which grabs the vmr_lock and pte_lock. */ |
| spin_lock(&p->proc_lock); |
| if (p->state != PROC_WAITING) { |
| /* We need to check the online and bulk_preempt lists again, now |
| * that we are sure no one is messing with them. If we're |
| * WAITING, we can skip these (or assert they are empty!). */ |
| vc = TAILQ_FIRST(&p->online_vcs); |
| if (vc) { |
| /* there's an online vcore, so just alert it (we know it |
| * isn't going anywhere), and return */ |
| spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags); |
| spin_unlock(&p->proc_lock); |
| return; |
| } |
| vc = TAILQ_FIRST(&p->bulk_preempted_vcs); |
| if (vc) { |
| /* the process is bulk preempted, similar deal to above |
| */ |
| spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags); |
| spin_unlock(&p->proc_lock); |
| return; |
| } |
| } |
| /* At this point, we're sure all vcores are yielded, though we might not |
| * be WAITING. Post to the first on the inactive list (which is the one |
| * that will definitely be woken up) */ |
| vc = TAILQ_FIRST(&p->inactive_vcs); |
| assert(vc); |
| spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags); |
| /* Set the vcore's alertable flag, to short-circuit our last-ditch |
| * effort above */ |
| set_vcore_msgable(vcore2vcoreid(p, vc)); |
| /* The first event to catch the process with no online/bp vcores will |
| * need to wake it up, which is handled elsewhere if they requested |
| * EVENT_WAKEUP. We could be RUNNABLE_M here if another event already |
| * woke us and we didn't get lucky with the penultimate fallback. */ |
| spin_unlock(&p->proc_lock); |
| } |
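| |
| /* The ev_qs that reach this path look like a second-level scheduler's |
| * (2LS) preemption-handler queue; an illustrative (not verbatim) setup: |
| * |
| *         ev_q->ev_flags = EVENT_IPI | EVENT_SPAM_PUBLIC | EVENT_WAKEUP | |
| *                          EVENT_VCORE_MUST_RUN; |
| * |
| * MUST_RUN matters for preemption messages: they are useless if they only |
| * ever land in another preempted vcore's mbox. */ |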
| |
| /* Helper: sends an indirection event for an ev_q, preferring vcoreid */ |
| static void send_indir(struct proc *p, struct event_queue *ev_q, |
| uint32_t vcoreid) |
| { |
| struct event_msg local_msg = {0}; |
| /* If an alert is already pending and they don't want repeats, just |
| * return. One of the few uses of NOTHROTTLE will be for preempt_msg |
| * ev_qs. Ex: an INDIR was already sent to the preempted vcore, then |
| * alert throttling would stop another vcore from getting the message |
| * about the original vcore. */ |
| if (!(ev_q->ev_flags & EVENT_NOTHROTTLE) && (ev_q->ev_alert_pending)) |
| return; |
| /* We'll eventually get an INDIR through, so don't send any more until |
| * userspace toggles this. Regardless of other writers to this flag, we |
| * eventually send an alert that causes userspace to turn throttling off |
| * again (before handling all of the ev_q's events). |
| * |
| * This will also squelch IPIs, since there's no reason to send the IPI |
| * if the INDIR is still un-acknowledged. The vcore is either in vcore |
| * context, attempting to deal with the INDIR, or offline. This |
| * statement is probably true. */ |
| ev_q->ev_alert_pending = TRUE; |
| wmb(); /* force this write to happen before any event writes */ |
| local_msg.ev_type = EV_EVENT; |
| local_msg.ev_arg3 = ev_q; |
| /* If we're not spamming indirs, just send and be done with it. |
| * |
| * It's possible that the user does not want to poll their evq and wants |
| * an INDIR, but also doesn't care about sleeping or otherwise not |
| * getting the message right away. The INDIR could sit in the VCPD of a |
| * vcore that doesn't run for a while. Perhaps if the app always made |
| * sure VC 0 was on when it was running at all, and sent the INDIR |
| * there. Or there was a per-vc evq that only needed to be handled when |
| * the VC turned on. This gets at another aspect of INDIRs, other than |
| * its need for "only once" operation: maybe the mbox type isn't a UCQ |
| * (like the VCPD mboxes). */ |
| if (!(ev_q->ev_flags & EVENT_SPAM_INDIR)) { |
| spam_vcore(p, vcoreid, &local_msg, ev_q->ev_flags); |
| return; |
| } |
| /* At this point, we actually want to send and spam an INDIR. |
| * This will guarantee the message makes it to some vcore. */ |
| spam_public_msg(p, &local_msg, vcoreid, ev_q->ev_flags); |
| } |
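| |
| /* An INDIR is just an EV_EVENT message whose ev_arg3 carries the real |
| * ev_q. A rough sketch of the userspace half of the throttle (modeled on |
| * parlib's EV_EVENT handler, not verbatim): |
| * |
| *         struct event_queue *ev_q = msg->ev_arg3; |
| * |
| *         ev_q->ev_alert_pending = FALSE; // re-arm *before* draining, so |
| *         wrmb();                         // a msg posted mid-drain re-INDIRs |
| *         handle_mbox(ev_q->ev_mbox); |
| */ |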
| |
| /* Send an event to ev_q, based on the parameters in ev_q's flags. We don't |
| * accept null ev_qs, since the caller ought to be checking before bothering to |
| * make a msg and send it to the event_q. Vcoreid is who the kernel thinks the |
| * message ought to go to (for IPIs). Appropriate for things like |
| * EV_PREEMPT_PENDING, where we tell the affected vcore. To have the message go |
| * where the kernel suggests, set EVENT_VCORE_APPRO(priate). */ |
| void send_event(struct proc *p, struct event_queue *ev_q, struct event_msg *msg, |
| uint32_t vcoreid) |
| { |
| uintptr_t old_proc; |
| struct event_mbox *ev_mbox = 0; |
| |
| assert(!in_irq_ctx(&per_cpu_info[core_id()])); |
| assert(p); |
| if (proc_is_dying(p)) |
| return; |
| printd("[kernel] sending msg to proc %p, ev_q %p\n", p, ev_q); |
| assert(is_user_rwaddr(ev_q, sizeof(struct event_queue))); |
| /* ev_q is a user pointer, so we need to make sure we're in the right |
| * address space */ |
| old_proc = switch_to(p); |
| /* Get the vcoreid that we'll message (if appropriate). For INDIR and |
| * SPAMMING, this is the first choice of a vcore, but other vcores might |
| * get it. Common case is !APPRO and !ROUNDROBIN. Note we are |
| * clobbering the vcoreid parameter. */ |
| if (!(ev_q->ev_flags & EVENT_VCORE_APPRO)) |
| vcoreid = ev_q->ev_vcore; /* use the ev_q's vcoreid */ |
| /* Note that RR overwrites APPRO */ |
| if (ev_q->ev_flags & EVENT_ROUNDROBIN) { |
| /* Pick a vcore, round-robin style. Assuming ev_vcore was the |
| * previous one used. Note that round-robin overrides the |
| * passed-in vcoreid. Also note this may be 'wrong' if |
| * num_vcores changes. */ |
| vcoreid = (ev_q->ev_vcore + 1) % p->procinfo->num_vcores; |
| ev_q->ev_vcore = vcoreid; |
| } |
| if (!vcoreid_is_safe(vcoreid)) { |
| /* Ought to kill them, just warn for now */ |
| printk("[kernel] Vcoreid %d unsafe! (too big?)\n", vcoreid); |
| goto out; |
| } |
| /* If we're a SPAM_PUBLIC, they just want us to spam the message. Note |
| * we don't care about the mbox, since it'll go to VCPD public mboxes, |
| * and we'll prefer to send it to whatever vcoreid we determined at this |
| * point (via APPRO or whatever). */ |
| if (ev_q->ev_flags & EVENT_SPAM_PUBLIC) { |
| spam_public_msg(p, msg, vcoreid, ev_q->ev_flags); |
| goto wakeup; |
| } |
| /* We aren't spamming and we know the default vcore, and now we need to |
| * figure out which mbox to use. If they provided an mbox, we'll use |
| * it. If not, we'll use a VCPD mbox (public or private, depending on |
| * the flags). */ |
| ev_mbox = ev_q->ev_mbox; |
| if (!ev_mbox) |
| ev_mbox = get_vcpd_mbox(vcoreid, ev_q->ev_flags); |
| /* At this point, we ought to have the right mbox to send the msg to, |
| * and which vcore to alert (IPI/INDIR) (if applicable). The mbox could |
| * be the vcore's vcpd ev_mbox. */ |
| if (!ev_mbox) { |
| /* This shouldn't happen any more, this is more for sanity's |
| * sake */ |
| warn("[kernel] ought to have an mbox by now!"); |
| goto out; |
| } |
| /* Even if we're using an mbox in procdata (VCPD), we want a user |
| * pointer */ |
| if (!is_user_rwaddr(ev_mbox, sizeof(struct event_mbox))) { |
| /* Ought to kill them, just warn for now */ |
| printk("[kernel] Illegal addr for ev_mbox\n"); |
| goto out; |
| } |
| post_ev_msg(p, ev_mbox, msg, ev_q->ev_flags); |
| wmb(); /* ensure ev_msg write is before alerting the vcore */ |
| /* Prod/alert a vcore with an IPI or INDIR, if desired. INDIR will also |
| * call try_notify (IPI) later */ |
| if (ev_q->ev_flags & EVENT_INDIR) { |
| send_indir(p, ev_q, vcoreid); |
| } else { |
| /* they may want an IPI despite not wanting an INDIR */ |
| try_notify(p, vcoreid, ev_q->ev_flags); |
| } |
| wakeup: |
| if ((ev_q->ev_flags & EVENT_WAKEUP) && (p->state == PROC_WAITING)) |
| proc_wakeup(p); |
| /* Fall through */ |
| out: |
| /* Return to the old address space. */ |
| switch_back(p, old_proc); |
| } |
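| |
| /* A typical caller builds the message on the stack and fires it at a |
| * user-registered ev_q; a sketch modeled on syscall-completion events: |
| * |
| *         struct event_msg local_msg = {0}; |
| * |
| *         local_msg.ev_type = EV_SYSCALL; |
| *         local_msg.ev_arg3 = sysc;       // user pointer, echoed back |
| *         send_event(p, ev_q, &local_msg, vcoreid); |
| * |
| * send_posix_signal() below is a live example of the same pattern. */ |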
| |
| /* Send an event for the kernel event ev_num. These are the "one-sided", |
| * kernel-initiated events that require a lookup of the ev_q in procdata. |
| * This is roughly equivalent to the old "proc_notify()". */ |
| void send_kernel_event(struct proc *p, struct event_msg *msg, uint32_t vcoreid) |
| { |
| uint16_t ev_num = msg->ev_type; |
| assert(ev_num < MAX_NR_EVENT); /* events start at 0 */ |
| struct event_queue *ev_q = p->procdata->kernel_evts[ev_num]; |
| /* Linux would put an rmb_depends() here too, I think. */ |
| if (ev_q) |
| send_event(p, ev_q, msg, vcoreid); |
| } |
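| |
| /* Userspace arms these by publishing an ev_q in procdata, roughly (parlib |
| * has a helper for this; names here are illustrative): |
| * |
| *         struct event_queue *ev_q = new_ev_q();  // hypothetical helper |
| * |
| *         ev_q->ev_flags = EVENT_IPI | EVENT_INDIR | EVENT_SPAM_INDIR; |
| *         ev_q->ev_vcore = vcoreid; |
| *         wmb();          // fill in the ev_q before publishing it |
| *         __procdata.kernel_evts[ev_type] = ev_q; |
| */ |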
| |
| /* Writes the msg to the vcpd mbox of the vcore. If you want the private mbox, |
| * send in the ev_flag EVENT_VCORE_PRIVATE. If not, the message could |
| * be received by other vcores if the given vcore is offline/preempted/etc. |
| * Whatever other flags you pass in will get sent to post_ev_msg, though |
| * currently it is the mbox's type, not a flag, that determines how the |
| * message gets posted (see post_ev_msg()). |
| * |
| * This needs to load current (switch_to), but doesn't need to care about what |
| * the process wants. Note this isn't commonly used - just the monitor and |
| * sys_self_notify(). */ |
| void post_vcore_event(struct proc *p, struct event_msg *msg, uint32_t vcoreid, |
| int ev_flags) |
| { |
| /* Need to set p as current to post the event */ |
| uintptr_t old_proc = switch_to(p); |
| |
| /* get_vcpd_mbox() returns the user address of the vcpd mbox */ |
| post_vc_msg(p, vcoreid, get_vcpd_mbox(vcoreid, ev_flags), msg, ev_flags); |
| switch_back(p, old_proc); |
| } |
| |
| /* Attempts to send a posix signal to the process. If they do not have an ev_q |
| * registered for EV_POSIX_SIGNAL, then nothing will happen. */ |
| void send_posix_signal(struct proc *p, int sig_nr) |
| { |
| struct event_msg local_msg = {0}; |
| local_msg.ev_type = EV_POSIX_SIGNAL; |
| local_msg.ev_arg1 = sig_nr; |
| send_kernel_event(p, &local_msg, 0); |
| } |