#include <parlib/vcore.h>
#include <parlib/mcs.h>
#include <parlib/arch/atomic.h>
#include <string.h>
#include <stdlib.h>
#include <parlib/uthread.h>
#include <parlib/parlib.h>
#include <malloc.h>

// MCS locks
void mcs_lock_init(struct mcs_lock *lock)
{
	memset(lock, 0, sizeof(mcs_lock_t));
}

static inline mcs_lock_qnode_t *mcs_qnode_swap(mcs_lock_qnode_t **addr,
                                               mcs_lock_qnode_t *val)
{
	return (mcs_lock_qnode_t*)atomic_swap_ptr((void**)addr, val);
}

void mcs_lock_lock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
	qnode->next = 0;
	cmb(); /* swap provides a CPU mb() */
	mcs_lock_qnode_t *predecessor = mcs_qnode_swap(&lock->lock, qnode);
	if (predecessor) {
		qnode->locked = 1;
		wmb();
		predecessor->next = qnode;
		/* no need for a wrmb(), since this will only get unlocked after
		 * they read our previous write */
		while (qnode->locked)
			cpu_relax();
	}
	cmb(); /* just need a cmb, the swap handles the CPU wmb/wrmb() */
}

void mcs_lock_unlock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb(); /* no need for CPU mbs, since there's an atomic_swap() */
		/* Unlock it */
		mcs_lock_qnode_t *old_tail = mcs_qnode_swap(&lock->lock, 0);
		/* no one else was already waiting, so we successfully unlocked
		 * and can return */
		if (old_tail == qnode)
			return;
		/* someone else was already waiting on the lock (last one on the
		 * list), and we accidentally took them off. Try to put it
		 * back. */
		mcs_lock_qnode_t *usurper = mcs_qnode_swap(&lock->lock, old_tail);
		/* since someone else was waiting, they should have made
		 * themselves our next. Spin (very briefly!) til it happens. */
		while (qnode->next == 0)
			cpu_relax();
		if (usurper) {
			/* a usurper is someone who snuck in before we could
			 * put the old tail back. They now have the lock.
			 * Let's put whoever is supposed to be next as their
			 * next one. */
			usurper->next = qnode->next;
		} else {
			/* No usurper meant we put things back correctly, so we
			 * should just pass the lock / unlock whoever is next */
			qnode->next->locked = 0;
		}
	} else {
		/* mb()s necessary since we didn't call an atomic_swap() */
		/* need to make sure any previous writes don't pass unlocking */
		wmb();
		/* need to make sure any reads happen before the unlocking */
		rwmb();
		/* simply unlock whoever is next */
		qnode->next->locked = 0;
	}
}
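
/* Usage sketch (illustrative only, not compiled): the basic MCS lock API
 * above takes a caller-provided qnode, typically on the stack, and the same
 * qnode must be passed to the matching unlock; the lock/unlock calls fill in
 * 'next' and 'locked' as needed. 'example_lock' and 'example_counter' are
 * hypothetical names for illustration. */
#if 0
static struct mcs_lock example_lock;	/* mcs_lock_init() it once at startup */
static int example_counter;

static void example_critical_section(void)
{
	struct mcs_lock_qnode qnode;

	mcs_lock_lock(&example_lock, &qnode);
	example_counter++;	/* the critical section */
	mcs_lock_unlock(&example_lock, &qnode);
}
#endif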

/* CAS style mcs locks, kept around til we use them. We're using the
 * usurper-style, since RISCV doesn't have a real CAS (yet?). */
void mcs_lock_unlock_cas(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
		/* If we're still the lock's tail, just CAS it with 0 (unlock)
		 * and return */
		if (atomic_cas_ptr((void**)&lock->lock, qnode, 0))
			return;
		/* We failed, someone is there and we are some (maybe a
		 * different) thread's pred. Since someone else was waiting,
		 * they should have made themselves our next. Spin (very
		 * briefly!) til it happens. */
		while (qnode->next == 0)
			cpu_relax();
		/* Alpha wants a read_barrier_depends() here */
		/* Now that we have a next, unlock them */
		qnode->next->locked = 0;
	} else {
		/* mb()s necessary since we didn't call an atomic_swap() */
		/* need to make sure any previous writes don't pass unlocking */
		wmb();
		/* need to make sure any reads happen before the unlocking */
		rwmb();
		/* simply unlock whoever is next */
		qnode->next->locked = 0;
	}
}

/* We don't bother saving the state, like we do with irqsave, since we can use
 * whether or not we are in vcore context to determine that. This means you
 * shouldn't call this from those moments when you fake being in vcore context
 * (when switching into the TLS, etc). */
void mcs_lock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
	uth_disable_notifs();
	mcs_lock_lock(lock, qnode);
}

/* Note we turn off the DONT_MIGRATE flag before enabling notifs. This is fine,
 * since we wouldn't receive any notifs that could lead to us migrating after we
 * set DONT_MIGRATE but before enable_notifs(). We need it to be in this order,
 * since we need to check messages after ~DONT_MIGRATE. */
void mcs_unlock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
	mcs_lock_unlock(lock, qnode);
	uth_enable_notifs();
}
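
/* Usage sketch (illustrative only, not compiled): same pattern as above, but
 * with notifs disabled for the duration of the hold, for uthread code that
 * must not handle notifications while holding the lock. 'example_list_lock'
 * is a hypothetical name. */
#if 0
static struct mcs_lock example_list_lock;

static void example_notifsafe_update(void)
{
	struct mcs_lock_qnode qnode;

	mcs_lock_notifsafe(&example_list_lock, &qnode);
	/* ... critical section, runs with notifs disabled ... */
	mcs_unlock_notifsafe(&example_list_lock, &qnode);
}
#endif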

/* Preemption detection and recovery (PDR) MCS locks. */
/* Old style. Has trouble getting out of 'preempt/change-to storms' under
 * heavy contention and with preemption. */
void mcs_pdro_init(struct mcs_pdro_lock *lock)
{
	lock->lock = 0;
}

void mcs_pdro_fini(struct mcs_pdro_lock *lock)
{
}

/* Internal version of the locking function, doesn't care if notifs are
 * disabled. While spinning, we'll check to see if other vcores involved in the
 * locking are running. If we change to that vcore, we'll continue when our
 * vcore gets restarted. If the change fails, it is because the vcore is
 * running, and we'll continue.
 *
 * It's worth noting that changing to another vcore won't hurt correctness.
 * Even if they are no longer the lockholder, they will be checking preemption
 * messages and will help break out of the deadlock. So long as we don't
 * spin uncontrollably, we're okay. */
void __mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
	struct mcs_pdro_qnode *predecessor;
	uint32_t pred_vcoreid;

	qnode->next = 0;
	cmb(); /* swap provides a CPU mb() */
	predecessor = atomic_swap_ptr((void**)&lock->lock, qnode);
	if (predecessor) {
		qnode->locked = 1;
		/* Read in the vcoreid before releasing them. We won't need to
		 * worry about their qnode memory being freed/reused (they can't
		 * til we fill in the 'next' slot), which is a bit of a
		 * performance win. This also cuts down on cache-line
		 * contention when we ensure they run, which helps a lot too. */
		pred_vcoreid = ACCESS_ONCE(predecessor->vcoreid);
		wmb(); /* order the locked write before the next write */
		predecessor->next = qnode;
		/* no need for a wrmb(), since this will only get unlocked after
		 * they read our previous write */
		while (qnode->locked) {
			/* We don't know who the lock holder is (it hurts
			 * performance via 'true' sharing to track it).
			 * Instead, we'll make sure our pred is running, which
			 * trickles up to the lock holder. */
			ensure_vcore_runs(pred_vcoreid);
			cpu_relax();
		}
	}
}

/* Using the CAS style unlocks, since the usurper recovery is a real pain in the
 * ass */
void __mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
	uint32_t a_tail_vcoreid;
	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
		/* If we're still the lock's tail, just CAS it with 0 (unlock)
		 * and return */
		if (atomic_cas_ptr((void**)&lock->lock, qnode, 0))
			return;
		/* Read in the tail (or someone who recently was the tail, but
		 * could now be farther up the chain), in prep for our spinning.
		 */
		a_tail_vcoreid = ACCESS_ONCE(lock->lock->vcoreid);
		/* We failed, someone is there and we are some (maybe a
		 * different) thread's pred. Since someone else was waiting,
		 * they should have made themselves our next. Spin (very
		 * briefly!) til it happens. */
		while (qnode->next == 0) {
			/* We need to get our next to run, but we don't know who
			 * they are. If we make sure a tail is running, that
			 * will percolate up to make sure our qnode->next is
			 * running */
			ensure_vcore_runs(a_tail_vcoreid);
			cpu_relax();
		}
		/* Alpha wants a read_barrier_depends() here */
		/* Now that we have a next, unlock them */
		qnode->next->locked = 0;
	} else {
		/* mb()s necessary since we didn't call an atomic_swap() */
		/* need to make sure any previous writes don't pass unlocking */
		wmb();
		/* need to make sure any reads happen before the unlocking */
		rwmb();
		/* simply unlock whoever is next */
		qnode->next->locked = 0;
	}
}

/* Actual MCS-PDRO locks. Don't worry about initializing any fields of qnode.
 * We'll do vcoreid here, and the locking code deals with the other fields */
void mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
	/* Disable notifs, if we're an _M uthread */
	uth_disable_notifs();
	cmb(); /* in the off-chance the compiler wants to read vcoreid early */
	qnode->vcoreid = vcore_id();
	__mcs_pdro_lock(lock, qnode);
}

/* CAS-less unlock, not quite as efficient and will make sure every vcore runs
 * (since we don't have a convenient way to make sure our qnode->next runs
 * yet, other than making sure everyone runs).
 *
 * To use this without ensuring all vcores run, you'll need the unlock code to
 * save pred to a specific field in the qnode and check both its initial pred
 * as well as its run time pred (who could be a usurper). It's all possible,
 * but a little more difficult to follow. Also, I'm adjusting this comment
 * months after writing it originally, so it is probably necessary but not
 * sufficient. */
void __mcs_pdro_unlock_no_cas(struct mcs_pdro_lock *lock,
                              struct mcs_pdro_qnode *qnode)
{
	struct mcs_pdro_qnode *old_tail, *usurper;

	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb(); /* no need for CPU mbs, since there's an atomic_swap() */
		/* Unlock it */
		old_tail = atomic_swap_ptr((void**)&lock->lock, 0);
		/* no one else was already waiting, so we successfully unlocked
		 * and can return */
		if (old_tail == qnode)
			return;
		/* someone else was already waiting on the lock (last one on the
		 * list), and we accidentally took them off. Try to put it
		 * back. */
		usurper = atomic_swap_ptr((void**)&lock->lock, old_tail);
		/* since someone else was waiting, they should have made
		 * themselves our next. Spin (very briefly!) til it happens. */
		while (qnode->next == 0) {
			/* make sure old_tail isn't preempted. Best we can do
			 * for now is to make sure all vcores run, and thereby
			 * get our next. */
			for (int i = 0; i < max_vcores(); i++)
				ensure_vcore_runs(i);
			cpu_relax();
		}
		if (usurper) {
			/* a usurper is someone who snuck in before we could
			 * put the old tail back. They now have the lock.
			 * Let's put whoever is supposed to be next as their
			 * next one.
			 *
			 * First, we need to change our next's pred. There's a
			 * slight race here, so our next will need to make sure
			 * both us and pred are done */
			/* I was trying to do something so we didn't need to
			 * ensure all vcores run, using more space in the qnode
			 * to figure out who our pred was at lock time (guessing
			 * actually, since there's a race, etc). */
			//qnode->next->pred = usurper;
			//wmb();
			usurper->next = qnode->next;
			/* could imagine another wmb() and a flag so our next
			 * knows to no longer check us too. */
		} else {
			/* No usurper meant we put things back correctly, so we
			 * should just pass the lock / unlock whoever is next */
			qnode->next->locked = 0;
		}
	} else {
		/* mb()s necessary since we didn't call an atomic_swap() */
		/* need to make sure any previous writes don't pass unlocking */
		wmb();
		/* need to make sure any reads happen before the unlocking */
		rwmb();
		/* simply unlock whoever is next */
		qnode->next->locked = 0;
	}
}

void mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
	__mcs_pdro_unlock(lock, qnode);
	/* Enable notifs, if we're an _M uthread */
	uth_enable_notifs();
}
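
/* Usage sketch (illustrative only, not compiled): MCS-PDRO looks like the
 * basic MCS lock from the caller's side; mcs_pdro_lock() handles notifs and
 * fills in the qnode's vcoreid. 'example_pdro_lock' is a hypothetical name,
 * and mcs_pdro_init() is assumed to have run once beforehand. */
#if 0
static struct mcs_pdro_lock example_pdro_lock;

static void example_pdro_critical_section(void)
{
	struct mcs_pdro_qnode qnode;

	mcs_pdro_lock(&example_pdro_lock, &qnode);
	/* ... critical section; the chain can recover if a vcore in it is
	 * preempted ... */
	mcs_pdro_unlock(&example_pdro_lock, &qnode);
}
#endif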

/* New style: under heavy contention with preemption, they won't enter the
 * 'preempt/change_to storm' that can happen to PDRs, at the cost of some
 * performance. This is the default. */
void mcs_pdr_init(struct mcs_pdr_lock *lock)
{
	int ret;

	lock->lock = 0;
	lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
	ret = posix_memalign((void**)&lock->qnodes,
	                     __alignof__(struct mcs_pdr_qnode),
	                     sizeof(struct mcs_pdr_qnode) * max_vcores());
	assert(!ret);
}

void mcs_pdr_fini(struct mcs_pdr_lock *lock)
{
	free(lock->qnodes);
}

/* Similar to the original PDR lock, this tracks the lockholder for better
 * recovery from preemptions. Under heavy contention, changing to the
 * lockholder instead of pred makes it more likely to have a vcore outside the
 * MCS chain handle the preemption. If that never happens, performance will
 * suffer.
 *
 * Simply checking the lockholder causes a lot of unnecessary traffic, so we
 * first look for signs of preemption in read-mostly locations (by comparison,
 * the lockholder changes on every lock/unlock).
 *
 * We also use the "qnodes are in the lock" style, which is slightly slower than
 * using the stack in regular MCS/MCSPDR locks, but it speeds PDR up a bit by
 * not having to read other qnodes' memory to determine their vcoreid. The
 * slowdown may be due to some weird caching/prefetch settings (like Adjacent
 * Cacheline Prefetch).
 *
 * Note that these locks, like all PDR locks, have opportunities to accidentally
 * ensure some vcore runs that isn't in the chain. Whenever we read lockholder
 * or even pred, that particular vcore might subsequently unlock and then get
 * preempted (or change_to someone else) before we ensure they run. If this
 * happens and there is another VC in the MCS chain, it will make sure the right
 * cores run. If there are no other vcores in the chain, it is up to the rest
 * of the vcore/event handling system to deal with this, which should happen
 * when one of the other vcores handles the preemption message generated by our
 * change_to. */
void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
{
	struct mcs_pdr_qnode *predecessor;
	uint32_t pred_vcoreid;
	struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();
	seq_ctr_t seq;

	qnode->next = 0;
	cmb(); /* swap provides a CPU mb() */
	predecessor = atomic_swap_ptr((void**)&lock->lock, qnode);
	if (predecessor) {
		qnode->locked = 1;
		/* can compute this whenever */
		pred_vcoreid = predecessor - qnode0;
		wmb(); /* order the locked write before the next write */
		predecessor->next = qnode;
		seq = ACCESS_ONCE(__procinfo.coremap_seqctr);
		/* no need for a wrmb(), since this will only get unlocked after
		 * they read our pred->next write */
		while (qnode->locked) {
			/* Check to see if anything is amiss. If someone in the
			 * chain is preempted, then someone will notice. Simply
			 * checking our pred isn't that great of an indicator of
			 * preemption. The reason is that the offline vcore is
			 * most likely the lockholder (under heavy lock
			 * contention), and we want someone farther back in the
			 * chain to notice (someone that will stay preempted
			 * long enough for a vcore outside the chain to recover
			 * them). Checking the seqctr will tell us of any
			 * preempts since we started, so if a storm starts while
			 * we're spinning, we can join in and try to save the
			 * lockholder before its successor gets it.
			 *
			 * Also, if we're the lockholder, then we need to let
			 * our pred run so they can hand us the lock. */
			if (vcore_is_preempted(pred_vcoreid) ||
			    seq != __procinfo.coremap_seqctr) {
				/* Note that we don't normally ensure our *pred*
				 * runs. */
				if (lock->lockholder_vcoreid ==
				    MCSPDR_NO_LOCKHOLDER ||
				    lock->lockholder_vcoreid == vcore_id())
					ensure_vcore_runs(pred_vcoreid);
				else
					ensure_vcore_runs(
						lock->lockholder_vcoreid);
			}
			cpu_relax();
		}
	} else {
		lock->lockholder_vcoreid = vcore_id();
	}
}
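
/* Sketch (illustrative only, not compiled) of the "qnodes are in the lock"
 * arithmetic used above: lock->qnodes is indexed by vcoreid, so subtracting
 * the array base from any qnode pointer recovers its owner's vcoreid without
 * reading that qnode's memory. 'some_qnode' is a hypothetical pointer into
 * lock->qnodes, e.g. a predecessor returned by the swap in __mcs_pdr_lock. */
#if 0
static uint32_t example_qnode_to_vcoreid(struct mcs_pdr_lock *lock,
                                         struct mcs_pdr_qnode *some_qnode)
{
	/* __mcs_pdr_lock computes the same base as 'qnode - vcore_id()',
	 * since our own qnode is &lock->qnodes[vcore_id()] */
	struct mcs_pdr_qnode *qnode0 = lock->qnodes;

	return some_qnode - qnode0;
}
#endif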

void __mcs_pdr_unlock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
{
	uint32_t a_tail_vcoreid;
	struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();

	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
		/* If we're still the lock's tail, just CAS it with 0 (unlock)
		 * and return */
		if (atomic_cas_ptr((void**)&lock->lock, qnode, 0)) {
			/* This is racy with the new lockholder. It's possible
			 * that we'll clobber their legit write, though it
			 * doesn't actually hurt correctness. It'll get sorted
			 * out on the next unlock. */
			lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
			return;
		}
		/* Get the tail (or someone who recently was the tail, but could
		 * now be farther up the chain), in prep for our spinning.
		 * Could do an ACCESS_ONCE on lock->lock */
		a_tail_vcoreid = lock->lock - qnode0;
		/* We failed, someone is there and we are some (maybe a
		 * different) thread's pred. Since someone else was waiting,
		 * they should have made themselves our next. Spin (very
		 * briefly!) til it happens. */
		while (qnode->next == 0) {
			/* We need to get our next to run, but we don't know who
			 * they are. If we make sure a tail is running, that
			 * will percolate up to make sure our qnode->next is
			 * running.
			 *
			 * But first, we need to tell everyone that there is no
			 * specific lockholder. lockholder_vcoreid is a
			 * short-circuit on the "walk the chain" PDR. Normally,
			 * that's okay. But now we need to make sure everyone
			 * is walking the chain from a_tail up to our pred. */
			lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
			ensure_vcore_runs(a_tail_vcoreid);
			cpu_relax();
		}
		/* Alpha wants a read_barrier_depends() here */
		lock->lockholder_vcoreid = qnode->next - qnode0;
		wmb(); /* order the vcoreid write before the unlock */
		qnode->next->locked = 0;
	} else {
		/* Note we're saying someone else is the lockholder, though we
		 * still are the lockholder until we unlock the next qnode. Our
		 * next knows that if it sees that it is the lockholder, it
		 * needs to make sure we run. */
		lock->lockholder_vcoreid = qnode->next - qnode0;
		/* mb()s necessary since we didn't call an atomic_swap() */
		/* need to make sure any previous writes don't pass unlocking */
		wmb();
		/* need to make sure any reads happen before the unlocking */
		rwmb();
		/* simply unlock whoever is next */
		qnode->next->locked = 0;
	}
}

void mcs_pdr_lock(struct mcs_pdr_lock *lock)
{
	uth_disable_notifs();
	cmb(); /* in the off-chance the compiler wants to read vcoreid early */
	__mcs_pdr_lock(lock, &lock->qnodes[vcore_id()]);
}

void mcs_pdr_unlock(struct mcs_pdr_lock *lock)
{
	__mcs_pdr_unlock(lock, &lock->qnodes[vcore_id()]);
	uth_enable_notifs();
}
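
/* Usage sketch (illustrative only, not compiled): MCS-PDR keeps its per-vcore
 * qnodes inside the lock, so callers only pass the lock itself. Names
 * prefixed with 'example_' are hypothetical. */
#if 0
static struct mcs_pdr_lock example_pdr_lock;

static void example_pdr_setup(void)
{
	mcs_pdr_init(&example_pdr_lock);	/* allocates max_vcores() qnodes */
}

static void example_pdr_critical_section(void)
{
	mcs_pdr_lock(&example_pdr_lock);
	/* ... critical section ... */
	mcs_pdr_unlock(&example_pdr_lock);
}

static void example_pdr_teardown(void)
{
	mcs_pdr_fini(&example_pdr_lock);	/* frees the qnode array */
}
#endif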