| #include <vcore.h> |
| #include <mcs.h> |
| #include <arch/atomic.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <uthread.h> |
| #include <parlib.h> |
| #include <malloc.h> |
| |
| // MCS locks |
| void mcs_lock_init(struct mcs_lock *lock) |
| { |
	memset(lock, 0, sizeof(mcs_lock_t));
| } |
| |
| static inline mcs_lock_qnode_t *mcs_qnode_swap(mcs_lock_qnode_t **addr, |
| mcs_lock_qnode_t *val) |
| { |
| return (mcs_lock_qnode_t*)atomic_swap_ptr((void**)addr, val); |
| } |
| |
| void mcs_lock_lock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode) |
| { |
| qnode->next = 0; |
| cmb(); /* swap provides a CPU mb() */ |
| mcs_lock_qnode_t *predecessor = mcs_qnode_swap(&lock->lock, qnode); |
| if (predecessor) { |
| qnode->locked = 1; |
| wmb(); |
| predecessor->next = qnode; |
| /* no need for a wrmb(), since this will only get unlocked after they |
| * read our previous write */ |
| while (qnode->locked) |
| cpu_relax(); |
| } |
| cmb(); /* just need a cmb, the swap handles the CPU wmb/wrmb() */ |
| } |
| |
| void mcs_lock_unlock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode) |
| { |
| /* Check if someone is already waiting on us to unlock */ |
| if (qnode->next == 0) { |
| cmb(); /* no need for CPU mbs, since there's an atomic_swap() */ |
| /* Unlock it */ |
		mcs_lock_qnode_t *old_tail = mcs_qnode_swap(&lock->lock, 0);
| /* no one else was already waiting, so we successfully unlocked and can |
| * return */ |
| if (old_tail == qnode) |
| return; |
		/* Someone else was already waiting on the lock (they were the last one
		 * on the list), and we accidentally took them off. Try to put them
		 * back. */
		mcs_lock_qnode_t *usurper = mcs_qnode_swap(&lock->lock, old_tail);
		/* Since someone else was waiting, they should have made themselves our
		 * next. Spin (very briefly!) until it happens. */
| while (qnode->next == 0) |
| cpu_relax(); |
| if (usurper) { |
			/* A usurper is someone who snuck in before we could put the old
			 * tail back. They now have the lock. Let's put whoever is
			 * supposed to be next as their next one. */
| usurper->next = qnode->next; |
| } else { |
| /* No usurper meant we put things back correctly, so we should just |
| * pass the lock / unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } else { |
| /* mb()s necessary since we didn't call an atomic_swap() */ |
| wmb(); /* need to make sure any previous writes don't pass unlocking */ |
| rwmb(); /* need to make sure any reads happen before the unlocking */ |
| /* simply unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } |
| |
/* CAS-style MCS unlock, kept around until we use it. We're using the
 * usurper-style unlock above, since RISCV doesn't have a real CAS (yet?). */
| void mcs_lock_unlock_cas(struct mcs_lock *lock, struct mcs_lock_qnode *qnode) |
| { |
| /* Check if someone is already waiting on us to unlock */ |
| if (qnode->next == 0) { |
| cmb(); /* no need for CPU mbs, since there's an atomic_cas() */ |
		/* If our qnode is still the lock's tail, just swap it with 0 (unlock)
		 * and return */
| if (atomic_cas_ptr((void**)&lock->lock, qnode, 0)) |
| return; |
| /* We failed, someone is there and we are some (maybe a different) |
| * thread's pred. Since someone else was waiting, they should have made |
| * themselves our next. Spin (very briefly!) til it happens. */ |
| while (qnode->next == 0) |
| cpu_relax(); |
| /* Alpha wants a read_barrier_depends() here */ |
| /* Now that we have a next, unlock them */ |
| qnode->next->locked = 0; |
| } else { |
| /* mb()s necessary since we didn't call an atomic_swap() */ |
| wmb(); /* need to make sure any previous writes don't pass unlocking */ |
| rwmb(); /* need to make sure any reads happen before the unlocking */ |
| /* simply unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } |
| |
| /* We don't bother saving the state, like we do with irqsave, since we can use |
| * whether or not we are in vcore context to determine that. This means you |
| * shouldn't call this from those moments when you fake being in vcore context |
| * (when switching into the TLS, etc). */ |
| void mcs_lock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode) |
| { |
| uth_disable_notifs(); |
| mcs_lock_lock(lock, qnode); |
| } |
| |
| /* Note we turn off the DONT_MIGRATE flag before enabling notifs. This is fine, |
| * since we wouldn't receive any notifs that could lead to us migrating after we |
| * set DONT_MIGRATE but before enable_notifs(). We need it to be in this order, |
| * since we need to check messages after ~DONT_MIGRATE. */ |
| void mcs_unlock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode) |
| { |
| mcs_lock_unlock(lock, qnode); |
| uth_enable_notifs(); |
| } |
| |
| // MCS dissemination barrier! |
| int mcs_barrier_init(mcs_barrier_t* b, size_t np) |
| { |
| if(np > max_vcores()) |
| return -1; |
	b->allnodes = (mcs_dissem_flags_t*)malloc(np * sizeof(mcs_dissem_flags_t));
	if (!b->allnodes)
		return -1;
	memset(b->allnodes, 0, np * sizeof(mcs_dissem_flags_t));
| b->nprocs = np; |
| |
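	/* logp = ceil(log2(np)): start from 1 if np isn't a power of two, then
	 * add one per bit shifted out below (e.g. np == 6 gives logp == 3). */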
| b->logp = (np & (np-1)) != 0; |
| while(np >>= 1) |
| b->logp++; |
| |
| size_t i,k; |
| for(i = 0; i < b->nprocs; i++) |
| { |
| b->allnodes[i].parity = 0; |
| b->allnodes[i].sense = 1; |
| |
| for(k = 0; k < b->logp; k++) |
| { |
| size_t j = (i+(1<<k)) % b->nprocs; |
| b->allnodes[i].partnerflags[0][k] = &b->allnodes[j].myflags[0][k]; |
| b->allnodes[i].partnerflags[1][k] = &b->allnodes[j].myflags[1][k]; |
| } |
| } |
| |
| return 0; |
| } |
| |
| void mcs_barrier_wait(mcs_barrier_t* b, size_t pid) |
| { |
| mcs_dissem_flags_t* localflags = &b->allnodes[pid]; |
| size_t i; |
| for(i = 0; i < b->logp; i++) |
| { |
| *localflags->partnerflags[localflags->parity][i] = localflags->sense; |
| while(localflags->myflags[localflags->parity][i] != localflags->sense); |
| } |
| if(localflags->parity) |
| localflags->sense = 1-localflags->sense; |
| localflags->parity = 1-localflags->parity; |
| } |
| |
| /* Preemption detection and recovering MCS locks. */ |
| /* Old style. Has trouble getting out of 'preempt/change-to storms' under |
| * heavy contention and with preemption. */ |
| void mcs_pdro_init(struct mcs_pdro_lock *lock) |
| { |
| lock->lock = 0; |
| } |
| |
| void mcs_pdro_fini(struct mcs_pdro_lock *lock) |
| { |
| } |
| |
| /* Internal version of the locking function, doesn't care if notifs are |
| * disabled. While spinning, we'll check to see if other vcores involved in the |
| * locking are running. If we change to that vcore, we'll continue when our |
| * vcore gets restarted. If the change fails, it is because the vcore is |
| * running, and we'll continue. |
| * |
| * It's worth noting that changing to another vcore won't hurt correctness. |
| * Even if they are no longer the lockholder, they will be checking preemption |
| * messages and will help break out of the deadlock. So long as we don't |
| * spin uncontrollably, we're okay. */ |
| void __mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode) |
| { |
| struct mcs_pdro_qnode *predecessor; |
| uint32_t pred_vcoreid; |
| /* Now the actual lock */ |
| qnode->next = 0; |
| cmb(); /* swap provides a CPU mb() */ |
| predecessor = atomic_swap_ptr((void**)&lock->lock, qnode); |
| if (predecessor) { |
| qnode->locked = 1; |
		/* Read in the vcoreid before releasing them. We won't need to worry
		 * about their qnode memory being freed/reused (they can't until we
		 * fill in the 'next' slot), which is a bit of a performance win. This
		 * also cuts down on cache-line contention when we ensure they run,
		 * which helps a lot too. */
| pred_vcoreid = ACCESS_ONCE(predecessor->vcoreid); |
| wmb(); /* order the locked write before the next write */ |
| predecessor->next = qnode; |
| /* no need for a wrmb(), since this will only get unlocked after they |
| * read our previous write */ |
| while (qnode->locked) { |
			/* We don't know who the lock holder is (it hurts performance via
			 * 'true' sharing to track it). Instead we'll make sure our pred is
			 * running, which trickles up to the lock holder. */
| ensure_vcore_runs(pred_vcoreid); |
| cpu_relax(); |
| } |
| } |
| } |
| |
| /* Using the CAS style unlocks, since the usurper recovery is a real pain in the |
| * ass */ |
| void __mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode) |
| { |
| uint32_t a_tail_vcoreid; |
| /* Check if someone is already waiting on us to unlock */ |
| if (qnode->next == 0) { |
| cmb(); /* no need for CPU mbs, since there's an atomic_cas() */ |
		/* If our qnode is still the lock's tail, just swap it with 0 (unlock)
		 * and return */
| if (atomic_cas_ptr((void**)&lock->lock, qnode, 0)) |
| return; |
| /* Read in the tail (or someone who recently was the tail, but could now |
| * be farther up the chain), in prep for our spinning. */ |
| a_tail_vcoreid = ACCESS_ONCE(lock->lock->vcoreid); |
| /* We failed, someone is there and we are some (maybe a different) |
| * thread's pred. Since someone else was waiting, they should have made |
| * themselves our next. Spin (very briefly!) til it happens. */ |
| while (qnode->next == 0) { |
| /* We need to get our next to run, but we don't know who they are. |
| * If we make sure a tail is running, that will percolate up to make |
| * sure our qnode->next is running */ |
| ensure_vcore_runs(a_tail_vcoreid); |
| cpu_relax(); |
| } |
| /* Alpha wants a read_barrier_depends() here */ |
| /* Now that we have a next, unlock them */ |
| qnode->next->locked = 0; |
| } else { |
| /* mb()s necessary since we didn't call an atomic_swap() */ |
| wmb(); /* need to make sure any previous writes don't pass unlocking */ |
| rwmb(); /* need to make sure any reads happen before the unlocking */ |
| /* simply unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } |
| |
| /* Actual MCS-PDRO locks. Don't worry about initializing any fields of qnode. |
| * We'll do vcoreid here, and the locking code deals with the other fields */ |
| void mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode) |
| { |
| /* Disable notifs, if we're an _M uthread */ |
| uth_disable_notifs(); |
| cmb(); /* in the off-chance the compiler wants to read vcoreid early */ |
| qnode->vcoreid = vcore_id(); |
| __mcs_pdro_lock(lock, qnode); |
| } |
| |
/* CAS-less unlock, not quite as efficient, and it will make sure every vcore
 * runs (since we don't have a convenient way to make sure our qnode->next runs
 * yet, other than making sure everyone runs).
 *
 * To use this without ensuring all vcores run, you'll need the unlock code to
 * save pred to a specific field in the qnode and check both its initial pred
 * as well as its run-time pred (who could be a usurper). It's all possible,
 * but a little more difficult to follow. Also, I'm adjusting this comment
 * months after originally writing it, so this description is probably
 * necessary but not sufficient. */
| void __mcs_pdro_unlock_no_cas(struct mcs_pdro_lock *lock, |
| struct mcs_pdro_qnode *qnode) |
| { |
| struct mcs_pdro_qnode *old_tail, *usurper; |
| /* Check if someone is already waiting on us to unlock */ |
| if (qnode->next == 0) { |
| cmb(); /* no need for CPU mbs, since there's an atomic_swap() */ |
| /* Unlock it */ |
| old_tail = atomic_swap_ptr((void**)&lock->lock, 0); |
| /* no one else was already waiting, so we successfully unlocked and can |
| * return */ |
| if (old_tail == qnode) |
| return; |
| /* someone else was already waiting on the lock (last one on the list), |
| * and we accidentally took them off. Try and put it back. */ |
		usurper = atomic_swap_ptr((void**)&lock->lock, old_tail);
| /* since someone else was waiting, they should have made themselves our |
| * next. spin (very briefly!) til it happens. */ |
| while (qnode->next == 0) { |
			/* Make sure old_tail isn't preempted. The best we can do for now
			 * is to make sure all vcores run, and thereby get our next. */
| for (int i = 0; i < max_vcores(); i++) |
| ensure_vcore_runs(i); |
| cpu_relax(); |
| } |
| if (usurper) { |
			/* A usurper is someone who snuck in before we could put the old
			 * tail back. They now have the lock. Let's put whoever is
			 * supposed to be next as their next one.
			 *
			 * First, we need to change our next's pred. There's a slight race
			 * here, so our next will need to make sure both us and pred are
			 * done */
			/* I was trying to do something so we didn't need to ensure all
			 * vcores run, using more space in the qnode to figure out who our
			 * pred was at lock time (guessing actually, since there's a race,
			 * etc). */
| //qnode->next->pred = usurper; |
| //wmb(); |
| usurper->next = qnode->next; |
| /* could imagine another wmb() and a flag so our next knows to no |
| * longer check us too. */ |
| } else { |
| /* No usurper meant we put things back correctly, so we should just |
| * pass the lock / unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } else { |
| /* mb()s necessary since we didn't call an atomic_swap() */ |
| wmb(); /* need to make sure any previous writes don't pass unlocking */ |
| rwmb(); /* need to make sure any reads happen before the unlocking */ |
| /* simply unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } |
| |
| void mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode) |
| { |
| __mcs_pdro_unlock(lock, qnode); |
| /* Enable notifs, if we're an _M uthread */ |
| uth_enable_notifs(); |
| } |
| |
| /* New style: under heavy contention with preemption, they won't enter the |
| * 'preempt/change_to storm' that can happen to PDRs, at the cost of some |
| * performance. This is the default. */ |
| void mcs_pdr_init(struct mcs_pdr_lock *lock) |
| { |
| int ret; |
| lock->lock = 0; |
| lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER; |
| ret = posix_memalign((void**)&lock->qnodes, |
| __alignof__(struct mcs_pdr_qnode), |
| sizeof(struct mcs_pdr_qnode) * max_vcores()); |
| assert(!ret); |
| } |
| |
| void mcs_pdr_fini(struct mcs_pdr_lock *lock) |
| { |
| free(lock->qnodes); |
| } |
| |
| /* Similar to the original PDR lock, this tracks the lockholder for better |
| * recovery from preemptions. Under heavy contention, changing to the |
| * lockholder instead of pred makes it more likely to have a vcore outside the |
| * MCS chain handle the preemption. If that never happens, performance will |
| * suffer. |
| * |
| * Simply checking the lockholder causes a lot of unnecessary traffic, so we |
| * first look for signs of preemption in read-mostly locations (by comparison, |
| * the lockholder changes on every lock/unlock). |
| * |
| * We also use the "qnodes are in the lock" style, which is slightly slower than |
| * using the stack in regular MCS/MCSPDR locks, but it speeds PDR up a bit by |
| * not having to read other qnodes' memory to determine their vcoreid. The |
| * slowdown may be due to some weird caching/prefetch settings (like Adjacent |
| * Cacheline Prefetch). |
| * |
| * Note that these locks, like all PDR locks, have opportunities to accidentally |
| * ensure some vcore runs that isn't in the chain. Whenever we read lockholder |
| * or even pred, that particular vcore might subsequently unlock and then get |
| * preempted (or change_to someone else) before we ensure they run. If this |
| * happens and there is another VC in the MCS chain, it will make sure the right |
| * cores run. If there are no other vcores in the chain, it is up to the rest |
| * of the vcore/event handling system to deal with this, which should happen |
| * when one of the other vcores handles the preemption message generated by our |
| * change_to. */ |
| void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode) |
| { |
| struct mcs_pdr_qnode *predecessor; |
| uint32_t pred_vcoreid; |
| struct mcs_pdr_qnode *qnode0 = qnode - vcore_id(); |
| seq_ctr_t seq; |
| qnode->next = 0; |
| cmb(); /* swap provides a CPU mb() */ |
| predecessor = atomic_swap_ptr((void**)&lock->lock, qnode); |
| if (predecessor) { |
| qnode->locked = 1; |
| pred_vcoreid = predecessor - qnode0; /* can compute this whenever */ |
| wmb(); /* order the locked write before the next write */ |
| predecessor->next = qnode; |
| seq = ACCESS_ONCE(__procinfo.coremap_seqctr); |
| /* no need for a wrmb(), since this will only get unlocked after they |
| * read our pred->next write */ |
| while (qnode->locked) { |
| /* Check to see if anything is amiss. If someone in the chain is |
| * preempted, then someone will notice. Simply checking our pred |
| * isn't that great of an indicator of preemption. The reason is |
| * that the offline vcore is most likely the lockholder (under heavy |
| * lock contention), and we want someone farther back in the chain |
| * to notice (someone that will stay preempted long enough for a |
| * vcore outside the chain to recover them). Checking the seqctr |
| * will tell us of any preempts since we started, so if a storm |
| * starts while we're spinning, we can join in and try to save the |
| * lockholder before its successor gets it. |
| * |
| * Also, if we're the lockholder, then we need to let our pred run |
| * so they can hand us the lock. */ |
| if (vcore_is_preempted(pred_vcoreid) || |
| seq != __procinfo.coremap_seqctr) { |
| if (lock->lockholder_vcoreid == MCSPDR_NO_LOCKHOLDER || |
| lock->lockholder_vcoreid == vcore_id()) |
| ensure_vcore_runs(pred_vcoreid); |
| else |
| ensure_vcore_runs(lock->lockholder_vcoreid); |
| } |
| cpu_relax(); |
| } |
| } else { |
| lock->lockholder_vcoreid = vcore_id(); |
| } |
| } |
| |
| void __mcs_pdr_unlock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode) |
| { |
| uint32_t a_tail_vcoreid; |
| struct mcs_pdr_qnode *qnode0 = qnode - vcore_id(); |
| /* Check if someone is already waiting on us to unlock */ |
| if (qnode->next == 0) { |
| cmb(); /* no need for CPU mbs, since there's an atomic_cas() */ |
		/* If our qnode is still the lock's tail, just swap it with 0 (unlock)
		 * and return */
| if (atomic_cas_ptr((void**)&lock->lock, qnode, 0)) { |
			/* This is racy with the new lockholder. It's possible that we'll
			 * clobber their legit write, though it doesn't actually hurt
			 * correctness. It'll get sorted out on the next unlock. */
| lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER; |
| return; |
| } |
| /* Get the tail (or someone who recently was the tail, but could now |
| * be farther up the chain), in prep for our spinning. Could do an |
| * ACCESS_ONCE on lock->lock */ |
| a_tail_vcoreid = lock->lock - qnode0; |
| /* We failed, someone is there and we are some (maybe a different) |
| * thread's pred. Since someone else was waiting, they should have made |
| * themselves our next. Spin (very briefly!) til it happens. */ |
| while (qnode->next == 0) { |
| /* We need to get our next to run, but we don't know who they are. |
| * If we make sure a tail is running, that will percolate up to make |
| * sure our qnode->next is running */ |
| ensure_vcore_runs(a_tail_vcoreid); |
| cpu_relax(); |
| } |
| /* Alpha wants a read_barrier_depends() here */ |
| lock->lockholder_vcoreid = qnode->next - qnode0; |
| wmb(); /* order the vcoreid write before the unlock */ |
| qnode->next->locked = 0; |
| } else { |
		/* Note we're saying someone else is the lockholder, though we still
		 * are the lockholder until we unlock the next qnode. Our next knows
		 * that if it sees itself as the lockholder, it needs to make sure we
		 * run. */
| lock->lockholder_vcoreid = qnode->next - qnode0; |
| /* mb()s necessary since we didn't call an atomic_swap() */ |
| wmb(); /* need to make sure any previous writes don't pass unlocking */ |
| rwmb(); /* need to make sure any reads happen before the unlocking */ |
| /* simply unlock whoever is next */ |
| qnode->next->locked = 0; |
| } |
| } |
| |
| void mcs_pdr_lock(struct mcs_pdr_lock *lock) |
| { |
| uth_disable_notifs(); |
| cmb(); /* in the off-chance the compiler wants to read vcoreid early */ |
| __mcs_pdr_lock(lock, &lock->qnodes[vcore_id()]); |
| } |
| |
| void mcs_pdr_unlock(struct mcs_pdr_lock *lock) |
| { |
| __mcs_pdr_unlock(lock, &lock->qnodes[vcore_id()]); |
| uth_enable_notifs(); |
| } |