#include <parlib/vcore.h>
#include <parlib/mcs.h>
#include <parlib/arch/atomic.h>
#include <string.h>
#include <stdlib.h>
#include <parlib/uthread.h>
#include <parlib/parlib.h>
#include <malloc.h>
// MCS locks
void mcs_lock_init(struct mcs_lock *lock)
{
memset(lock, 0, sizeof(mcs_lock_t));
}
static inline mcs_lock_qnode_t *mcs_qnode_swap(mcs_lock_qnode_t **addr,
mcs_lock_qnode_t *val)
{
return (mcs_lock_qnode_t*)atomic_swap_ptr((void**)addr, val);
}
void mcs_lock_lock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
qnode->next = 0;
cmb(); /* swap provides a CPU mb() */
mcs_lock_qnode_t *predecessor = mcs_qnode_swap(&lock->lock, qnode);
if (predecessor) {
qnode->locked = 1;
wmb();
predecessor->next = qnode;
/* no need for a wrmb(), since this will only get unlocked after
* they read our previous write */
while (qnode->locked)
cpu_relax();
}
cmb(); /* just need a cmb, the swap handles the CPU wmb/wrmb() */
}
void mcs_lock_unlock(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
/* Check if someone is already waiting on us to unlock */
if (qnode->next == 0) {
cmb(); /* no need for CPU mbs, since there's an atomic_swap() */
/* Unlock it */
mcs_lock_qnode_t *old_tail = mcs_qnode_swap(&lock->lock, 0);
/* no one else was already waiting, so we successfully unlocked
* and can return */
if (old_tail == qnode)
return;
/* someone else was already waiting on the lock (last one on the
 * list), and we accidentally took them off. Try and put them
 * back. */
mcs_lock_qnode_t *usurper = mcs_qnode_swap(&lock->lock, old_tail);
/* since someone else was waiting, they should have made
* themselves our next. spin (very briefly!) til it happens. */
while (qnode->next == 0)
cpu_relax();
if (usurper) {
/* a usurper is someone who snuck in before we could
* put the old tail back. They now have the lock.
* Let's put whoever is supposed to be next as their
* next one. */
usurper->next = qnode->next;
} else {
/* No usurper meant we put things back correctly, so we
* should just pass the lock / unlock whoever is next */
qnode->next->locked = 0;
}
} else {
/* mb()s necessary since we didn't call an atomic_swap() */
/* need to make sure any previous writes don't pass unlocking */
wmb();
/* need to make sure any reads happen before the unlocking */
rwmb();
/* simply unlock whoever is next */
qnode->next->locked = 0;
}
}
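/* Illustrative usage sketch, not part of the library itself.  Assumptions: the
 * caller provides the qnode (typically on its stack) and keeps it valid until
 * the matching unlock returns.
 *
 *	struct mcs_lock lock;
 *	struct mcs_lock_qnode qnode;
 *
 *	mcs_lock_init(&lock);
 *	mcs_lock_lock(&lock, &qnode);
 *	... critical section ...
 *	mcs_lock_unlock(&lock, &qnode);
 */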
/* CAS style mcs locks, kept around til we use them. We're using the
* usurper-style, since RISCV doesn't have a real CAS (yet?). */
void mcs_lock_unlock_cas(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
/* Check if someone is already waiting on us to unlock */
if (qnode->next == 0) {
cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
/* If we're still the lock, just swap it with 0 (unlock) and
* return */
if (atomic_cas_ptr((void**)&lock->lock, qnode, 0))
return;
/* We failed, someone is there and we are some (maybe a
* different) thread's pred. Since someone else was waiting,
* they should have made themselves our next. Spin (very
* briefly!) til it happens. */
while (qnode->next == 0)
cpu_relax();
/* Alpha wants a read_barrier_depends() here */
/* Now that we have a next, unlock them */
qnode->next->locked = 0;
} else {
/* mb()s necessary since we didn't call an atomic_swap() */
/* need to make sure any previous writes don't pass unlocking */
wmb();
/* need to make sure any reads happen before the unlocking */
rwmb();
/* simply unlock whoever is next */
qnode->next->locked = 0;
}
}
/* We don't bother saving the state, like we do with irqsave, since we can use
* whether or not we are in vcore context to determine that. This means you
* shouldn't call this from those moments when you fake being in vcore context
* (when switching into the TLS, etc). */
void mcs_lock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
uth_disable_notifs();
mcs_lock_lock(lock, qnode);
}
/* Note we turn off the DONT_MIGRATE flag before enabling notifs. This is fine,
* since we wouldn't receive any notifs that could lead to us migrating after we
* set DONT_MIGRATE but before enable_notifs(). We need it to be in this order,
* since we need to check messages after ~DONT_MIGRATE. */
void mcs_unlock_notifsafe(struct mcs_lock *lock, struct mcs_lock_qnode *qnode)
{
mcs_lock_unlock(lock, qnode);
uth_enable_notifs();
}
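/* Illustrative sketch of the notif-safe variants, which wrap the same
 * lock/unlock pair and must be used together.  Assumption: the caller is a
 * uthread or vcore in an _M process.
 *
 *	struct mcs_lock lock;
 *	struct mcs_lock_qnode qnode;
 *
 *	mcs_lock_init(&lock);
 *	mcs_lock_notifsafe(&lock, &qnode);
 *	... critical section, with notifs disabled ...
 *	mcs_unlock_notifsafe(&lock, &qnode);
 */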
/* Preemption detection and recovering MCS locks. */
/* Old style. Has trouble getting out of 'preempt/change-to storms' under
* heavy contention and with preemption. */
void mcs_pdro_init(struct mcs_pdro_lock *lock)
{
lock->lock = 0;
}
void mcs_pdro_fini(struct mcs_pdro_lock *lock)
{
}
/* Internal version of the locking function, doesn't care if notifs are
* disabled. While spinning, we'll check to see if other vcores involved in the
* locking are running. If we change to that vcore, we'll continue when our
* vcore gets restarted. If the change fails, it is because the vcore is
* running, and we'll continue.
*
* It's worth noting that changing to another vcore won't hurt correctness.
* Even if they are no longer the lockholder, they will be checking preemption
* messages and will help break out of the deadlock. So long as we don't
* spin uncontrollably, we're okay. */
void __mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
struct mcs_pdro_qnode *predecessor;
uint32_t pred_vcoreid;
qnode->next = 0;
cmb(); /* swap provides a CPU mb() */
predecessor = atomic_swap_ptr((void**)&lock->lock, qnode);
if (predecessor) {
qnode->locked = 1;
/* Read-in the vcoreid before releasing them. We won't need to
* worry about their qnode memory being freed/reused (they can't
* til we fill in the 'next' slot), which is a bit of a
* performance win. This also cuts down on cache-line
* contention when we ensure they run, which helps a lot too. */
pred_vcoreid = ACCESS_ONCE(predecessor->vcoreid);
wmb(); /* order the locked write before the next write */
predecessor->next = qnode;
/* no need for a wrmb(), since this will only get unlocked after
* they read our previous write */
while (qnode->locked) {
/* We don't know who the lock holder is (it hurts
 * performance via 'true' sharing to track it). Instead
* we'll make sure our pred is running, which trickles
* up to the lock holder. */
ensure_vcore_runs(pred_vcoreid);
cpu_relax();
}
}
}
/* Using the CAS style unlocks, since the usurper recovery is a real pain in the
* ass */
void __mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
uint32_t a_tail_vcoreid;
/* Check if someone is already waiting on us to unlock */
if (qnode->next == 0) {
cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
/* If we're still the lock, just swap it with 0 (unlock) and
* return */
if (atomic_cas_ptr((void**)&lock->lock, qnode, 0))
return;
/* Read in the tail (or someone who recently was the tail, but
* could now be farther up the chain), in prep for our spinning.
*/
a_tail_vcoreid = ACCESS_ONCE(lock->lock->vcoreid);
/* We failed, someone is there and we are some (maybe a
* different) thread's pred. Since someone else was waiting,
* they should have made themselves our next. Spin (very
* briefly!) til it happens. */
while (qnode->next == 0) {
/* We need to get our next to run, but we don't know who
* they are. If we make sure a tail is running, that
* will percolate up to make sure our qnode->next is
* running */
ensure_vcore_runs(a_tail_vcoreid);
cpu_relax();
}
/* Alpha wants a read_barrier_depends() here */
/* Now that we have a next, unlock them */
qnode->next->locked = 0;
} else {
/* mb()s necessary since we didn't call an atomic_swap() */
/* need to make sure any previous writes don't pass unlocking */
wmb();
/* need to make sure any reads happen before the unlocking */
rwmb();
/* simply unlock whoever is next */
qnode->next->locked = 0;
}
}
/* Actual MCS-PDRO locks. Don't worry about initializing any fields of qnode.
* We'll do vcoreid here, and the locking code deals with the other fields */
void mcs_pdro_lock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
/* Disable notifs, if we're an _M uthread */
uth_disable_notifs();
cmb(); /* in the off-chance the compiler wants to read vcoreid early */
qnode->vcoreid = vcore_id();
__mcs_pdro_lock(lock, qnode);
}
/* CAS-less unlock, not quite as efficient and will make sure every vcore runs
* (since we don't have a convenient way to make sure our qnode->next runs
* yet, other than making sure everyone runs).
*
* To use this without ensuring all vcores run, you'll need the unlock code to
* save pred to a specific field in the qnode and check both its initial pred
 * as well as its run time pred (who could be a usurper). It's all possible,
* but a little more difficult to follow. Also, I'm adjusting this comment
* months after writing it originally, so it is probably not sufficient, but
* necessary. */
void __mcs_pdro_unlock_no_cas(struct mcs_pdro_lock *lock,
struct mcs_pdro_qnode *qnode)
{
struct mcs_pdro_qnode *old_tail, *usurper;
/* Check if someone is already waiting on us to unlock */
if (qnode->next == 0) {
cmb(); /* no need for CPU mbs, since there's an atomic_swap() */
/* Unlock it */
old_tail = atomic_swap_ptr((void**)&lock->lock, 0);
/* no one else was already waiting, so we successfully unlocked
* and can return */
if (old_tail == qnode)
return;
/* someone else was already waiting on the lock (last one on the
 * list), and we accidentally took them off. Try and put them
 * back. */
usurper = atomic_swap_ptr((void**)&lock->lock, old_tail);
/* since someone else was waiting, they should have made
* themselves our next. spin (very briefly!) til it happens. */
while (qnode->next == 0) {
/* make sure old_tail isn't preempted. The best we can do
* for now is to make sure all vcores run, and thereby
* get our next. */
for (int i = 0; i < max_vcores(); i++)
ensure_vcore_runs(i);
cpu_relax();
}
if (usurper) {
/* a usurper is someone who snuck in before we could
* put the old tail back. They now have the lock.
* Let's put whoever is supposed to be next as their
* next one.
*
* First, we need to change our next's pred. There's a
* slight race here, so our next will need to make sure
* both us and pred are done */
/* I was trying to do something so we didn't need to
* ensure all vcores run, using more space in the qnode
 * to figure out who our pred was at lock time (guessing
* actually, since there's a race, etc). */
//qnode->next->pred = usurper;
//wmb();
usurper->next = qnode->next;
/* could imagine another wmb() and a flag so our next
* knows to no longer check us too. */
} else {
/* No usurper meant we put things back correctly, so we
* should just pass the lock / unlock whoever is next */
qnode->next->locked = 0;
}
} else {
/* mb()s necessary since we didn't call an atomic_swap() */
/* need to make sure any previous writes don't pass unlocking */
wmb();
/* need to make sure any reads happen before the unlocking */
rwmb();
/* simply unlock whoever is next */
qnode->next->locked = 0;
}
}
void mcs_pdro_unlock(struct mcs_pdro_lock *lock, struct mcs_pdro_qnode *qnode)
{
__mcs_pdro_unlock(lock, qnode);
/* Enable notifs, if we're an _M uthread */
uth_enable_notifs();
}
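/* Illustrative usage sketch for the PDRO (old-style PDR) lock, not part of the
 * library itself.  Assumption: the caller supplies the qnode, e.g. on its
 * stack, just like the plain MCS lock.
 *
 *	struct mcs_pdro_lock lock;
 *	struct mcs_pdro_qnode qnode;
 *
 *	mcs_pdro_init(&lock);
 *	mcs_pdro_lock(&lock, &qnode);
 *	... critical section ...
 *	mcs_pdro_unlock(&lock, &qnode);
 *	mcs_pdro_fini(&lock);
 */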
/* New style: under heavy contention with preemption, they won't enter the
* 'preempt/change_to storm' that can happen to PDRs, at the cost of some
* performance. This is the default. */
void mcs_pdr_init(struct mcs_pdr_lock *lock)
{
int ret;
lock->lock = 0;
lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
ret = posix_memalign((void**)&lock->qnodes,
__alignof__(struct mcs_pdr_qnode),
sizeof(struct mcs_pdr_qnode) * max_vcores());
assert(!ret);
}
void mcs_pdr_fini(struct mcs_pdr_lock *lock)
{
free(lock->qnodes);
}
/* Similar to the original PDR lock, this tracks the lockholder for better
* recovery from preemptions. Under heavy contention, changing to the
* lockholder instead of pred makes it more likely to have a vcore outside the
* MCS chain handle the preemption. If that never happens, performance will
* suffer.
*
* Simply checking the lockholder causes a lot of unnecessary traffic, so we
* first look for signs of preemption in read-mostly locations (by comparison,
* the lockholder changes on every lock/unlock).
*
* We also use the "qnodes are in the lock" style, which is slightly slower than
* using the stack in regular MCS/MCSPDR locks, but it speeds PDR up a bit by
* not having to read other qnodes' memory to determine their vcoreid. The
* slowdown may be due to some weird caching/prefetch settings (like Adjacent
* Cacheline Prefetch).
*
* Note that these locks, like all PDR locks, have opportunities to accidentally
* ensure some vcore runs that isn't in the chain. Whenever we read lockholder
* or even pred, that particular vcore might subsequently unlock and then get
* preempted (or change_to someone else) before we ensure they run. If this
* happens and there is another VC in the MCS chain, it will make sure the right
* cores run. If there are no other vcores in the chain, it is up to the rest
* of the vcore/event handling system to deal with this, which should happen
* when one of the other vcores handles the preemption message generated by our
* change_to. */
void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
{
struct mcs_pdr_qnode *predecessor;
uint32_t pred_vcoreid;
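/* qnode0 is the base of the lock's qnodes array (allocated in mcs_pdr_init()).
 * Since that array is indexed by vcoreid, subtracting qnode0 from any qnode
 * pointer recovers its owner's vcoreid. */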
struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();
seq_ctr_t seq;
qnode->next = 0;
cmb(); /* swap provides a CPU mb() */
predecessor = atomic_swap_ptr((void**)&lock->lock, qnode);
if (predecessor) {
qnode->locked = 1;
/* can compute this whenever */
pred_vcoreid = predecessor - qnode0;
wmb(); /* order the locked write before the next write */
predecessor->next = qnode;
seq = ACCESS_ONCE(__procinfo.coremap_seqctr);
/* no need for a wrmb(), since this will only get unlocked after
* they read our pred->next write */
while (qnode->locked) {
/* Check to see if anything is amiss. If someone in the
* chain is preempted, then someone will notice. Simply
* checking our pred isn't that great of an indicator of
* preemption. The reason is that the offline vcore is
* most likely the lockholder (under heavy lock
* contention), and we want someone farther back in the
* chain to notice (someone that will stay preempted
* long enough for a vcore outside the chain to recover
* them). Checking the seqctr will tell us of any
* preempts since we started, so if a storm starts while
* we're spinning, we can join in and try to save the
* lockholder before its successor gets it.
*
* Also, if we're the lockholder, then we need to let
* our pred run so they can hand us the lock. */
if (vcore_is_preempted(pred_vcoreid) ||
seq != __procinfo.coremap_seqctr) {
/* Note that we don't normally ensure our *pred*
* runs. */
if (lock->lockholder_vcoreid ==
MCSPDR_NO_LOCKHOLDER ||
lock->lockholder_vcoreid == vcore_id())
ensure_vcore_runs(pred_vcoreid);
else
ensure_vcore_runs(
lock->lockholder_vcoreid);
}
cpu_relax();
}
} else {
lock->lockholder_vcoreid = vcore_id();
}
}
void __mcs_pdr_unlock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
{
uint32_t a_tail_vcoreid;
struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();
/* Check if someone is already waiting on us to unlock */
if (qnode->next == 0) {
cmb(); /* no need for CPU mbs, since there's an atomic_cas() */
/* If we're still the lock, just swap it with 0 (unlock) and
* return */
if (atomic_cas_ptr((void**)&lock->lock, qnode, 0)) {
/* This is racy with the new lockholder. It's possible
 * that we'll clobber their legit write, though it
 * doesn't actually hurt correctness. It'll get sorted
 * out on the next unlock. */
lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
return;
}
/* Get the tail (or someone who recently was the tail, but could
* now be farther up the chain), in prep for our spinning.
* Could do an ACCESS_ONCE on lock->lock */
a_tail_vcoreid = lock->lock - qnode0;
/* We failed, someone is there and we are some (maybe a
* different) thread's pred. Since someone else was waiting,
* they should have made themselves our next. Spin (very
* briefly!) til it happens. */
while (qnode->next == 0) {
/* We need to get our next to run, but we don't know who
* they are. If we make sure a tail is running, that
* will percolate up to make sure our qnode->next is
* running.
*
* But first, we need to tell everyone that there is no
* specific lockholder. lockholder_vcoreid is a
* short-circuit on the "walk the chain" PDR. Normally,
* that's okay. But now we need to make sure everyone
* is walking the chain from a_tail up to our pred. */
lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
ensure_vcore_runs(a_tail_vcoreid);
cpu_relax();
}
/* Alpha wants a read_barrier_depends() here */
lock->lockholder_vcoreid = qnode->next - qnode0;
wmb(); /* order the vcoreid write before the unlock */
qnode->next->locked = 0;
} else {
/* Note we're saying someone else is the lockholder, though we
* still are the lockholder until we unlock the next qnode. Our
 * next knows that if it sees itself as the lockholder, it
* needs to make sure we run. */
lock->lockholder_vcoreid = qnode->next - qnode0;
/* mb()s necessary since we didn't call an atomic_swap() */
/* need to make sure any previous writes don't pass unlocking */
wmb();
/* need to make sure any reads happen before the unlocking */
rwmb();
/* simply unlock whoever is next */
qnode->next->locked = 0;
}
}
void mcs_pdr_lock(struct mcs_pdr_lock *lock)
{
uth_disable_notifs();
cmb(); /* in the off-chance the compiler wants to read vcoreid early */
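/* Each vcore locks with its own qnode, embedded in the lock itself (allocated
 * in mcs_pdr_init()), so callers don't pass a qnode. */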
__mcs_pdr_lock(lock, &lock->qnodes[vcore_id()]);
}
void mcs_pdr_unlock(struct mcs_pdr_lock *lock)
{
__mcs_pdr_unlock(lock, &lock->qnodes[vcore_id()]);
uth_enable_notifs();
}
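/* Illustrative usage sketch for the default PDR lock, not part of the library
 * itself.  Assumption: the caller is a uthread or vcore in an _M process; the
 * qnodes live inside the lock, so none is passed.
 *
 *	struct mcs_pdr_lock lock;
 *
 *	mcs_pdr_init(&lock);
 *	mcs_pdr_lock(&lock);
 *	... critical section ...
 *	mcs_pdr_unlock(&lock);
 *	mcs_pdr_fini(&lock);
 */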