#include <parlib/common.h>
#include <futex.h>
#include <sys/queue.h>
#include <parlib/uthread.h>
#include <parlib/parlib.h>
#include <parlib/assert.h>
#include <parlib/stdio.h>
#include <errno.h>
#include <parlib/slab.h>
#include <parlib/mcs.h>
#include <parlib/alarm.h>

struct futex_element {
	TAILQ_ENTRY(futex_element) link;
	int *uaddr;
	bool on_list;
	bool waker_using;
	uth_cond_var_t cv;
};
TAILQ_HEAD(futex_queue, futex_element);

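/* All futexes share this single queue and lock; futex_wake() scans the queue
 * for elements whose uaddr matches. */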
struct futex_data {
	struct mcs_pdr_lock lock;
	struct futex_queue queue;
};
static struct futex_data __futex;

static inline void futex_init(void *arg)
{
	mcs_pdr_init(&__futex.lock);
	TAILQ_INIT(&__futex.queue);
}

static inline int futex_wait(int *uaddr, int val,
			     const struct timespec *abs_timeout)
{
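	/* The queue element lives on our stack; the e[1] array form just
	 * gives us pointer syntax for it below. */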
	struct futex_element e[1];
	bool timed_out;

	mcs_pdr_lock(&__futex.lock);
	if (*uaddr != val) {
		mcs_pdr_unlock(&__futex.lock);
		return 0;
	}
	e->uaddr = uaddr;
	uth_cond_var_init(&e->cv);
	e->waker_using = false;
	e->on_list = true;
	TAILQ_INSERT_TAIL(&__futex.queue, e, link);
	/* Lock switch.  Any waker will grab the global lock, then grab ours.
	 * We're downgrading to the CV lock, which still protects us from
	 * missing the signal (which is someone calling Wake after changing
	 * *uaddr).  The CV code will atomically block (with timeout) and
	 * unlock the CV lock.
	 *
	 * Ordering is __futex.lock -> CV lock, but you can have the inner
	 * lock without holding the outer lock. */
	uth_cond_var_lock(&e->cv);
	mcs_pdr_unlock(&__futex.lock);

	timed_out = !uth_cond_var_timed_wait(&e->cv, NULL, abs_timeout);
	/* CV wait returns with the lock held, which is unnecessary for
	 * futexes.  We could use this CV lock and maybe a trylock on the
	 * futex to sync with futex_wake(), instead of the current
	 * synchronization technique with the bools. */
	uth_cond_var_unlock(&e->cv);

	/* In the common case, the waker woke us and already cleared on_list,
	 * and we'd rather not grab the __futex lock again.  Note the outer
	 * on_list check is an optimization; we need the lock to be sure.
	 * Also note the waker sets waker_using before clearing on_list, so
	 * if we happen to see !on_list (while the waker is mucking with the
	 * list), we'll see waker_using and spin below. */
	if (e->on_list) {
		mcs_pdr_lock(&__futex.lock);
		if (e->on_list)
			TAILQ_REMOVE(&__futex.queue, e, link);
		mcs_pdr_unlock(&__futex.lock);
	}
	rmb();	/* read on_list before waker_using */
	/* The waker might have yanked us and is about to kick the CV.  Need
	 * to wait until they are done before freeing e. */
	while (e->waker_using)
		cpu_relax_any();

	if (timed_out) {
		errno = ETIMEDOUT;
		return -1;
	}
	return 0;
}

static inline int futex_wake(int *uaddr, int count)
{
	int max = count;
	struct futex_element *e, *temp;
	struct futex_queue q = TAILQ_HEAD_INITIALIZER(q);

	/* The waiter spins on us with cpu_relax_any().  That code assumes
	 * the target of the wait/spin is in vcore context, or at least has
	 * notifs disabled. */
	uth_disable_notifs();
	mcs_pdr_lock(&__futex.lock);
	TAILQ_FOREACH_SAFE(e, &__futex.queue, link, temp) {
		if (count <= 0)
			break;
		if (e->uaddr == uaddr) {
			e->waker_using = true;
			/* flag waker_using before saying !on_list */
			wmb();
			e->on_list = false;
			TAILQ_REMOVE(&__futex.queue, e, link);
			TAILQ_INSERT_TAIL(&q, e, link);
			count--;
		}
	}
	mcs_pdr_unlock(&__futex.lock);
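	/* Signal the collected waiters off the private list q, outside the
	 * global lock, so the MCS lock isn't held across the signals. */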
	TAILQ_FOREACH_SAFE(e, &q, link, temp) {
		TAILQ_REMOVE(&q, e, link);
		uth_cond_var_signal(&e->cv);
		/* Do not touch e after clearing waker_using: once the waiter
		 * sees it drop, it is free to return and reuse its stack. */
		e->waker_using = false;
	}
	uth_enable_notifs();

	return max - count;
}

int futex(int *uaddr, int op, int val, const struct timespec *timeout,
	  int *uaddr2, int val3)
{
	static parlib_once_t once = PARLIB_ONCE_INIT;
	struct timespec abs_timeout[1];

	parlib_run_once(&once, futex_init, NULL);
	assert(uaddr2 == NULL);
	assert(val3 == 0);

	/* futex timeouts are relative.  Internally, we use absolute
	 * timeouts. */
	if (timeout) {
		clock_gettime(CLOCK_MONOTONIC, abs_timeout);
		/* timespec_add is available inside glibc, but not out here. */
		abs_timeout->tv_sec += timeout->tv_sec;
		abs_timeout->tv_nsec += timeout->tv_nsec;
		if (abs_timeout->tv_nsec >= 1000000000) {
			abs_timeout->tv_nsec -= 1000000000;
			abs_timeout->tv_sec++;
		}
	}

	switch (op) {
	case FUTEX_WAIT:
		return futex_wait(uaddr, val, timeout ? abs_timeout : NULL);
	case FUTEX_WAKE:
		return futex_wake(uaddr, val);
	default:
		errno = ENOSYS;
		return -1;
	}
	return -1;
}
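
/*
 * Usage sketch (illustrative only, not part of the implementation above): a
 * one-shot event built on futex().  The event_t type and helper names are
 * hypothetical.  Note that this futex_wait() returns 0 both on a genuine
 * wake and when *uaddr already differs from the expected value, so waiters
 * re-check the flag in a loop.
 */
#include <limits.h>	/* INT_MAX: wake every waiter */

typedef struct {
	int flag;	/* 0 = not fired, 1 = fired */
} event_t;

static void event_wait(event_t *ev, const struct timespec *rel_timeout)
{
	/* Re-check on every return: futex_wait() re-reads *uaddr under its
	 * lock, so a stale first read here is harmless. */
	while (ev->flag == 0) {
		if (futex(&ev->flag, FUTEX_WAIT, 0, rel_timeout, NULL, 0) &&
		    errno == ETIMEDOUT)
			break;	/* deadline hit; caller re-checks ev->flag */
	}
}

static void event_fire(event_t *ev)
{
	ev->flag = 1;
	/* Wake all current waiters; the return value is how many woke. */
	futex(&ev->flag, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
}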