#include <parlib/common.h>
#include <futex.h>
#include <sys/queue.h>
#include <parlib/uthread.h>
#include <parlib/parlib.h>
#include <parlib/assert.h>
#include <parlib/stdio.h>
#include <errno.h>
#include <parlib/slab.h>
#include <parlib/mcs.h>
#include <parlib/alarm.h>

struct futex_element {
	TAILQ_ENTRY(futex_element) link;
	int *uaddr;
	bool on_list;
	bool waker_using;
	uth_cond_var_t cv;
};
TAILQ_HEAD(futex_queue, futex_element);

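/* All futexes share this single queue and lock; futex_wake() scans the queue
 * for elements whose uaddr matches. */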
struct futex_data {
	struct mcs_pdr_lock lock;
	struct futex_queue queue;
};
static struct futex_data __futex;

static inline void futex_init(void *arg)
{
	mcs_pdr_init(&__futex.lock);
	TAILQ_INIT(&__futex.queue);
}

static inline int futex_wait(int *uaddr, int val,
			     const struct timespec *abs_timeout)
{
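	/* The queue element lives on our stack; the e[1] array form just
	 * gives us pointer syntax for it below. */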
	struct futex_element e[1];
	bool timed_out;

	mcs_pdr_lock(&__futex.lock);
	if (*uaddr != val) {
		mcs_pdr_unlock(&__futex.lock);
		return 0;
	}
	e->uaddr = uaddr;
	uth_cond_var_init(&e->cv);
	e->waker_using = false;
	e->on_list = true;
	TAILQ_INSERT_TAIL(&__futex.queue, e, link);
	/* Lock switch.  Any waker will grab the global lock, then grab ours.
	 * We're downgrading to the CV lock, which still protects us from
	 * missing the signal (which is someone calling Wake after changing
	 * *uaddr).  The CV code will atomically block (with timeout) and
	 * unlock the CV lock.
	 *
	 * Ordering is __futex.lock -> CV lock, but you can have the inner
	 * lock without holding the outer lock. */
	uth_cond_var_lock(&e->cv);
	mcs_pdr_unlock(&__futex.lock);

	timed_out = !uth_cond_var_timed_wait(&e->cv, NULL, abs_timeout);
	/* CV wait returns with the lock held, which is unnecessary for
	 * futexes.  We could use this CV lock and maybe a trylock on the
	 * futex to sync with futex_wake(), instead of the current
	 * synchronization technique with the bools. */
	uth_cond_var_unlock(&e->cv);

	/* In the common case, the waker woke us and already cleared on_list,
	 * and we'd rather not grab the __futex lock again.  Note the outer
	 * on_list check is an optimization; we need the lock to be sure.
	 * Also note the waker sets waker_using before clearing on_list, so
	 * if we happen to see !on_list (while the waker is mucking with the
	 * list), we'll see waker_using and spin below. */
	if (e->on_list) {
		mcs_pdr_lock(&__futex.lock);
		if (e->on_list)
			TAILQ_REMOVE(&__futex.queue, e, link);
		mcs_pdr_unlock(&__futex.lock);
	}
	rmb();	/* read on_list before waker_using */
	/* The waker might have yanked us and is about to kick the CV.  Need
	 * to wait until they are done before freeing e. */
	while (e->waker_using)
		cpu_relax_any();

	if (timed_out) {
		errno = ETIMEDOUT;
		return -1;
	}
	return 0;
}

static inline int futex_wake(int *uaddr, int count)
{
	int max = count;
	struct futex_element *e, *temp;
	struct futex_queue q = TAILQ_HEAD_INITIALIZER(q);

	/* The waiter spins on us with cpu_relax_any().  That code assumes
	 * the target of the wait/spin is in vcore context, or at least has
	 * notifs disabled. */
	uth_disable_notifs();
	mcs_pdr_lock(&__futex.lock);
	TAILQ_FOREACH_SAFE(e, &__futex.queue, link, temp) {
		if (count <= 0)
			break;
		if (e->uaddr == uaddr) {
			e->waker_using = true;
			/* flag waker_using before saying !on_list */
			wmb();
			e->on_list = false;
			TAILQ_REMOVE(&__futex.queue, e, link);
			TAILQ_INSERT_TAIL(&q, e, link);
			count--;
		}
	}
	mcs_pdr_unlock(&__futex.lock);
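	/* Signal the collected waiters off the private list q, outside the
	 * global lock, so the MCS lock isn't held across the signals. */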
	TAILQ_FOREACH_SAFE(e, &q, link, temp) {
		TAILQ_REMOVE(&q, e, link);
		uth_cond_var_signal(&e->cv);
		/* Do not touch e after clearing waker_using: once the waiter
		 * sees it drop, it is free to return and reuse its stack. */
		e->waker_using = false;
	}
	uth_enable_notifs();

	return max - count;
}

int futex(int *uaddr, int op, int val, const struct timespec *timeout,
	  int *uaddr2, int val3)
{
	static parlib_once_t once = PARLIB_ONCE_INIT;
	struct timespec abs_timeout[1];

	parlib_run_once(&once, futex_init, NULL);
	assert(uaddr2 == NULL);
	assert(val3 == 0);

	/* futex timeouts are relative.  Internally, we use absolute
	 * timeouts. */
	if (timeout) {
		clock_gettime(CLOCK_MONOTONIC, abs_timeout);
		/* timespec_add is available inside glibc, but not out here. */
		abs_timeout->tv_sec += timeout->tv_sec;
		abs_timeout->tv_nsec += timeout->tv_nsec;
		if (abs_timeout->tv_nsec >= 1000000000) {
			abs_timeout->tv_nsec -= 1000000000;
			abs_timeout->tv_sec++;
		}
	}

	switch (op) {
	case FUTEX_WAIT:
		return futex_wait(uaddr, val, timeout ? abs_timeout : NULL);
	case FUTEX_WAKE:
		return futex_wake(uaddr, val);
	default:
		errno = ENOSYS;
		return -1;
	}
	return -1;
}
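
/*
 * Usage sketch (illustrative only, not part of the implementation above): a
 * one-shot event built on futex().  The event_t type and helper names are
 * hypothetical.  Note that this futex_wait() returns 0 both on a genuine
 * wake and when *uaddr already differs from the expected value, so waiters
 * re-check the flag in a loop.
 */
#include <limits.h>	/* INT_MAX: wake every waiter */

typedef struct {
	int flag;	/* 0 = not fired, 1 = fired */
} event_t;

static void event_wait(event_t *ev, const struct timespec *rel_timeout)
{
	/* Re-check on every return: futex_wait() re-reads *uaddr under its
	 * lock, so a stale first read here is harmless. */
	while (ev->flag == 0) {
		if (futex(&ev->flag, FUTEX_WAIT, 0, rel_timeout, NULL, 0) &&
		    errno == ETIMEDOUT)
			break;	/* deadline hit; caller re-checks ev->flag */
	}
}

static void event_fire(event_t *ev)
{
	ev->flag = 1;
	/* Wake all current waiters; the return value is how many woke. */
	futex(&ev->flag, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
}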