/* blob: cf678a1fcc57768f5777f07155e36ae1ce3d376b */
/* Copyright (c) 2013 The Regents of the University of California
* Copyright (c) 2018 Google Inc.
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* Userspace alarms. There are lower level helpers to build your own alarms
* from the #alarm device and an alarm service, based off a slimmed down version
* of the kernel alarms. Under the hood, the user alarm uses the #alarm service
* for the root of the alarm chain.
*
* There's only one timer chain, unlike in the kernel, for the entire process.
* If you want one-off timers unrelated to the chain (and sent to other vcores),
* use #alarm directly.
*
* Your handlers will run from vcore context.
*
* Code differences from the kernel (for future porting):
* - init_alarm_service, run as a constructor
* - set_alarm() and friends are __tc_set_alarm(), passing global_tchain.
* - reset_tchain_interrupt() uses #alarm
* - spinlocks -> spin_pdr_locks (cv's lock, actually)
* - ev_q wrappers for converting #alarm events to __triggers
* - printks, and other minor stuff. */
#include <errno.h>
#include <sys/queue.h>
#include <sys/time.h>
#include <parlib/alarm.h>
#include <stdio.h>
#include <parlib/assert.h>
#include <parlib/stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <parlib/parlib.h>
#include <parlib/event.h>
#include <parlib/uthread.h>
#include <parlib/spinlock.h>
#include <parlib/timing.h>
#include <sys/plan9_helpers.h>
#include <sys/fork_cb.h>
/* Helper to get your own alarm.  Opens "#alarm/clone" (the control FD), reads
 * the new alarm's id from it as text, and opens that alarm's timer file,
 * "#alarm/a<id>/timer".  The alarm is built, but has no evq or timer set.
 *
 * Each of ctlfd_r, timerfd_r and alarmid_r may be NULL (0) if you don't care
 * about that value.  An FD whose out-pointer is NULL is closed before
 * returning, since nobody would otherwise own it.
 *
 * Returns 0 on success.  Returns -1 on failure, in which case no out-parameter
 * is written, no FD is left open, and errno is whatever the failing call
 * left. */
int devalarm_get_fds(int *ctlfd_r, int *timerfd_r, int *alarmid_r)
{
	int ctlfd, timerfd, alarmid, ret, saved_errno;
	char buf[20];
	/* Big enough for the prefix, the longest id buf can hold, and the
	 * suffix, so the snprintf() below can never truncate. */
	char path[sizeof("#alarm/a/timer") + sizeof(buf)];

	ctlfd = open("#alarm/clone", O_RDWR | O_CLOEXEC);
	if (ctlfd < 0)
		return -1;
	ret = read(ctlfd, buf, sizeof(buf) - 1);
	if (ret <= 0)
		goto err_close_ctl;
	buf[ret] = 0;
	alarmid = atoi(buf);
	snprintf(path, sizeof(path), "#alarm/a%s/timer", buf);
	timerfd = open(path, O_RDWR | O_CLOEXEC);
	if (timerfd < 0)
		goto err_close_ctl;
	if (ctlfd_r)
		*ctlfd_r = ctlfd;
	else
		close(ctlfd);
	if (timerfd_r)
		*timerfd_r = timerfd;
	else
		close(timerfd);
	if (alarmid_r)
		*alarmid_r = alarmid;
	return 0;

err_close_ctl:
	/* Don't leak the control FD, and don't let close() clobber the errno
	 * that callers report with perror(). */
	saved_errno = errno;
	close(ctlfd);
	errno = saved_errno;
	return -1;
}
int devalarm_set_evq(int timerfd, struct event_queue *ev_q, int alarmid)
{
struct fd_tap_req tap_req = {0};
tap_req.fd = timerfd;
tap_req.cmd = FDTAP_CMD_ADD;
tap_req.filter = FDTAP_FILT_WRITTEN;
tap_req.ev_id = EV_ALARM;
tap_req.ev_q = ev_q;
tap_req.data = (void*)(long)alarmid;
if (sys_tap_fds(&tap_req, 1) != 1)
return -1;
return 0;
}
/* Arms the alarm behind timerfd by writing tsc_time to it with
 * write_hex_to_fd(), whose return value is passed straight back.  Going by the
 * parameter name, the value is an absolute TSC time - confirm against the
 * #alarm device. */
int devalarm_set_time(int timerfd, uint64_t tsc_time)
{
return write_hex_to_fd(timerfd, tsc_time);
}
/* Extracts the alarm id carried in an event message's ev_arg3.  Returns -1 if
 * there is no message. */
int devalarm_get_id(struct event_msg *ev_msg)
{
	return ev_msg ? (int)(long)ev_msg->ev_arg3 : -1;
}
/* Disarms the alarm behind timerfd.  This is the same write as setting the
 * time, with a time of 0.  Returns whatever that write returns. */
int devalarm_disable(int timerfd)
{
	return devalarm_set_time(timerfd, 0);
}
/* Forward declarations for the chain-level helpers defined later in this file.
 * They are basically renamed kernel interfaces that take the *tchain
 * explicitly; the public set_alarm() / unset_alarm() / reset_alarm_abs()
 * wrappers call them with &global_tchain. */
static void __tc_set_alarm(struct timer_chain *tchain,
struct alarm_waiter *waiter);
static bool __tc_unset_alarm(struct timer_chain *tchain,
struct alarm_waiter *waiter);
static bool __tc_reset_alarm_abs(struct timer_chain *tchain,
struct alarm_waiter *waiter,
uint64_t abs_time);
/* Handler that alarm_service_ctor() registers for EV_ALARM events. */
static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type,
void *data);
/* One chain to rule them all: the single timer chain for the entire process,
 * set up by alarm_service_ctor(). */
struct timer_chain global_tchain;
/* Helper, resets the earliest/latest times, based on the elements of the list.
* If the list is empty, we set the times to be the 12345 poison time. Since
* the list is empty, the alarm shouldn't be going off. */
static void reset_tchain_times(struct timer_chain *tchain)
{
if (TAILQ_EMPTY(&tchain->waiters)) {
tchain->earliest_time = ALARM_POISON_TIME;
tchain->latest_time = ALARM_POISON_TIME;
} else {
tchain->earliest_time =
TAILQ_FIRST(&tchain->waiters)->wake_up_time;
tchain->latest_time =
TAILQ_LAST(&tchain->waiters, awaiters_tailq)->wake_up_time;
}
}
/* Fork callback, registered by alarm_service_ctor().  Closes the control and
 * timer FDs recorded in global_tchain and asks #alarm for a fresh pair, which
 * devalarm_get_fds() stores straight back into global_tchain.  A failure is
 * only reported with perror().
 *
 * NOTE(review): the alarm id is not requested here (NULL), so
 * global_tchain.alarmid keeps its pre-fork value, and no event tap is set on
 * the new timer FD.  Confirm that is intended. */
static void devalarm_forked(void)
{
	int err;

	close(global_tchain.ctlfd);
	close(global_tchain.timerfd);
	err = devalarm_get_fds(&global_tchain.ctlfd, &global_tchain.timerfd,
			       NULL);
	if (err)
		perror("Useralarm on fork");
}
/* Constructor for the userspace alarm service; does nothing when
 * __in_fake_parlib() is true.
 *
 * Initializes global_tchain (empty waiter list, nothing running, poisoned
 * times, CV), gets a #alarm, registers handle_user_alarm() for EV_ALARM, and
 * taps the alarm's timer FD onto a fresh event queue.  On success the FDs,
 * alarm id and ev_q are recorded in global_tchain and a fork callback is
 * registered.
 *
 * Any failure is reported with perror() and leaves the service unarmed.  The
 * alarm FDs opened here are closed on those paths rather than leaked. */
static void __attribute__((constructor)) alarm_service_ctor(void)
{
	int ctlfd, timerfd, alarmid;
	struct event_queue *ev_q;
	static struct fork_cb devalarm_fork_cb = {.func = devalarm_forked};

	if (__in_fake_parlib())
		return;
	/* Sets up timer chain (only one chain per process) */
	TAILQ_INIT(&global_tchain.waiters);
	global_tchain.running = NULL;
	reset_tchain_times(&global_tchain);
	uth_cond_var_init(&global_tchain.cv);
	if (devalarm_get_fds(&ctlfd, &timerfd, &alarmid)) {
		perror("Useralarm: devalarm_get_fds");
		return;
	}
	/* Since we're doing SPAM_PUBLIC later, we actually don't need a big
	 * ev_q.  But someone might copy/paste this and change a flag. */
	register_ev_handler(EV_ALARM, handle_user_alarm, 0);
	if (!(ev_q = get_eventq(EV_MBOX_UCQ))) {
		perror("Useralarm: Failed ev_q");
		goto err_close_fds;
	}
	ev_q->ev_vcore = 0;
	/* We could get multiple events for a single alarm.  It's okay, since
	 * __trigger can handle spurious upcalls.  If it ever is not okay, then
	 * use an INDIR (probably with SPAM_INDIR too) instead of SPAM_PUBLIC.
	 */
	ev_q->ev_flags = EVENT_IPI | EVENT_SPAM_PUBLIC | EVENT_WAKEUP;
	if (devalarm_set_evq(timerfd, ev_q, alarmid)) {
		/* NOTE(review): ev_q is not released on this path; confirm
		 * which helper, if any, should give it back. */
		perror("set_alarm_evq");
		goto err_close_fds;
	}
	/* now the alarm is all set, just need to write the timer whenever we
	 * want it to go off. */
	global_tchain.alarmid = alarmid;
	global_tchain.ctlfd = ctlfd;
	global_tchain.timerfd = timerfd;
	global_tchain.ev_q = ev_q;	/* mostly for debugging */
	register_fork_cb(&devalarm_fork_cb);
	return;

err_close_fds:
	/* Nothing recorded these FDs yet, so nobody else will close them.  The
	 * perror() calls above already ran, so close() can't disturb the
	 * reported errno. */
	close(timerfd);
	close(ctlfd);
}
/* Prepares a waiter for first use: its wake-up time is poisoned (so it must be
 * given a real time before being armed), it is marked as not on any chain, and
 * func is installed as the handler.  func must be non-NULL. */
void init_awaiter(struct alarm_waiter *waiter,
                  void (*func) (struct alarm_waiter *awaiter))
{
	waiter->wake_up_time = ALARM_POISON_TIME;
	assert(func);
	waiter->on_tchain = false;
	waiter->func = func;
}
/* Records abs_time as the waiter's wake-up time.  For now, abs_time is the
 * absolute TSC time at which you want the alarm to go off.  This only stores
 * the value; it does not put the waiter on a chain or touch the timer. */
static void __set_awaiter_abs(struct alarm_waiter *waiter, uint64_t abs_time)
{
waiter->wake_up_time = abs_time;
}
/* Sets the waiter to go off at an absolute unix time, given in microseconds.
 * The time is scaled to nanoseconds and converted to a TSC time with
 * epoch_nsec_to_tsc() before being stored. */
void set_awaiter_abs_unix(struct alarm_waiter *waiter, uint64_t abs_usec)
{
	uint64_t abs_nsec = abs_usec * 1000;
	uint64_t abs_tsc = epoch_nsec_to_tsc(abs_nsec);

	__set_awaiter_abs(waiter, abs_tsc);
}
/* Sets the waiter to go off usleep microseconds from now.  This might be
 * easier to use than dealing with the TSC directly. */
void set_awaiter_rel(struct alarm_waiter *waiter, uint64_t usleep)
{
	uint64_t start = read_tsc();
	uint64_t deadline = start + usec2tsc(usleep);

	/* Trips if the sum wrapped around the TSC.  That never happens for
	 * legit values, but it might catch bugs with huge usleeps. */
	assert(start <= deadline);
	__set_awaiter_abs(waiter, deadline);
}
/* Pushes an already-set wake-up time usleep microseconds further out, i.e.
 * relative to the previous tick rather than to 'now'.  The waiter must already
 * have a real (non-poison) wake-up time. */
void set_awaiter_inc(struct alarm_waiter *waiter, uint64_t usleep)
{
	assert(waiter->wake_up_time != ALARM_POISON_TIME);
	waiter->wake_up_time = waiter->wake_up_time + usec2tsc(usleep);
}
/* User interface to the global tchain */
/* Arms waiter on the process-wide chain via __tc_set_alarm().  That helper
 * asserts that the waiter has a real wake-up time (not ALARM_POISON_TIME) and
 * is not already on the chain, so set a time with one of the set_awaiter_*()
 * helpers first. */
void set_alarm(struct alarm_waiter *waiter)
{
__tc_set_alarm(&global_tchain, waiter);
}
/* Disarms waiter on the process-wide chain.  Returns true if it was removed
 * before going off, false if it had already fired.  May block while the
 * waiter's handler is still running (see __tc_unset_alarm()). */
bool unset_alarm(struct alarm_waiter *waiter)
{
return __tc_unset_alarm(&global_tchain, waiter);
}
/* Unsets waiter and re-arms it on the process-wide chain to go off at
 * abs_time, which is stored as the wake-up time as-is (a TSC time, per
 * __set_awaiter_abs()).  Returns what the unset step returned: true if the
 * waiter was still pending, false if it had already fired. */
bool reset_alarm_abs(struct alarm_waiter *waiter, uint64_t abs_time)
{
return __tc_reset_alarm_abs(&global_tchain, waiter, abs_time);
}
/* Helper: syncs the #alarm timer with the chain.  With waiters pending, the
 * timer is set to the chain's earliest time; with none, it is disabled.
 * Failures are only reported, not returned.  The callers in this file hold the
 * chain's lock. */
static void reset_tchain_interrupt(struct timer_chain *tchain)
{
	if (!TAILQ_EMPTY(&tchain->waiters)) {
		/* Make sure it is on and set to the earliest time */
		assert(tchain->earliest_time != ALARM_POISON_TIME);
		/* TODO: check for times in the past or very close to now */
		printd("Turning alarm on for %llu\n", tchain->earliest_time);
		if (devalarm_set_time(tchain->timerfd, tchain->earliest_time))
			perror("Useralarm: Failed to set timer");
		return;
	}
	/* Nothing pending: turn it off */
	printd("Turning alarm off\n");
	if (devalarm_disable(tchain->timerfd))
		printf("Useralarm: unable to disarm alarm!\n");
}
/* This is called when the kernel alarm triggers a tchain, and needs to wake up
 * everyone whose time is up. Called from vcore context.
 *
 * Walks the waiter list from the front, running the handler of every waiter
 * whose wake-up time is not after the current TSC, then re-arms (or disarms)
 * the #alarm timer for whatever is left.  The chain's lock (the CV's lock) is
 * dropped around each handler and around waking an unsetter, and is held
 * otherwise. */
static void __trigger_tchain(struct timer_chain *tchain)
{
/* NOTE(review): 'temp' is never used in this function. */
struct alarm_waiter *i, *temp;
struct uthread *unsetter;
spin_pdr_lock(&tchain->cv.lock);
/* It's possible we have multiple contexts running a single tchain. It
 * shouldn't be possible for per-core tchains, but it is possible
 * otherwise. In that case, we can just abort, treating the event/IRQ
 * that woke us up as a 'poke'. */
if (tchain->running) {
spin_pdr_unlock(&tchain->cv.lock);
return;
}
while ((i = TAILQ_FIRST(&tchain->waiters))) {
/* TODO: Could also do something in cases where it's close to
 * expiring. */
/* The first waiter hasn't expired yet, so stop here. */
if (i->wake_up_time > read_tsc())
break;
TAILQ_REMOVE(&tchain->waiters, i, next);
i->on_tchain = false;
/* Publish which waiter is mid-handler; __tc_unset_alarm() checks this
 * to decide whether it has to wait. */
tchain->running = i;
/* Need the tchain times (earliest/latest) in sync when
 * unlocked. */
reset_tchain_times(tchain);
spin_pdr_unlock(&tchain->cv.lock);
/* Don't touch the waiter after running it, since the memory can
 * be used immediately */
i->func(i);
spin_pdr_lock(&tchain->cv.lock);
tchain->running = NULL;
/* This is the guts of a signal, but we're optimizing for the
 * common case where there is no unsetter. Uthread CV
 * signal/broadcast wakes the uthreads up outside of the CV
 * lock, which will avoid any lock-ordering issues with the 2LS
 * and the CV - in this case, the alarm service. */
unsetter = __uth_cond_var_wake_one(&tchain->cv);
if (unsetter) {
spin_pdr_unlock(&tchain->cv.lock);
uthread_runnable(unsetter);
spin_pdr_lock(&tchain->cv.lock);
}
}
/* Still holding the lock: point the timer at the new head of the list, or
 * turn it off if the list is now empty. */
reset_tchain_interrupt(tchain);
spin_pdr_unlock(&tchain->cv.lock);
}
/* EV_ALARM handler: if the event carries the global chain's alarm id, run the
 * chain.  Events for any other alarm id are ignored. */
static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type,
                              void *data)
{
	assert(ev_type == EV_ALARM);
	if (devalarm_get_id(ev_msg) != global_tchain.alarmid)
		return;
	__trigger_tchain(&global_tchain);
}
/* Helper: places the waiter on the tchain, keeping the list sorted by wake-up
 * time and the cached earliest/latest times current.  Returns TRUE if the
 * waiter became the new head, meaning the caller still needs to reset the
 * tchain interrupt; FALSE otherwise.  Caller holds the lock. */
static bool __insert_awaiter(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	struct alarm_waiter *pos;

	waiter->on_tchain = TRUE;
	/* Empty list: the waiter is both the earliest and the latest, and the
	 * timer needs to be turned on later. */
	if (TAILQ_EMPTY(&tchain->waiters)) {
		TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next);
		tchain->earliest_time = waiter->wake_up_time;
		tchain->latest_time = waiter->wake_up_time;
		return TRUE;
	}
	/* New first entry: the interrupt will need to be reset later. */
	if (waiter->wake_up_time < tchain->earliest_time) {
		TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next);
		tchain->earliest_time = waiter->wake_up_time;
		return TRUE;
	}
	/* New last entry.  On a tie for last, the newer one really goes last;
	 * equality has to be handled here since the scan below only stops at
	 * strictly later entries. */
	if (waiter->wake_up_time >= tchain->latest_time) {
		TAILQ_INSERT_TAIL(&tchain->waiters, waiter, next);
		tchain->latest_time = waiter->wake_up_time;
		return FALSE;
	}
	/* Somewhere in the middle: insert before the first entry that is later
	 * than us.  This won't scale well (TODO) with a lot of inserts, though
	 * the proactive tail insert above helps a bit. */
	TAILQ_FOREACH(pos, &tchain->waiters, next) {
		if (pos->wake_up_time > waiter->wake_up_time) {
			TAILQ_INSERT_BEFORE(pos, waiter, next);
			return FALSE;
		}
	}
	panic("Could not find a spot for awaiter %p\n", waiter);
}
/* Arms waiter on tchain.  The waiter must have a real wake-up time and must
 * not already be on the chain.  Takes the chain's lock, inserts the waiter,
 * and resets the timer if the insertion says the head changed. */
static void __tc_set_alarm(struct timer_chain *tchain,
                           struct alarm_waiter *waiter)
{
	bool rearm;

	assert(waiter->wake_up_time != ALARM_POISON_TIME);
	assert(!waiter->on_tchain);
	spin_pdr_lock(&tchain->cv.lock);
	rearm = __insert_awaiter(tchain, waiter);
	if (rearm)
		reset_tchain_interrupt(tchain);
	spin_pdr_unlock(&tchain->cv.lock);
}
/* Helper: unlinks a waiter that is known to be on the tchain, fixing up the
 * cached earliest/latest times when the waiter was the first and/or last
 * entry.  Returns TRUE if the waiter was the head, meaning the tchain
 * interrupt needs to be reset.  Callers hold the lock. */
static bool __remove_awaiter(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	struct alarm_waiter *neighbor;
	bool was_first = TAILQ_FIRST(&tchain->waiters) == waiter;
	bool was_last = TAILQ_LAST(&tchain->waiters, awaiters_tailq) == waiter;

	/* Look at the neighbors before unlinking; a missing neighbor means
	 * the list is about to be empty on that side, so poison the time. */
	if (was_first) {
		neighbor = TAILQ_NEXT(waiter, next);
		tchain->earliest_time = (neighbor) ? neighbor->wake_up_time :
		                                     ALARM_POISON_TIME;
	}
	if (was_last) {
		neighbor = TAILQ_PREV(waiter, awaiters_tailq, next);
		tchain->latest_time = (neighbor) ? neighbor->wake_up_time :
		                                   ALARM_POISON_TIME;
	}
	TAILQ_REMOVE(&tchain->waiters, waiter, next);
	waiter->on_tchain = FALSE;
	/* Only a change of head requires resetting the timer later. */
	return was_first;
}
/* Takes waiter off the tchain before it goes off.  Returns true if we disarmed
 * it before the alarm went off, false if it had already fired.  May block,
 * since the handler may be running asynchronously. */
static bool __tc_unset_alarm(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	spin_pdr_lock(&tchain->cv.lock);
	while (!waiter->on_tchain) {
		/* Not pending and not mid-handler: it already fired. */
		if (tchain->running != waiter) {
			spin_pdr_unlock(&tchain->cv.lock);
			return false;
		}
		/* Its handler is running, so wait and look again.  Note the
		 * alarm could have resubmitted itself, so ideally the caller
		 * can tell it to not resubmit.
		 *
		 * Despite the slightly more difficult wake-up code in
		 * userspace compared to the kernel, it's still better to use a
		 * CV here.  Some go tests in qemu were more likely to
		 * timeout/starve even if we did some form of
		 * unlock/yield/relock pattern. */
		uth_cond_var_wait(&tchain->cv, NULL);
	}
	/* Still pending: pull it off, fixing the timer if the head changed. */
	if (__remove_awaiter(tchain, waiter))
		reset_tchain_interrupt(tchain);
	spin_pdr_unlock(&tchain->cv.lock);
	return true;
}
/* Re-arms waiter to go off at abs_time, whether it is still on the tchain or
 * has already fired and come off it.  Returns the result of the unset step:
 * true if the waiter was still pending, false if it had already fired.  If you
 * know the alarm has fired, don't call this; just set the awaiter and then
 * set_alarm(). */
static bool __tc_reset_alarm_abs(struct timer_chain *tchain,
                                 struct alarm_waiter *waiter, uint64_t abs_time)
{
	bool was_pending = __tc_unset_alarm(tchain, waiter);

	__set_awaiter_abs(waiter, abs_time);
	__tc_set_alarm(tchain, waiter);
	return was_pending;
}
/* Debug helpers */
/* Prints a one-line summary of tchain: its address, whether its waiter list is
 * empty, and its cached earliest/latest times.  The chain's lock is held while
 * the fields are read.
 *
 * The arguments are cast to exactly what the format string expects (void * for
 * %p, unsigned long long for %llu), so the call does not depend on how the
 * time fields happen to be typedef'd. */
void print_chain(struct timer_chain *tchain)
{
	spin_pdr_lock(&tchain->cv.lock);
	printf("Chain %p is%s empty, early: %llu latest: %llu\n",
	       (void *)tchain,
	       TAILQ_EMPTY(&tchain->waiters) ? "" : " not",
	       (unsigned long long)tchain->earliest_time,
	       (unsigned long long)tchain->latest_time);
	spin_pdr_unlock(&tchain->cv.lock);
}
/* "parlib" alarm handlers */
/* Alarm handler that tries to abort the syscall of the uthread stored in
 * awaiter->data.  If there is a syscall and the abort succeeds, we're done.
 * Otherwise the alarm re-arms itself to try again 10000 usec from now. */
void alarm_abort_sysc(struct alarm_waiter *awaiter)
{
	struct uthread *uth = awaiter->data;

	assert(uth);
	if (!uth->sysc || !sys_abort_sysc(uth->sysc)) {
		/* There are a bunch of reasons why we didn't abort the
		 * syscall.  The syscall might not have been issued or blocked
		 * at all, so uth->sysc would be NULL.  The syscall might have
		 * blocked, but at a non-abortable location
		 * - picture blocking on a qlock, then unblocking and blocking
		 *   later on a rendez.  If you try to abort in between,
		 *   abort_sysc will fail, then we'll get blocked on the rendez
		 *   until the next abort.
		 * Finally, the syscall might have completed, but the uthread
		 * hasn't cancelled the alarm yet.
		 *
		 * It's always safe to rearm the alarm - the uthread will unset
		 * it and break us out of the rearm loop. */
		set_awaiter_rel(awaiter, 10000);
		set_alarm(awaiter);
	}
}