|  | /* Copyright (c) 2013 The Regents of the University of California | 
|  | * Copyright (c) 2018 Google Inc. | 
|  | * Barret Rhoden <brho@cs.berkeley.edu> | 
|  | * See LICENSE for details. | 
|  | * | 
|  | * Userspace alarms.  There are lower level helpers to build your own alarms | 
|  | * from the #alarm device and an alarm service, based off a slimmed down version | 
|  | * of the kernel alarms.  Under the hood, the user alarm uses the #alarm service | 
|  | * for the root of the alarm chain. | 
|  | * | 
|  | * There's only one timer chain, unlike in the kernel, for the entire process. | 
|  | * If you want one-off timers unrelated to the chain (and sent to other vcores), | 
|  | * use #alarm directly. | 
|  | * | 
|  | * Your handlers will run from vcore context. | 
|  | * | 
|  | * Code differences from the kernel (for future porting): | 
|  | * - init_alarm_service, run as a constructor | 
|  | * - set_alarm() and friends are __tc_set_alarm(), passing global_tchain. | 
|  | * - reset_tchain_interrupt() uses #alarm | 
|  | * - spinlocks -> spin_pdr_locks (cv's lock, actually) | 
|  | * - ev_q wrappers for converting #alarm events to __triggers | 
|  | * - printks, and other minor stuff. */ | 
|  |  | 
#include <sys/queue.h>
#include <sys/time.h>
#include <parlib/alarm.h>
#include <parlib/assert.h>
#include <parlib/stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <parlib/parlib.h>
#include <parlib/event.h>
#include <parlib/uthread.h>
#include <parlib/spinlock.h>
#include <parlib/timing.h>
#include <sys/plan9_helpers.h>
#include <sys/fork_cb.h>
|  |  | 
|  | /* Helper to get your own alarm.   If you don't care about a return value, pass | 
|  | * 0 and it'll be ignored.  The alarm is built, but has no evq or timer set. */ | 
|  | int devalarm_get_fds(int *ctlfd_r, int *timerfd_r, int *alarmid_r) | 
|  | { | 
|  | int ctlfd, timerfd, alarmid, ret; | 
|  | char buf[20]; | 
|  | char path[32]; | 
|  |  | 
|  | ctlfd = open("#alarm/clone", O_RDWR | O_CLOEXEC); | 
|  | if (ctlfd < 0) | 
|  | return -1; | 
|  | ret = read(ctlfd, buf, sizeof(buf) - 1); | 
|  | if (ret <= 0) | 
|  | return -1; | 
|  | buf[ret] = 0; | 
|  | alarmid = atoi(buf); | 
|  | snprintf(path, sizeof(path), "#alarm/a%s/timer", buf); | 
|  | timerfd = open(path, O_RDWR | O_CLOEXEC); | 
|  | if (timerfd < 0) | 
|  | return -1; | 
|  | if (ctlfd_r) | 
|  | *ctlfd_r = ctlfd; | 
|  | else | 
|  | close(ctlfd); | 
|  | if (timerfd_r) | 
|  | *timerfd_r = timerfd; | 
|  | else | 
|  | close(timerfd); | 
|  | if (alarmid_r) | 
|  | *alarmid_r = alarmid; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int devalarm_set_evq(int timerfd, struct event_queue *ev_q, int alarmid) | 
|  | { | 
|  | struct fd_tap_req tap_req = {0}; | 
|  |  | 
|  | tap_req.fd = timerfd; | 
|  | tap_req.cmd = FDTAP_CMD_ADD; | 
|  | tap_req.filter = FDTAP_FILT_WRITTEN; | 
|  | tap_req.ev_id = EV_ALARM; | 
|  | tap_req.ev_q = ev_q; | 
|  | tap_req.data = (void*)(long)alarmid; | 
|  | if (sys_tap_fds(&tap_req, 1) != 1) | 
|  | return -1; | 
|  | return 0; | 
|  | } | 
|  |  | 
/* Arms the alarm behind timerfd by writing tsc_time to it in hex.  Returns
 * whatever write_hex_to_fd() returns. */
int devalarm_set_time(int timerfd, uint64_t tsc_time)
{
	uint64_t deadline = tsc_time;

	return write_hex_to_fd(timerfd, deadline);
}
|  |  | 
|  | int devalarm_get_id(struct event_msg *ev_msg) | 
|  | { | 
|  | if (!ev_msg) | 
|  | return -1; | 
|  | return (int)(long)ev_msg->ev_arg3; | 
|  | } | 
|  |  | 
/* Turns the alarm behind timerfd off by writing a time of 0 to it.  Returns
 * whatever write_hex_to_fd() returns. */
int devalarm_disable(int timerfd)
{
	int ret;

	ret = write_hex_to_fd(timerfd, 0);
	return ret;
}
|  |  | 
|  | /* Helpers, basically renamed kernel interfaces, with the *tchain. */ | 
|  | static void __tc_set_alarm(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter); | 
|  | static bool __tc_unset_alarm(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter); | 
|  | static bool __tc_reset_alarm_abs(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter, | 
|  | uint64_t abs_time); | 
|  | static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type, | 
|  | void *data); | 
|  |  | 
|  | /* One chain to rule them all. */ | 
|  | struct timer_chain global_tchain; | 
|  |  | 
|  | /* Helper, resets the earliest/latest times, based on the elements of the list. | 
|  | * If the list is empty, we set the times to be the 12345 poison time.  Since | 
|  | * the list is empty, the alarm shouldn't be going off. */ | 
|  | static void reset_tchain_times(struct timer_chain *tchain) | 
|  | { | 
|  | if (TAILQ_EMPTY(&tchain->waiters)) { | 
|  | tchain->earliest_time = ALARM_POISON_TIME; | 
|  | tchain->latest_time = ALARM_POISON_TIME; | 
|  | } else { | 
|  | tchain->earliest_time = | 
|  | TAILQ_FIRST(&tchain->waiters)->wake_up_time; | 
|  | tchain->latest_time = | 
|  | TAILQ_LAST(&tchain->waiters, awaiters_tailq)->wake_up_time; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void devalarm_forked(void) | 
|  | { | 
|  | close(global_tchain.ctlfd); | 
|  | close(global_tchain.timerfd); | 
|  | if (devalarm_get_fds(&global_tchain.ctlfd, &global_tchain.timerfd, | 
|  | NULL)) | 
|  | perror("Useralarm on fork"); | 
|  | } | 
|  |  | 
|  | static void __attribute__((constructor)) alarm_service_ctor(void) | 
|  | { | 
|  | int ctlfd, timerfd, alarmid; | 
|  | struct event_queue *ev_q; | 
|  | static struct fork_cb devalarm_fork_cb = {.func = devalarm_forked}; | 
|  |  | 
|  | if (__in_fake_parlib()) | 
|  | return; | 
|  | /* Sets up timer chain (only one chain per process) */ | 
|  | TAILQ_INIT(&global_tchain.waiters); | 
|  | global_tchain.running = NULL; | 
|  | reset_tchain_times(&global_tchain); | 
|  | uth_cond_var_init(&global_tchain.cv); | 
|  |  | 
|  | if (devalarm_get_fds(&ctlfd, &timerfd, &alarmid)) { | 
|  | perror("Useralarm: devalarm_get_fds"); | 
|  | return; | 
|  | } | 
|  | /* Since we're doing SPAM_PUBLIC later, we actually don't need a big | 
|  | * ev_q.  But someone might copy/paste this and change a flag. */ | 
|  | register_ev_handler(EV_ALARM, handle_user_alarm, 0); | 
|  | if (!(ev_q = get_eventq(EV_MBOX_UCQ))) { | 
|  | perror("Useralarm: Failed ev_q"); | 
|  | return; | 
|  | } | 
|  | ev_q->ev_vcore = 0; | 
|  | /* We could get multiple events for a single alarm.  It's okay, since | 
|  | * __trigger can handle spurious upcalls.  If it ever is not okay, then | 
|  | * use an INDIR (probably with SPAM_INDIR too) instead of SPAM_PUBLIC. | 
|  | */ | 
|  | ev_q->ev_flags = EVENT_IPI | EVENT_SPAM_PUBLIC | EVENT_WAKEUP; | 
|  | if (devalarm_set_evq(timerfd, ev_q, alarmid)) { | 
|  | perror("set_alarm_evq"); | 
|  | return; | 
|  | } | 
|  | /* now the alarm is all set, just need to write the timer whenever we | 
|  | * want it to go off. */ | 
|  | global_tchain.alarmid = alarmid; | 
|  | global_tchain.ctlfd = ctlfd; | 
|  | global_tchain.timerfd = timerfd; | 
|  | global_tchain.ev_q = ev_q;	/* mostly for debugging */ | 
|  | register_fork_cb(&devalarm_fork_cb); | 
|  | } | 
|  |  | 
|  | /* Initializes a new awaiter. */ | 
|  | void init_awaiter(struct alarm_waiter *waiter, | 
|  | void (*func) (struct alarm_waiter *awaiter)) | 
|  | { | 
|  | waiter->wake_up_time = ALARM_POISON_TIME; | 
|  | assert(func); | 
|  | waiter->func = func; | 
|  | waiter->on_tchain = false; | 
|  | } | 
|  |  | 
|  | /* Give this the absolute time.  For now, abs_time is the TSC time that you want | 
|  | * the alarm to go off. */ | 
|  | static void __set_awaiter_abs(struct alarm_waiter *waiter, uint64_t abs_time) | 
|  | { | 
|  | waiter->wake_up_time = abs_time; | 
|  | } | 
|  |  | 
/* Sets the waiter to go off at abs_usec, an absolute unix time in
 * microseconds.  The time is scaled to nanoseconds and converted to a TSC
 * value with epoch_nsec_to_tsc(). */
void set_awaiter_abs_unix(struct alarm_waiter *waiter, uint64_t abs_usec)
{
	uint64_t abs_nsec = abs_usec * 1000;
	uint64_t abs_tsc = epoch_nsec_to_tsc(abs_nsec);

	__set_awaiter_abs(waiter, abs_tsc);
}
|  |  | 
/* Sets the waiter to go off usleep microseconds from now.  This might be
 * easier to use than dealing with the TSC directly. */
void set_awaiter_rel(struct alarm_waiter *waiter, uint64_t usleep)
{
	uint64_t start = read_tsc();
	uint64_t deadline = start + usec2tsc(usleep);

	/* Trips if the sum wrapped around the TSC.  That never happens for
	 * legit values, but it might catch bugs with huge usleeps. */
	assert(start <= deadline);
	__set_awaiter_abs(waiter, deadline);
}
|  |  | 
|  | /* Increment the timer that was already set, so that it goes off usleep usec | 
|  | * from the previous tick.  This is different than 'rel' in that it doesn't care | 
|  | * about when 'now' is. */ | 
|  | void set_awaiter_inc(struct alarm_waiter *waiter, uint64_t usleep) | 
|  | { | 
|  | assert(waiter->wake_up_time != ALARM_POISON_TIME); | 
|  | waiter->wake_up_time += usec2tsc(usleep); | 
|  | } | 
|  |  | 
|  | /* User interface to the global tchain */ | 
|  | void set_alarm(struct alarm_waiter *waiter) | 
|  | { | 
|  | __tc_set_alarm(&global_tchain, waiter); | 
|  | } | 
|  |  | 
|  | bool unset_alarm(struct alarm_waiter *waiter) | 
|  | { | 
|  | return __tc_unset_alarm(&global_tchain, waiter); | 
|  | } | 
|  |  | 
|  | bool reset_alarm_abs(struct alarm_waiter *waiter, uint64_t abs_time) | 
|  | { | 
|  | return __tc_reset_alarm_abs(&global_tchain, waiter, abs_time); | 
|  | } | 
|  |  | 
|  | /* Helper, makes sure the kernel alarm is turned on at the right time. */ | 
|  | static void reset_tchain_interrupt(struct timer_chain *tchain) | 
|  | { | 
|  | if (TAILQ_EMPTY(&tchain->waiters)) { | 
|  | /* Turn it off */ | 
|  | printd("Turning alarm off\n"); | 
|  | if (devalarm_disable(tchain->timerfd)) { | 
|  | printf("Useralarm: unable to disarm alarm!\n"); | 
|  | return; | 
|  | } | 
|  | } else { | 
|  | /* Make sure it is on and set to the earliest time */ | 
|  | assert(tchain->earliest_time != ALARM_POISON_TIME); | 
|  | /* TODO: check for times in the past or very close to now */ | 
|  | printd("Turning alarm on for %llu\n", tchain->earliest_time); | 
|  | if (devalarm_set_time(tchain->timerfd, tchain->earliest_time)) { | 
|  | perror("Useralarm: Failed to set timer"); | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* This is called when the kernel alarm triggers a tchain, and needs to wake up | 
|  | * everyone whose time is up.  Called from vcore context. */ | 
|  | static void __trigger_tchain(struct timer_chain *tchain) | 
|  | { | 
|  | struct alarm_waiter *i, *temp; | 
|  | struct uthread *unsetter; | 
|  |  | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | /* It's possible we have multiple contexts running a single tchain.  It | 
|  | * shouldn't be possible for per-core tchains, but it is possible | 
|  | * otherwise.  In that case, we can just abort, treating the event/IRQ | 
|  | * that woke us up as a 'poke'. */ | 
|  | if (tchain->running) { | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | return; | 
|  | } | 
|  | while ((i = TAILQ_FIRST(&tchain->waiters))) { | 
|  | /* TODO: Could also do something in cases where it's close to | 
|  | * expiring. */ | 
|  | if (i->wake_up_time > read_tsc()) | 
|  | break; | 
|  | TAILQ_REMOVE(&tchain->waiters, i, next); | 
|  | i->on_tchain = false; | 
|  | tchain->running = i; | 
|  |  | 
|  | /* Need the tchain times (earliest/latest) in sync when | 
|  | * unlocked. */ | 
|  | reset_tchain_times(tchain); | 
|  |  | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  |  | 
|  | /* Don't touch the waiter after running it, since the memory can | 
|  | * be used immediately */ | 
|  | i->func(i); | 
|  |  | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | tchain->running = NULL; | 
|  |  | 
|  | /* This is the guts of a signal, but we're optimizing for the | 
|  | * common case where there is no unsetter.  Uthread CV | 
|  | * signal/broadcast wakes the uthreads up outside of the CV | 
|  | * lock, which will avoid any lock-ordering issues with the 2LS | 
|  | * and the CV - in this case, the alarm service. */ | 
|  | unsetter = __uth_cond_var_wake_one(&tchain->cv); | 
|  | if (unsetter) { | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | uthread_runnable(unsetter); | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | } | 
|  | } | 
|  | reset_tchain_interrupt(tchain); | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | } | 
|  |  | 
|  | static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type, | 
|  | void *data) | 
|  | { | 
|  | assert(ev_type == EV_ALARM); | 
|  | if (devalarm_get_id(ev_msg) == global_tchain.alarmid) | 
|  | __trigger_tchain(&global_tchain); | 
|  | } | 
|  |  | 
|  | /* Helper, inserts the waiter into the tchain, returning TRUE if we still need | 
|  | * to reset the tchain interrupt.  Caller holds the lock. */ | 
|  | static bool __insert_awaiter(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter) | 
|  | { | 
|  | struct alarm_waiter *i, *temp; | 
|  |  | 
|  | waiter->on_tchain = TRUE; | 
|  | /* Either the list is empty, or not. */ | 
|  | if (TAILQ_EMPTY(&tchain->waiters)) { | 
|  | tchain->earliest_time = waiter->wake_up_time; | 
|  | tchain->latest_time = waiter->wake_up_time; | 
|  | TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next); | 
|  | /* Need to turn on the timer interrupt later */ | 
|  | return TRUE; | 
|  | } | 
|  | /* If not, either we're first, last, or in the middle.  Reset the | 
|  | * interrupt and adjust the tchain's times accordingly. */ | 
|  | if (waiter->wake_up_time < tchain->earliest_time) { | 
|  | tchain->earliest_time = waiter->wake_up_time; | 
|  | TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next); | 
|  | /* Changed the first entry; we'll need to reset the interrupt | 
|  | * later */ | 
|  | return TRUE; | 
|  | } | 
|  | /* If there is a tie for last, the newer one will really go last.  We | 
|  | * need to handle equality here since the loop later won't catch it. */ | 
|  | if (waiter->wake_up_time >= tchain->latest_time) { | 
|  | tchain->latest_time = waiter->wake_up_time; | 
|  | /* Proactively put it at the end if we know we're last */ | 
|  | TAILQ_INSERT_TAIL(&tchain->waiters, waiter, next); | 
|  | return FALSE; | 
|  | } | 
|  | /* Insert before the first one you are earlier than.  This won't scale | 
|  | * well (TODO) if we have a lot of inserts.  The proactive insert_tail | 
|  | * up above will help a bit. */ | 
|  | TAILQ_FOREACH_SAFE(i, &tchain->waiters, next, temp) { | 
|  | if (waiter->wake_up_time < i->wake_up_time) { | 
|  | TAILQ_INSERT_BEFORE(i, waiter, next); | 
|  | return FALSE; | 
|  | } | 
|  | } | 
|  | panic("Could not find a spot for awaiter %p\n", waiter); | 
|  | } | 
|  |  | 
|  | static void __tc_set_alarm(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter) | 
|  | { | 
|  | assert(waiter->wake_up_time != ALARM_POISON_TIME); | 
|  | assert(!waiter->on_tchain); | 
|  |  | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | if (__insert_awaiter(tchain, waiter)) | 
|  | reset_tchain_interrupt(tchain); | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | } | 
|  |  | 
|  | /* Helper, rips the waiter from the tchain, knowing that it is on the list. | 
|  | * Returns TRUE if the tchain interrupt needs to be reset.  Callers hold the | 
|  | * lock. */ | 
|  | static bool __remove_awaiter(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter) | 
|  | { | 
|  | struct alarm_waiter *temp; | 
|  | bool reset_int = FALSE;	/* whether or not to reset the interrupt */ | 
|  |  | 
|  | /* Need to make sure earliest and latest are set, in case we're mucking | 
|  | * with the first and/or last element of the chain. */ | 
|  | if (TAILQ_FIRST(&tchain->waiters) == waiter) { | 
|  | temp = TAILQ_NEXT(waiter, next); | 
|  | tchain->earliest_time = (temp) ? temp->wake_up_time : | 
|  | ALARM_POISON_TIME; | 
|  | reset_int = TRUE; /* we'll need to reset the timer later */ | 
|  | } | 
|  | if (TAILQ_LAST(&tchain->waiters, awaiters_tailq) == waiter) { | 
|  | temp = TAILQ_PREV(waiter, awaiters_tailq, next); | 
|  | tchain->latest_time = (temp) ? temp->wake_up_time : | 
|  | ALARM_POISON_TIME; | 
|  | } | 
|  | TAILQ_REMOVE(&tchain->waiters, waiter, next); | 
|  | waiter->on_tchain = FALSE; | 
|  | return reset_int; | 
|  | } | 
|  |  | 
|  | /* Removes waiter from the tchain before it goes off.  Returns TRUE if we | 
|  | * disarmed before the alarm went off, FALSE if it already fired.  May block, | 
|  | * since the handler may be running asynchronously. */ | 
|  | static bool __tc_unset_alarm(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter) | 
|  | { | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | for (;;) { | 
|  | if (waiter->on_tchain) { | 
|  | if (__remove_awaiter(tchain, waiter)) | 
|  | reset_tchain_interrupt(tchain); | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | return true; | 
|  | } | 
|  | if (tchain->running != waiter) { | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | return false; | 
|  | } | 
|  | /* It's running.  We'll need to try again.  Note the alarm could | 
|  | * have resubmitted itself, so ideally the caller can tell it to | 
|  | * not resubmit. | 
|  | * | 
|  | * Despite the slightly more difficult wake-up code in userspace | 
|  | * compared to the kernel, it's still better to use a CV here. | 
|  | * Some go tests in qemu were more likely to timeout/starve even | 
|  | * if we did some form of unlock/yield/relock pattern. */ | 
|  | uth_cond_var_wait(&tchain->cv, NULL); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* waiter may be on the tchain, or it might have fired already and be off the | 
|  | * tchain.  Either way, this will put the waiter on the list, set to go off at | 
|  | * abs_time.  If you know the alarm has fired, don't call this.  Just set the | 
|  | * awaiter, and then set_alarm() */ | 
|  | static bool __tc_reset_alarm_abs(struct timer_chain *tchain, | 
|  | struct alarm_waiter *waiter, uint64_t abs_time) | 
|  | { | 
|  | bool ret; | 
|  |  | 
|  | ret = __tc_unset_alarm(tchain, waiter); | 
|  | __set_awaiter_abs(waiter, abs_time); | 
|  | __tc_set_alarm(tchain, waiter); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | /* Debug helpers */ | 
|  |  | 
|  | void print_chain(struct timer_chain *tchain) | 
|  | { | 
|  | struct alarm_waiter *i; | 
|  | spin_pdr_lock(&tchain->cv.lock); | 
|  | printf("Chain %p is%s empty, early: %llu latest: %llu\n", tchain, | 
|  | TAILQ_EMPTY(&tchain->waiters) ? "" : " not", | 
|  | tchain->earliest_time, | 
|  | tchain->latest_time); | 
|  | spin_pdr_unlock(&tchain->cv.lock); | 
|  | } | 
|  |  | 
|  | /* "parlib" alarm handlers */ | 
|  | void alarm_abort_sysc(struct alarm_waiter *awaiter) | 
|  | { | 
|  | struct uthread *uth = awaiter->data; | 
|  |  | 
|  | assert(uth); | 
|  | if (uth->sysc && sys_abort_sysc(uth->sysc)) | 
|  | return; | 
|  | /* There are a bunch of reasons why we didn't abort the syscall.  The | 
|  | * syscall might not have been issued or blocked at all, so uth->sysc | 
|  | * would be NULL.  The syscall might have blocked, but at a | 
|  | * non-abortable location | 
|  | * - picture blocking on a qlock, then unblocking and blocking later on | 
|  | *   a rendez.  If you try to abort in between, abort_sysc will fail, | 
|  | *   then we'll get blocked on the rendez until the next abort. | 
|  | *   Finally, the syscall might have completed, but the uthread hasn't | 
|  | *   cancelled the alarm yet. | 
|  | * | 
|  | * It's always safe to rearm the alarm - the uthread will unset it and | 
|  | * break us out of the rearm loop. */ | 
|  | set_awaiter_rel(awaiter, 10000); | 
|  | set_alarm(awaiter); | 
|  | } |