| /* Copyright (c) 2013 The Regents of the University of California |
| * Copyright (c) 2018 Google Inc. |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * Userspace alarms. There are lower level helpers to build your own alarms |
| * from the #alarm device and an alarm service, based off a slimmed down version |
| * of the kernel alarms. Under the hood, the user alarm uses the #alarm service |
| * for the root of the alarm chain. |
| * |
| * There's only one timer chain, unlike in the kernel, for the entire process. |
| * If you want one-off timers unrelated to the chain (and sent to other vcores), |
| * use #alarm directly. |
| * |
| * Your handlers will run from vcore context. |
| * |
| * Code differences from the kernel (for future porting): |
| * - init_alarm_service, run as a constructor |
| * - set_alarm() and friends are __tc_set_alarm(), passing global_tchain. |
| * - reset_tchain_interrupt() uses #alarm |
| * - spinlocks -> spin_pdr_locks (cv's lock, actually) |
| * - ev_q wrappers for converting #alarm events to __triggers |
| * - printks, and other minor stuff. */ |
| |
| #include <sys/queue.h> |
| #include <sys/time.h> |
| #include <parlib/alarm.h> |
| #include <stdio.h> |
| #include <parlib/assert.h> |
| #include <parlib/stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <fcntl.h> |
| #include <parlib/parlib.h> |
| #include <parlib/event.h> |
| #include <parlib/uthread.h> |
| #include <parlib/spinlock.h> |
| #include <parlib/timing.h> |
| #include <sys/plan9_helpers.h> |
| #include <sys/fork_cb.h> |
| |
| /* Helper to get your own alarm. If you don't care about a return value, pass |
| * 0 and it'll be ignored. The alarm is built, but has no evq or timer set. */ |
| int devalarm_get_fds(int *ctlfd_r, int *timerfd_r, int *alarmid_r) |
| { |
| int ctlfd, timerfd, alarmid, ret; |
| char buf[20]; |
| char path[32]; |
| |
| ctlfd = open("#alarm/clone", O_RDWR | O_CLOEXEC); |
| if (ctlfd < 0) |
| return -1; |
| ret = read(ctlfd, buf, sizeof(buf) - 1); |
| if (ret <= 0) |
| return -1; |
| buf[ret] = 0; |
| alarmid = atoi(buf); |
| snprintf(path, sizeof(path), "#alarm/a%s/timer", buf); |
| timerfd = open(path, O_RDWR | O_CLOEXEC); |
| if (timerfd < 0) |
| return -1; |
| if (ctlfd_r) |
| *ctlfd_r = ctlfd; |
| else |
| close(ctlfd); |
| if (timerfd_r) |
| *timerfd_r = timerfd; |
| else |
| close(timerfd); |
| if (alarmid_r) |
| *alarmid_r = alarmid; |
| return 0; |
| } |
| |
| int devalarm_set_evq(int timerfd, struct event_queue *ev_q, int alarmid) |
| { |
| struct fd_tap_req tap_req = {0}; |
| |
| tap_req.fd = timerfd; |
| tap_req.cmd = FDTAP_CMD_ADD; |
| tap_req.filter = FDTAP_FILT_WRITTEN; |
| tap_req.ev_id = EV_ALARM; |
| tap_req.ev_q = ev_q; |
| tap_req.data = (void*)(long)alarmid; |
| if (sys_tap_fds(&tap_req, 1) != 1) |
| return -1; |
| return 0; |
| } |
| |
/* Arms the #alarm timer behind timerfd to fire at tsc_time (absolute TSC
 * ticks), written as hex.  Returns the write helper's result (0 on success,
 * negative on error). */
int devalarm_set_time(int timerfd, uint64_t tsc_time)
{
	return write_hex_to_fd(timerfd, tsc_time);
}
| |
| int devalarm_get_id(struct event_msg *ev_msg) |
| { |
| if (!ev_msg) |
| return -1; |
| return (int)(long)ev_msg->ev_arg3; |
| } |
| |
/* Disarms the #alarm timer behind timerfd; writing 0 turns it off.  Returns 0
 * on success, negative on error. */
int devalarm_disable(int timerfd)
{
	return write_hex_to_fd(timerfd, 0);
}
| |
| /* Helpers, basically renamed kernel interfaces, with the *tchain. */ |
| static void __tc_set_alarm(struct timer_chain *tchain, |
| struct alarm_waiter *waiter); |
| static bool __tc_unset_alarm(struct timer_chain *tchain, |
| struct alarm_waiter *waiter); |
| static bool __tc_reset_alarm_abs(struct timer_chain *tchain, |
| struct alarm_waiter *waiter, |
| uint64_t abs_time); |
| static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type, |
| void *data); |
| |
| /* One chain to rule them all. */ |
| struct timer_chain global_tchain; |
| |
| /* Helper, resets the earliest/latest times, based on the elements of the list. |
| * If the list is empty, we set the times to be the 12345 poison time. Since |
| * the list is empty, the alarm shouldn't be going off. */ |
| static void reset_tchain_times(struct timer_chain *tchain) |
| { |
| if (TAILQ_EMPTY(&tchain->waiters)) { |
| tchain->earliest_time = ALARM_POISON_TIME; |
| tchain->latest_time = ALARM_POISON_TIME; |
| } else { |
| tchain->earliest_time = |
| TAILQ_FIRST(&tchain->waiters)->wake_up_time; |
| tchain->latest_time = |
| TAILQ_LAST(&tchain->waiters, awaiters_tailq)->wake_up_time; |
| } |
| } |
| |
| static void devalarm_forked(void) |
| { |
| close(global_tchain.ctlfd); |
| close(global_tchain.timerfd); |
| if (devalarm_get_fds(&global_tchain.ctlfd, &global_tchain.timerfd, |
| NULL)) |
| perror("Useralarm on fork"); |
| } |
| |
| static void __attribute__((constructor)) alarm_service_ctor(void) |
| { |
| int ctlfd, timerfd, alarmid; |
| struct event_queue *ev_q; |
| static struct fork_cb devalarm_fork_cb = {.func = devalarm_forked}; |
| |
| if (__in_fake_parlib()) |
| return; |
| /* Sets up timer chain (only one chain per process) */ |
| TAILQ_INIT(&global_tchain.waiters); |
| global_tchain.running = NULL; |
| reset_tchain_times(&global_tchain); |
| uth_cond_var_init(&global_tchain.cv); |
| |
| if (devalarm_get_fds(&ctlfd, &timerfd, &alarmid)) { |
| perror("Useralarm: devalarm_get_fds"); |
| return; |
| } |
| /* Since we're doing SPAM_PUBLIC later, we actually don't need a big |
| * ev_q. But someone might copy/paste this and change a flag. */ |
| register_ev_handler(EV_ALARM, handle_user_alarm, 0); |
| if (!(ev_q = get_eventq(EV_MBOX_UCQ))) { |
| perror("Useralarm: Failed ev_q"); |
| return; |
| } |
| ev_q->ev_vcore = 0; |
| /* We could get multiple events for a single alarm. It's okay, since |
| * __trigger can handle spurious upcalls. If it ever is not okay, then |
| * use an INDIR (probably with SPAM_INDIR too) instead of SPAM_PUBLIC. |
| */ |
| ev_q->ev_flags = EVENT_IPI | EVENT_SPAM_PUBLIC | EVENT_WAKEUP; |
| if (devalarm_set_evq(timerfd, ev_q, alarmid)) { |
| perror("set_alarm_evq"); |
| return; |
| } |
| /* now the alarm is all set, just need to write the timer whenever we |
| * want it to go off. */ |
| global_tchain.alarmid = alarmid; |
| global_tchain.ctlfd = ctlfd; |
| global_tchain.timerfd = timerfd; |
| global_tchain.ev_q = ev_q; /* mostly for debugging */ |
| register_fork_cb(&devalarm_fork_cb); |
| } |
| |
| /* Initializes a new awaiter. */ |
| void init_awaiter(struct alarm_waiter *waiter, |
| void (*func) (struct alarm_waiter *awaiter)) |
| { |
| waiter->wake_up_time = ALARM_POISON_TIME; |
| assert(func); |
| waiter->func = func; |
| waiter->on_tchain = false; |
| } |
| |
| /* Give this the absolute time. For now, abs_time is the TSC time that you want |
| * the alarm to go off. */ |
static void __set_awaiter_abs(struct alarm_waiter *waiter, uint64_t abs_time)
{
	/* Caller must not be on the tchain; the chain is sorted by this field
	 * and isn't re-sorted when it changes. */
	waiter->wake_up_time = abs_time;
}
| |
| /* Give this the absolute unix time (in microseconds) that you want the alarm |
| * to go off. */ |
void set_awaiter_abs_unix(struct alarm_waiter *waiter, uint64_t abs_usec)
{
	/* epoch_nsec_to_tsc() wants nanoseconds; scale usec up by 1000.
	 * NOTE(review): abs_usec * 1000 could wrap for values near
	 * UINT64_MAX / 1000 - not reachable with real epoch times. */
	__set_awaiter_abs(waiter, epoch_nsec_to_tsc(abs_usec * 1000));
}
| |
| /* Give this a relative time from now, in microseconds. This might be easier to |
| * use than dealing with the TSC. */ |
/* Give this a relative time from now, in microseconds.  This might be easier
 * to use than dealing with the TSC. */
void set_awaiter_rel(struct alarm_waiter *waiter, uint64_t usleep)
{
	uint64_t start = read_tsc();
	uint64_t wake_tsc = start + usec2tsc(usleep);

	/* This will go off if we wrap-around the TSC.  It'll never happen for
	 * legit values, but this might catch some bugs with large usleeps. */
	assert(wake_tsc >= start);
	__set_awaiter_abs(waiter, wake_tsc);
}
| |
| /* Increment the timer that was already set, so that it goes off usleep usec |
| * from the previous tick. This is different than 'rel' in that it doesn't care |
| * about when 'now' is. */ |
void set_awaiter_inc(struct alarm_waiter *waiter, uint64_t usleep)
{
	/* Must have been set before - incrementing off the poison value would
	 * be a nonsense wake time. */
	assert(waiter->wake_up_time != ALARM_POISON_TIME);
	waiter->wake_up_time += usec2tsc(usleep);
}
| |
| /* User interface to the global tchain */ |
/* Arms waiter on the process-wide chain; set the wake time first (e.g. with
 * set_awaiter_rel()). */
void set_alarm(struct alarm_waiter *waiter)
{
	__tc_set_alarm(&global_tchain, waiter);
}
| |
/* Disarms waiter; returns TRUE if we stopped it before it fired.  May block -
 * see __tc_unset_alarm(). */
bool unset_alarm(struct alarm_waiter *waiter)
{
	return __tc_unset_alarm(&global_tchain, waiter);
}
| |
/* Rearms waiter (whether or not it already fired) for abs_time (TSC); returns
 * TRUE if we disarmed it before it fired. */
bool reset_alarm_abs(struct alarm_waiter *waiter, uint64_t abs_time)
{
	return __tc_reset_alarm_abs(&global_tchain, waiter, abs_time);
}
| |
| /* Helper, makes sure the kernel alarm is turned on at the right time. */ |
| static void reset_tchain_interrupt(struct timer_chain *tchain) |
| { |
| if (TAILQ_EMPTY(&tchain->waiters)) { |
| /* Turn it off */ |
| printd("Turning alarm off\n"); |
| if (devalarm_disable(tchain->timerfd)) { |
| printf("Useralarm: unable to disarm alarm!\n"); |
| return; |
| } |
| } else { |
| /* Make sure it is on and set to the earliest time */ |
| assert(tchain->earliest_time != ALARM_POISON_TIME); |
| /* TODO: check for times in the past or very close to now */ |
| printd("Turning alarm on for %llu\n", tchain->earliest_time); |
| if (devalarm_set_time(tchain->timerfd, tchain->earliest_time)) { |
| perror("Useralarm: Failed to set timer"); |
| return; |
| } |
| } |
| } |
| |
| /* This is called when the kernel alarm triggers a tchain, and needs to wake up |
| * everyone whose time is up. Called from vcore context. */ |
| static void __trigger_tchain(struct timer_chain *tchain) |
| { |
| struct alarm_waiter *i, *temp; |
| struct uthread *unsetter; |
| |
| spin_pdr_lock(&tchain->cv.lock); |
| /* It's possible we have multiple contexts running a single tchain. It |
| * shouldn't be possible for per-core tchains, but it is possible |
| * otherwise. In that case, we can just abort, treating the event/IRQ |
| * that woke us up as a 'poke'. */ |
| if (tchain->running) { |
| spin_pdr_unlock(&tchain->cv.lock); |
| return; |
| } |
| while ((i = TAILQ_FIRST(&tchain->waiters))) { |
| /* TODO: Could also do something in cases where it's close to |
| * expiring. */ |
| if (i->wake_up_time > read_tsc()) |
| break; |
| TAILQ_REMOVE(&tchain->waiters, i, next); |
| i->on_tchain = false; |
| tchain->running = i; |
| |
| /* Need the tchain times (earliest/latest) in sync when |
| * unlocked. */ |
| reset_tchain_times(tchain); |
| |
| spin_pdr_unlock(&tchain->cv.lock); |
| |
| /* Don't touch the waiter after running it, since the memory can |
| * be used immediately */ |
| i->func(i); |
| |
| spin_pdr_lock(&tchain->cv.lock); |
| tchain->running = NULL; |
| |
| /* This is the guts of a signal, but we're optimizing for the |
| * common case where there is no unsetter. Uthread CV |
| * signal/broadcast wakes the uthreads up outside of the CV |
| * lock, which will avoid any lock-ordering issues with the 2LS |
| * and the CV - in this case, the alarm service. */ |
| unsetter = __uth_cond_var_wake_one(&tchain->cv); |
| if (unsetter) { |
| spin_pdr_unlock(&tchain->cv.lock); |
| uthread_runnable(unsetter); |
| spin_pdr_lock(&tchain->cv.lock); |
| } |
| } |
| reset_tchain_interrupt(tchain); |
| spin_pdr_unlock(&tchain->cv.lock); |
| } |
| |
| static void handle_user_alarm(struct event_msg *ev_msg, unsigned int ev_type, |
| void *data) |
| { |
| assert(ev_type == EV_ALARM); |
| if (devalarm_get_id(ev_msg) == global_tchain.alarmid) |
| __trigger_tchain(&global_tchain); |
| } |
| |
| /* Helper, inserts the waiter into the tchain, returning TRUE if we still need |
| * to reset the tchain interrupt. Caller holds the lock. */ |
static bool __insert_awaiter(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	struct alarm_waiter *i, *temp;

	waiter->on_tchain = TRUE;
	/* Either the list is empty, or not. */
	if (TAILQ_EMPTY(&tchain->waiters)) {
		tchain->earliest_time = waiter->wake_up_time;
		tchain->latest_time = waiter->wake_up_time;
		TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next);
		/* Need to turn on the timer interrupt later */
		return TRUE;
	}
	/* If not, either we're first, last, or in the middle.  Reset the
	 * interrupt and adjust the tchain's times accordingly. */
	if (waiter->wake_up_time < tchain->earliest_time) {
		tchain->earliest_time = waiter->wake_up_time;
		TAILQ_INSERT_HEAD(&tchain->waiters, waiter, next);
		/* Changed the first entry; we'll need to reset the interrupt
		 * later */
		return TRUE;
	}
	/* If there is a tie for last, the newer one will really go last.  We
	 * need to handle equality here since the loop later won't catch it
	 * (its comparison is strictly less-than). */
	if (waiter->wake_up_time >= tchain->latest_time) {
		tchain->latest_time = waiter->wake_up_time;
		/* Proactively put it at the end if we know we're last */
		TAILQ_INSERT_TAIL(&tchain->waiters, waiter, next);
		return FALSE;
	}
	/* Insert before the first one you are earlier than.  This won't scale
	 * well (TODO) if we have a lot of inserts.  The proactive insert_tail
	 * up above will help a bit. */
	TAILQ_FOREACH_SAFE(i, &tchain->waiters, next, temp) {
		if (waiter->wake_up_time < i->wake_up_time) {
			TAILQ_INSERT_BEFORE(i, waiter, next);
			return FALSE;
		}
	}
	/* Unreachable: the latest_time check above guarantees some element is
	 * strictly later than us, so the loop must have inserted. */
	panic("Could not find a spot for awaiter %p\n", waiter);
}
| |
| static void __tc_set_alarm(struct timer_chain *tchain, |
| struct alarm_waiter *waiter) |
| { |
| assert(waiter->wake_up_time != ALARM_POISON_TIME); |
| assert(!waiter->on_tchain); |
| |
| spin_pdr_lock(&tchain->cv.lock); |
| if (__insert_awaiter(tchain, waiter)) |
| reset_tchain_interrupt(tchain); |
| spin_pdr_unlock(&tchain->cv.lock); |
| } |
| |
| /* Helper, rips the waiter from the tchain, knowing that it is on the list. |
| * Returns TRUE if the tchain interrupt needs to be reset. Callers hold the |
| * lock. */ |
static bool __remove_awaiter(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	struct alarm_waiter *temp;
	bool reset_int = FALSE;	/* whether or not to reset the interrupt */

	/* Need to make sure earliest and latest are set, in case we're mucking
	 * with the first and/or last element of the chain. */
	if (TAILQ_FIRST(&tchain->waiters) == waiter) {
		/* New earliest is the next waiter, or poison if we were the
		 * only one. */
		temp = TAILQ_NEXT(waiter, next);
		tchain->earliest_time = (temp) ? temp->wake_up_time :
			ALARM_POISON_TIME;
		reset_int = TRUE;	/* we'll need to reset the timer later */
	}
	if (TAILQ_LAST(&tchain->waiters, awaiters_tailq) == waiter) {
		/* New latest is the previous waiter, or poison if empty.  No
		 * interrupt reset needed: the latest time doesn't drive the
		 * kernel timer. */
		temp = TAILQ_PREV(waiter, awaiters_tailq, next);
		tchain->latest_time = (temp) ? temp->wake_up_time :
			ALARM_POISON_TIME;
	}
	TAILQ_REMOVE(&tchain->waiters, waiter, next);
	waiter->on_tchain = FALSE;
	return reset_int;
}
| |
| /* Removes waiter from the tchain before it goes off. Returns TRUE if we |
| * disarmed before the alarm went off, FALSE if it already fired. May block, |
| * since the handler may be running asynchronously. */ |
static bool __tc_unset_alarm(struct timer_chain *tchain,
                             struct alarm_waiter *waiter)
{
	spin_pdr_lock(&tchain->cv.lock);
	for (;;) {
		/* Still on the chain: we won the race, rip it out. */
		if (waiter->on_tchain) {
			if (__remove_awaiter(tchain, waiter))
				reset_tchain_interrupt(tchain);
			spin_pdr_unlock(&tchain->cv.lock);
			return true;
		}
		/* Off the chain and not currently executing: it fired. */
		if (tchain->running != waiter) {
			spin_pdr_unlock(&tchain->cv.lock);
			return false;
		}
		/* It's running.  We'll need to try again.  Note the alarm
		 * could have resubmitted itself, so ideally the caller can
		 * tell it to not resubmit.
		 *
		 * Despite the slightly more difficult wake-up code in
		 * userspace compared to the kernel, it's still better to use a
		 * CV here.  Some go tests in qemu were more likely to
		 * timeout/starve even if we did some form of
		 * unlock/yield/relock pattern. */
		uth_cond_var_wait(&tchain->cv, NULL);
	}
}
| |
| /* waiter may be on the tchain, or it might have fired already and be off the |
| * tchain. Either way, this will put the waiter on the list, set to go off at |
| * abs_time. If you know the alarm has fired, don't call this. Just set the |
| * awaiter, and then set_alarm() */ |
/* Disarms 'waiter' (waiting out a running handler if need be), retargets it to
 * abs_time (TSC), and rearms it.  Returns whether we disarmed it before it
 * fired. */
static bool __tc_reset_alarm_abs(struct timer_chain *tchain,
                                 struct alarm_waiter *waiter, uint64_t abs_time)
{
	bool disarmed_early = __tc_unset_alarm(tchain, waiter);

	__set_awaiter_abs(waiter, abs_time);
	__tc_set_alarm(tchain, waiter);
	return disarmed_early;
}
| |
| /* Debug helpers */ |
| |
| void print_chain(struct timer_chain *tchain) |
| { |
| struct alarm_waiter *i; |
| spin_pdr_lock(&tchain->cv.lock); |
| printf("Chain %p is%s empty, early: %llu latest: %llu\n", tchain, |
| TAILQ_EMPTY(&tchain->waiters) ? "" : " not", |
| tchain->earliest_time, |
| tchain->latest_time); |
| spin_pdr_unlock(&tchain->cv.lock); |
| } |
| |
| /* "parlib" alarm handlers */ |
void alarm_abort_sysc(struct alarm_waiter *awaiter)
{
	/* awaiter->data must have been set to the uthread whose syscall we're
	 * trying to abort. */
	struct uthread *uth = awaiter->data;

	assert(uth);
	if (uth->sysc && sys_abort_sysc(uth->sysc))
		return;
	/* There are a bunch of reasons why we didn't abort the syscall.  The
	 * syscall might not have been issued or blocked at all, so uth->sysc
	 * would be NULL.  The syscall might have blocked, but at a
	 * non-abortable location
	 * - picture blocking on a qlock, then unblocking and blocking later on
	 *   a rendez.  If you try to abort in between, abort_sysc will fail,
	 *   then we'll get blocked on the rendez until the next abort.
	 * Finally, the syscall might have completed, but the uthread hasn't
	 * cancelled the alarm yet.
	 *
	 * It's always safe to rearm the alarm - the uthread will unset it and
	 * break us out of the rearm loop. */
	set_awaiter_rel(awaiter, 10000);	/* retry in 10 msec */
	set_alarm(awaiter);
}