blob: 2186af274a0611ab199c2ed64d3cc5f00592e0c5 [file] [log] [blame]
/* Copyright (c) 2013 The Regents of the University of California
* Copyright (c) 2016 Google Inc.
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* #alarm: a device for registering per-process alarms.
*
* Allows a process to set up alarms, which they can tap to get events at a
* certain TSC time.
*
* Every process has their own alarm sets and view of #alarm; gen and friends
* look at current's alarmset when it is time to gen or open a file.
*
* To use, first open #alarm/clone, and that gives you an alarm directory aN,
* where N is ID of the alarm. The FD you get from clone points to 'ctl.'
*
* 'ctl' takes no commands. You can read it to get the ID. That's it.
*
* 'timer' takes the hex string value (in absolute tsc time) to fire the alarm.
* Writing 0 disables the alarm. You can read 'timer' to get the next time it
* will fire, in TSC time. 0 means it is disabled. To find out about the timer
* firing, put an FD tap on 'timer' for FDTAP_FILT_WRITTEN.
*
* 'period' takes the hex string value (in TSC ticks) for the period of the
* alarm. If non-zero, the alarm will rearm when it fires. You can read the
* period.
*
* Reading the 'count' file will return the number of times the alarm has
* expired since the last read or the last write to 'timer'. If this is 0, then
* read() will block or EAGAIN. You cannot write 'count'. You can tap it for
* FDTAP_FILT_READABLE.
*
* While each process has a separate view of #alarm, it is possible to post a
* chan to Qctl or Qtimer to #srv. If another proc has your Qtimer, it can set
* it in the past, thereby triggering an immediate event. More clever than
* useful.
*
* Notes on refcnting (the trickier parts here):
* - the proc_alarms have counted references to their proc
* proc won't free til all alarms are closed, which is fine. we close
* all files in destroy. if a proc drops a chan in srv, the proc will stay
* alive because the alarm is alive - til that chan is closed (srvremove)
*
* other shady ways to keep a chan alive: cd to it! if it is ., we'd
* keep a ref around. however, only alarmdir *file* grab refs, not
* directories.
*
* - proc_alarms are kref'd, since there can be multiple chans per alarm
* the only thing that keeps an alarm alive is a chan on a CTL or TIMER (or
* other file). when you cloned, you got back an open CTL, which keeps the
* alarm (and the dir) alive.
*
* we need to be careful generating krefs, in case alarms are concurrently
* released and removed from the lists. just like with procs and pid2proc,
* we need to sync with the source of the kref. */
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <sys/queue.h>
#include <smp.h>
#include <kref.h>
#include <atomic.h>
#include <alarm.h>
#include <umem.h>
#include <devalarm.h>
struct dev alarmdevtab;
static char *devname(void)
{
return alarmdevtab.name;
}
/* qid path types */
#define Qtopdir 1
#define Qclone 2
#define Qalarmdir 3
#define Qctl 4
#define Qtimer 5 /* Qctl + 1 */
#define Qperiod 6
#define Qcount 7
/* This paddr/kaddr is a bit dangerous. it'll work so long as we don't need all
* 64 bits for a physical address (48 is the current norm on x86_64). */
#define ADDR_SHIFT 5
#define QID2A(q) ((struct proc_alarm*)KADDR(((q).path >> ADDR_SHIFT)))
#define TYPE(q) ((q).path & ((1 << ADDR_SHIFT) - 1))
#define QID(ptr, type) ((PADDR(ptr) << ADDR_SHIFT) | type)
extern struct username eve;
static void alarm_release(struct kref *kref)
{
struct proc_alarm *a = container_of(kref, struct proc_alarm, kref);
struct proc *p = a->proc;
assert(p);
spin_lock(&p->alarmset.lock);
TAILQ_REMOVE(&p->alarmset.list, a, link);
spin_unlock(&p->alarmset.lock);
/* When this returns, the alarm has either fired or it never will */
unset_alarm(p->alarmset.tchain, &a->a_waiter);
proc_decref(p);
kfree(a);
}
static void alarm_fire_taps(struct proc_alarm *a, int filter)
{
struct fd_tap *tap_i;
SLIST_FOREACH(tap_i, &a->fd_taps, link)
fire_tap(tap_i, filter);
}
static void proc_alarm_handler(struct alarm_waiter *a_waiter)
{
struct proc_alarm *a = container_of(a_waiter, struct proc_alarm,
a_waiter);
cv_lock(&a->cv);
a->count++;
if (!a->period) {
a_waiter->wake_up_time = 0;
} else {
/* TODO: use an alarm helper, once we switch over to nsec */
a_waiter->wake_up_time += a->period;
set_alarm(a->proc->alarmset.tchain, a_waiter);
}
__cv_broadcast(&a->cv);
/* Fires taps for both Qtimer and Qcount. */
alarm_fire_taps(a, FDTAP_FILT_WRITTEN | FDTAP_FILT_READABLE);
cv_unlock(&a->cv);
}
void devalarm_init(struct proc *p)
{
TAILQ_INIT(&p->alarmset.list);
spinlock_init(&p->alarmset.lock);
/* Just running all the proc alarms on core 0. */
p->alarmset.tchain = &per_cpu_info[0].tchain;
p->alarmset.id_counter = 0;
}
static int alarmgen(struct chan *c, char *entry_name, struct dirtab *unused,
int unused_nr_dirtab, int s, struct dir *dp)
{
struct qid q;
struct proc_alarm *a_i;
struct proc *p = current;
/* Whether we're in one dir or at the top, .. still takes us to the top.
*/
if (s == DEVDOTDOT) {
mkqid(&q, Qtopdir, 0, QTDIR);
devdir(c, q, devname(), 0, eve.name, 0555, dp);
return 1;
}
switch (TYPE(c->qid)) {
case Qtopdir:
/* Generate elements for the top level dir. We support a clone
* and alarm dirs at the top level */
if (s == 0) {
mkqid(&q, Qclone, 0, QTFILE);
devdir(c, q, "clone", 0, eve.name, 0666, dp);
return 1;
}
s--; /* 1 -> 0th element, 2 -> 1st element, etc */
/* Gets the s-th element (0 index)
*
* I would like to take advantage of the state machine and our
* previous answer to get the sth element of the list. We can
* get at our previous run of gen from dp (struct dir), and use
* that to get the next item. I'd like to do something like:
*
* if (dp->qid.path >> ADDR_SHIFT)
* a_i = TAILQ_NEXT(QID2A(dp->qid), link);
*
* Dev would give us a 0'd dp path on the first run, so if we
* have a path, we know we're on an iterative run. However, the
* problem is that we could have lost the element dp refers to
* (QID2A(dp->qid)) since our previous run, so we can't even
* access that memory to check for refcnts or anything. We need
* a new model for how gen works (probably a gen_start and
* gen_stop devop, passed as parameters to devwalk), so that we
* can have some invariants between gen runs.
*
* Til then, we're stuck with arrays like in #ip (though we can
* use Linux style fdsets) or lousy O(n^2) linked lists (like
* #srv).
*
* Note that we won't always start a gen loop with s == 0
* (devdirread, for instance) */
spin_lock(&p->alarmset.lock);
TAILQ_FOREACH(a_i, &p->alarmset.list, link) {
if (s-- == 0)
break;
}
/* As soon as we unlock, someone could free a_i */
if (!a_i) {
spin_unlock(&p->alarmset.lock);
return -1;
}
snprintf(get_cur_genbuf(), GENBUF_SZ, "a%d", a_i->id);
mkqid(&q, QID(a_i, Qalarmdir), 0, QTDIR);
devdir(c, q, get_cur_genbuf(), 0, eve.name, 0555, dp);
spin_unlock(&p->alarmset.lock);
return 1;
case Qalarmdir:
/* Gen the contents of the alarm dirs */
s += Qctl; /* first time through, start on Qctl */
switch (s) {
case Qctl:
mkqid(&q, QID(QID2A(c->qid), Qctl), 0, QTFILE);
devdir(c, q, "ctl", 0, eve.name, 0666, dp);
return 1;
case Qtimer:
mkqid(&q, QID(QID2A(c->qid), Qtimer), 0, QTFILE);
devdir(c, q, "timer", 0, eve.name, 0666, dp);
return 1;
case Qperiod:
mkqid(&q, QID(QID2A(c->qid), Qperiod), 0, QTFILE);
devdir(c, q, "period", 0, eve.name, 0666, dp);
return 1;
case Qcount:
mkqid(&q, QID(QID2A(c->qid), Qcount), 0, QTFILE);
devdir(c, q, "count", 0, eve.name, 0666, dp);
return 1;
}
return -1;
/* Need to also provide a direct hit for Qclone and all other
* files (at all levels of the hierarchy). Every file is both
* generated (via the s increments in their respective
* directories) and directly gen-able. devstat() will call gen
* with a specific path in the qid. In these cases, we make a
* dir for whatever they are asking for. Note the qid stays the
* same. I think this is what the old plan9 comments above
* devgen were talking about for (ii).
*
* We don't need to do this for the directories - devstat will
* look for the a directory by path and fail. Then it will
* manually build the stat output (check the -1 case in
* devstat). */
case Qclone:
devdir(c, c->qid, "clone", 0, eve.name, 0666, dp);
return 1;
case Qctl:
devdir(c, c->qid, "ctl", 0, eve.name, 0666, dp);
return 1;
case Qtimer:
devdir(c, c->qid, "timer", 0, eve.name, 0666, dp);
return 1;
case Qperiod:
devdir(c, c->qid, "period", 0, eve.name, 0666, dp);
return 1;
case Qcount:
devdir(c, c->qid, "count", 0, eve.name, 0666, dp);
return 1;
}
return -1;
}
static void alarminit(void)
{
}
static struct chan *alarmattach(char *spec)
{
struct chan *c = devattach(devname(), spec);
mkqid(&c->qid, Qtopdir, 0, QTDIR);
return c;
}
static struct walkqid *alarmwalk(struct chan *c, struct chan *nc, char **name,
unsigned int nname)
{
return devwalk(c, nc, name, nname, 0, 0, alarmgen);
}
static size_t alarmstat(struct chan *c, uint8_t *db, size_t n)
{
return devstat(c, db, n, 0, 0, alarmgen);
}
/* It shouldn't matter if p = current is DYING. We'll eventually fail to insert
* the open chan into p's fd table, then decref the chan. */
static struct chan *alarmopen(struct chan *c, int omode)
{
struct proc *p = current;
struct proc_alarm *a, *a_i;
switch (TYPE(c->qid)) {
case Qtopdir:
case Qalarmdir:
if (omode & O_REMCLO)
error(EPERM, ERROR_FIXME);
if (omode & O_WRITE)
error(EISDIR, ERROR_FIXME);
break;
case Qclone:
a = kzmalloc(sizeof(struct proc_alarm), MEM_WAIT);
kref_init(&a->kref, alarm_release, 1);
SLIST_INIT(&a->fd_taps);
cv_init(&a->cv);
qlock_init(&a->qlock);
init_awaiter(&a->a_waiter, proc_alarm_handler);
spin_lock(&p->alarmset.lock);
a->id = p->alarmset.id_counter++;
proc_incref(p, 1);
a->proc = p;
TAILQ_INSERT_TAIL(&p->alarmset.list, a, link);
spin_unlock(&p->alarmset.lock);
mkqid(&c->qid, QID(a, Qctl), 0, QTFILE);
break;
case Qctl:
case Qtimer:
case Qperiod:
case Qcount:
/* the purpose of opening is to hold a kref on the proc_alarm */
a = QID2A(c->qid);
assert(a);
/* this isn't a valid pointer yet, since our chan doesn't have a
* ref. since the time that walk gave our chan the qid, the
* chan could have been closed, and the alarm decref'd and
* freed. the qid is essentially an uncounted reference, and we
* need to go to the source to attempt to get a real ref.
* Unfortunately, this is another scan of the list, same as
* devsrv. */
spin_lock(&p->alarmset.lock);
TAILQ_FOREACH(a_i, &p->alarmset.list, link) {
if (a_i == a) {
assert(a->proc == current);
/* it's still possible we're not getting the
* ref, racing with the release method */
if (!kref_get_not_zero(&a->kref, 1)) {
/* lost the race; error out later */
a_i = 0;
}
break;
}
}
spin_unlock(&p->alarmset.lock);
if (!a_i)
error(EFAIL,
"Unable to open alarm, concurrent closing");
break;
}
c->mode = openmode(omode);
/* Assumes c is unique (can't be closed concurrently */
c->flag |= COPEN;
c->offset = 0;
return c;
}
static void alarmclose(struct chan *c)
{
/* There are more closes than opens. For instance, sysstat doesn't
* open, but it will close the chan it got from namec. We only want to
* clean up/decref chans that were actually open. */
if (!(c->flag & COPEN))
return;
switch (TYPE(c->qid)) {
case Qctl:
case Qtimer:
case Qperiod:
case Qcount:
kref_put(&QID2A(c->qid)->kref);
break;
}
}
/* Helper for Qcount to encapsulate timerfd. */
static long read_qcount(struct chan *c, void *ubuf, size_t n)
{
ERRSTACK(1);
struct proc_alarm *a = QID2A(c->qid);
struct cv_lookup_elm cle;
unsigned long old_count;
if (n > sizeof(old_count))
error(EINVAL, "timerfd buffer is too small (%llu)", n);
/* TODO: have easily abortable CVs that don't require this mechanism. */
cv_lock(&a->cv);
__reg_abortable_cv(&cle, &a->cv);
if (waserror()) {
cv_unlock(&a->cv);
dereg_abortable_cv(&cle);
nexterror();
}
while (!a->count) {
if (c->flag & O_NONBLOCK)
error(EAGAIN, "#alarm count was 0");
if (should_abort(&cle))
error(EINTR, "syscall aborted");
cv_wait(&a->cv);
}
old_count = a->count;
a->count = 0;
cv_unlock(&a->cv);
dereg_abortable_cv(&cle);
poperror();
if (copy_to_user(ubuf, &old_count, sizeof(old_count)))
error(EFAULT, "timerfd copy_to_user failed");
return sizeof(old_count);
}
static size_t alarmread(struct chan *c, void *ubuf, size_t n, off64_t offset)
{
struct proc_alarm *p_alarm;
switch (TYPE(c->qid)) {
case Qtopdir:
case Qalarmdir:
return devdirread(c, ubuf, n, 0, 0, alarmgen);
case Qctl:
p_alarm = QID2A(c->qid);
/* simple reads from p_alarm shouldn't need a lock */
return readnum(offset, ubuf, n, p_alarm->id, NUMSIZE32);
case Qtimer:
p_alarm = QID2A(c->qid);
return readnum(offset, ubuf, n, p_alarm->a_waiter.wake_up_time,
NUMSIZE64);
case Qperiod:
p_alarm = QID2A(c->qid);
return readnum(offset, ubuf, n, p_alarm->period, NUMSIZE64);
case Qcount:
return read_qcount(c, ubuf, n); /* ignore offset */
default:
panic("Bad QID %p in devalarm", c->qid.path);
}
return 0;
}
/* Helper, sets the procalarm to hexval (abs TSC ticks). 0 disarms. */
static void set_proc_alarm(struct proc_alarm *a, uint64_t hexval)
{
/* Due to how we have to maintain 'count', we need to strictly account
* for the firings of the alarm. Easiest thing is to disarm it, reset
* everything, then rearm it. Note that if someone is blocked on count
* = 0, they may still be blocked until the next time the alarm fires.
*
* unset waits on the handler, which grabs the cv lock, so we don't grab
* the cv lock. However, we still need to protect ourselves from
* multiple setters trying to run this at once. Unset actually can
* handle being called concurrently, but alarm setters can't, nor can it
* handle the unsets and sets getting out of sync. For instance, two
* unsets followed by two sets would be a bug. Likewise, setting the
* awaiter value while it is on a tchain is a bug. The qlock prevents
* that. */
qlock(&a->qlock);
unset_alarm(a->proc->alarmset.tchain, &a->a_waiter);
cv_lock(&a->cv);
a->count = 0;
if (hexval) {
set_awaiter_abs(&a->a_waiter, hexval);
set_alarm(a->proc->alarmset.tchain, &a->a_waiter);
}
cv_unlock(&a->cv);
qunlock(&a->qlock);
}
/* Note that in read and write we have an open chan, which means we have an
* active kref on the p_alarm. Also note that we make no assumptions about
* current here - we find the proc (and the tchain) via the ref stored in the
* proc_alarm. */
static size_t alarmwrite(struct chan *c, void *ubuf, size_t n, off64_t unused)
{
struct proc_alarm *p_alarm;
switch (TYPE(c->qid)) {
case Qtopdir:
case Qalarmdir:
case Qctl:
case Qcount:
error(EPERM, ERROR_FIXME);
case Qtimer:
set_proc_alarm(QID2A(c->qid), strtoul_from_ubuf(ubuf, n, 16));
break;
case Qperiod:
p_alarm = QID2A(c->qid);
/* racing with the handler which checks the val repeatedly */
cv_lock(&p_alarm->cv);
p_alarm->period = strtoul_from_ubuf(ubuf, n, 16);
cv_unlock(&p_alarm->cv);
break;
default:
panic("Bad QID %p in devalarm", c->qid.path);
}
return n;
}
/* We use the same tap list, regardless of Qtimer or Qcount */
static int tap_alarm(struct proc_alarm *a, struct fd_tap *tap, int cmd,
int legal_filter)
{
int ret;
if (tap->filter & ~legal_filter) {
set_error(ENOSYS, "Unsupported #%s tap, must be %p", devname(),
legal_filter);
return -1;
}
cv_lock(&a->cv);
switch (cmd) {
case (FDTAP_CMD_ADD):
SLIST_INSERT_HEAD(&a->fd_taps, tap, link);
ret = 0;
break;
case (FDTAP_CMD_REM):
SLIST_REMOVE(&a->fd_taps, tap, fd_tap, link);
ret = 0;
break;
default:
set_error(ENOSYS, "Unsupported #%s tap command %p",
devname(), cmd);
ret = -1;
}
cv_unlock(&a->cv);
return ret;
}
static int alarm_tapfd(struct chan *c, struct fd_tap *tap, int cmd)
{
struct proc_alarm *a = QID2A(c->qid);
/* We don't actually support HANGUP, but epoll implies it. */
#define ALARM_LEGAL_TIMER_TAPS (FDTAP_FILT_WRITTEN | FDTAP_FILT_HANGUP)
#define ALARM_LEGAL_COUNT_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
switch (TYPE(c->qid)) {
case Qtimer:
return tap_alarm(a, tap, cmd, ALARM_LEGAL_TIMER_TAPS);
case Qcount:
return tap_alarm(a, tap, cmd, ALARM_LEGAL_COUNT_TAPS);
default:
set_error(ENOSYS, "Can't tap #%s file type %d", devname(),
c->qid.path);
return -1;
}
}
static char *alarm_chaninfo(struct chan *ch, char *ret, size_t ret_l)
{
struct proc_alarm *a;
struct timespec ts;
switch (TYPE(ch->qid)) {
case Qctl:
case Qtimer:
case Qperiod:
case Qcount:
a = QID2A(ch->qid);
ts = tsc2timespec(a->a_waiter.wake_up_time);
snprintf(ret, ret_l,
"Id %d, %s, expires [%7d.%09d] (%p), period %llu, count %llu",
a->id,
SLIST_EMPTY(&a->fd_taps) ? "untapped" : "tapped",
ts.tv_sec, ts.tv_nsec, a->a_waiter.wake_up_time,
a->period, a->count);
break;
default:
return devchaninfo(ch, ret, ret_l);
}
return ret;
}
struct dev alarmdevtab __devtab = {
.name = "alarm",
.reset = devreset,
.init = alarminit,
.shutdown = devshutdown,
.attach = alarmattach,
.walk = alarmwalk,
.stat = alarmstat,
.open = alarmopen,
.create = devcreate,
.close = alarmclose,
.read = alarmread,
.bread = devbread,
.write = alarmwrite,
.bwrite = devbwrite,
.remove = devremove,
.wstat = devwstat,
.power = devpower,
.chaninfo = alarm_chaninfo,
.tapfd = alarm_tapfd,
};