| /* Copyright (c) 2015 Google Inc |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| * |
| * FD taps. Allows the user to receive events when certain things happen to an |
| * FD's underlying device file/qid. */ |
| |
| #include <fdtap.h> |
| #include <event.h> |
| #include <kmalloc.h> |
| #include <syscall.h> |
| #include <error.h> |
| #include <umem.h> |
| |
| static void tap_min_release(struct kref *kref) |
| { |
| struct fd_tap *tap = container_of(kref, struct fd_tap, kref); |
| |
| cclose(tap->chan); |
| kfree(tap); |
| } |
| |
| static void tap_full_release(struct kref *kref) |
| { |
| struct fd_tap *tap = container_of(kref, struct fd_tap, kref); |
| |
| devtab[tap->chan->type].tapfd(tap->chan, tap, FDTAP_CMD_REM); |
| tap_min_release(kref); |
| } |
| |
| /* Adds a tap with the file/qid of the underlying device for the requested FD. |
| * The FD must be a chan, and the device must support the filter requested. |
| * |
| * Returns -1 or some other device-specific non-zero number on failure, 0 on |
| * success. */ |
| int add_fd_tap(struct proc *p, struct fd_tap_req *tap_req) |
| { |
| struct fd_table *fdt = &p->open_files; |
| struct fd_tap *tap; |
| int ret = 0; |
| struct chan *chan; |
| int fd = tap_req->fd; |
| |
| if (fd < 0) { |
| set_errno(EBADF); |
| return -1; |
| } |
| tap = kzmalloc(sizeof(struct fd_tap), MEM_WAIT); |
| tap->proc = p; |
| tap->fd = fd; |
| tap->filter = tap_req->filter; |
| tap->ev_q = tap_req->ev_q; |
| tap->ev_id = tap_req->ev_id; |
| tap->data = tap_req->data; |
| if (!is_user_rwaddr(tap->ev_q, sizeof(struct event_queue))) { |
| set_error(EINVAL, "Tap request with bad event_queue %p", |
| tap->ev_q); |
| kfree(tap); |
| return -1; |
| } |
| spin_lock(&fdt->lock); |
| if (fd >= fdt->max_fdset) { |
| set_errno(ENFILE); |
| goto out_with_lock; |
| } |
| if (!GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { |
| set_errno(EBADF); |
| goto out_with_lock; |
| } |
| if (!fdt->fd[fd].fd_chan) { |
| set_error(EINVAL, "Can't tap a VFS file"); |
| goto out_with_lock; |
| } |
| chan = fdt->fd[fd].fd_chan; |
| if (fdt->fd[fd].fd_tap) { |
| set_error(EBUSY, "FD %d already has a tap", fd); |
| goto out_with_lock; |
| } |
| if (!devtab[chan->type].tapfd) { |
| set_error(ENOSYS, "Device %s does not handle taps", |
| devtab[chan->type].name); |
| goto out_with_lock; |
| } |
| /* need to keep chan alive for our call to the device. someone else |
| * could come in and close the FD and the chan, once we unlock */ |
| chan_incref(chan); |
| tap->chan = chan; |
| /* One for the FD table, one for us to keep the removal of *this* tap |
| * from happening until we've attempted to register with the device. */ |
| kref_init(&tap->kref, tap_full_release, 2); |
| fdt->fd[fd].fd_tap = tap; |
| /* As soon as we unlock, another thread can come in and remove our old |
| * tap from the table and decref it. Our ref keeps us from removing it |
| * yet, as well as keeps the memory safe. However, a new tap can be |
| * installed and registered with the device before we even attempt to |
| * register. The devices should be able to handle multiple, distinct |
| * taps, even if they happen to have the same {proc, fd} tuple. */ |
| spin_unlock(&fdt->lock); |
| /* For refcnting fans, the tap ref is weak/uncounted. We'll protect the |
| * memory and call the device when tap is being released. */ |
| ret = devtab[chan->type].tapfd(chan, tap, FDTAP_CMD_ADD); |
| if (ret) { |
| /* we failed, so we need to make sure *our* tap is removed. We |
| * haven't decreffed, so we know our tap pointer is unique. */ |
| spin_lock(&fdt->lock); |
| if (fdt->fd[fd].fd_tap == tap) { |
| fdt->fd[fd].fd_tap = 0; |
| /* normally we can't decref a tap while holding a lock, |
| * but we know we have another reference so this won't |
| * trigger a release */ |
| kref_put(&tap->kref); |
| } |
| spin_unlock(&fdt->lock); |
| /* Regardless of whether someone else removed it or not, *we* |
| * are the only ones that know that registration failed and that |
| * we shouldn't remove it. Since we still hold a ref, we can |
| * change the release method to skip the device dereg. */ |
| tap->kref.release = tap_min_release; |
| } |
| kref_put(&tap->kref); |
| return ret; |
| out_with_lock: |
| spin_unlock(&fdt->lock); |
| kfree(tap); |
| return -1; |
| } |
| |
| /* Removes the FD tap associated with FD. Returns 0 on success, -1 with |
| * errno/errstr on failure. */ |
| int remove_fd_tap(struct proc *p, int fd) |
| { |
| struct fd_table *fdt = &p->open_files; |
| struct fd_tap *tap; |
| |
| spin_lock(&fdt->lock); |
| tap = fdt->fd[fd].fd_tap; |
| fdt->fd[fd].fd_tap = 0; |
| spin_unlock(&fdt->lock); |
| if (tap) { |
| kref_put(&tap->kref); |
| return 0; |
| } else { |
| set_error(EBADF, "FD %d was not tapped", fd); |
| return -1; |
| } |
| } |
| |
| /* Fires off tap, with the events of filter having occurred. Returns -1 on |
| * error, though this need a little more thought. |
| * |
| * Some callers may require this to not block. */ |
| int fire_tap(struct fd_tap *tap, int filter) |
| { |
| ERRSTACK(1); |
| struct event_msg ev_msg = {0}; |
| int fire_filt = tap->filter & filter; |
| |
| if (!fire_filt) |
| return 0; |
| if (waserror()) { |
| /* The process owning the tap could trigger a kernel PF, as with |
| * any send_event() call. Eventually we'll catch that with |
| * waserror. */ |
| warn("Tap for proc %d, fd %d, threw %s", tap->proc->pid, |
| tap->fd, current_errstr()); |
| poperror(); |
| return -1; |
| } |
| ev_msg.ev_type = tap->ev_id; /* e.g. CEQ idx */ |
| ev_msg.ev_arg2 = fire_filt; /* e.g. CEQ coalesce */ |
| ev_msg.ev_arg3 = tap->data; /* e.g. CEQ data */ |
| send_event(tap->proc, tap->ev_q, &ev_msg, 0); |
| poperror(); |
| return 0; |
| } |