| /* Copyright (c) 2015 Google Inc | 
 |  * Barret Rhoden <brho@cs.berkeley.edu> | 
 |  * See LICENSE for details. | 
 |  * | 
 |  * FD taps.  Allows the user to receive events when certain things happen to an | 
 |  * FD's underlying device file/qid. */ | 
 |  | 
 | #include <fdtap.h> | 
 | #include <event.h> | 
 | #include <kmalloc.h> | 
 | #include <syscall.h> | 
 | #include <error.h> | 
 | #include <umem.h> | 
 |  | 
 | static void tap_min_release(struct kref *kref) | 
 | { | 
 | 	struct fd_tap *tap = container_of(kref, struct fd_tap, kref); | 
 |  | 
 | 	cclose(tap->chan); | 
 | 	kfree(tap); | 
 | } | 
 |  | 
 | static void tap_full_release(struct kref *kref) | 
 | { | 
 | 	struct fd_tap *tap = container_of(kref, struct fd_tap, kref); | 
 |  | 
 | 	devtab[tap->chan->type].tapfd(tap->chan, tap, FDTAP_CMD_REM); | 
 | 	tap_min_release(kref); | 
 | } | 
 |  | 
 | /* Adds a tap with the file/qid of the underlying device for the requested FD. | 
 |  * The FD must be a chan, and the device must support the filter requested. | 
 |  * | 
 |  * Returns -1 or some other device-specific non-zero number on failure, 0 on | 
 |  * success. */ | 
 | int add_fd_tap(struct proc *p, struct fd_tap_req *tap_req) | 
 | { | 
 | 	struct fd_table *fdt = &p->open_files; | 
 | 	struct fd_tap *tap; | 
 | 	int ret = 0; | 
 | 	struct chan *chan; | 
 | 	int fd = tap_req->fd; | 
 |  | 
 | 	if (fd < 0) { | 
 | 		set_errno(EBADF); | 
 | 		return -1; | 
 | 	} | 
 | 	tap = kzmalloc(sizeof(struct fd_tap), MEM_WAIT); | 
 | 	tap->proc = p; | 
 | 	tap->fd = fd; | 
 | 	tap->filter = tap_req->filter; | 
 | 	tap->ev_q = tap_req->ev_q; | 
 | 	tap->ev_id = tap_req->ev_id; | 
 | 	tap->data = tap_req->data; | 
 | 	if (!is_user_rwaddr(tap->ev_q, sizeof(struct event_queue))) { | 
 | 		set_error(EINVAL, "Tap request with bad event_queue %p", | 
 | 			  tap->ev_q); | 
 | 		kfree(tap); | 
 | 		return -1; | 
 | 	} | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fd >= fdt->max_fdset) { | 
 | 		set_errno(ENFILE); | 
 | 		goto out_with_lock; | 
 | 	} | 
 | 	if (!GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) { | 
 | 		set_errno(EBADF); | 
 | 		goto out_with_lock; | 
 | 	} | 
 | 	if (!fdt->fd[fd].fd_chan) { | 
 | 		set_error(EINVAL, "Can't tap a VFS file"); | 
 | 		goto out_with_lock; | 
 | 	} | 
 | 	chan = fdt->fd[fd].fd_chan; | 
 | 	if (fdt->fd[fd].fd_tap) { | 
 | 		set_error(EBUSY, "FD %d already has a tap", fd); | 
 | 		goto out_with_lock; | 
 | 	} | 
 | 	if (!devtab[chan->type].tapfd) { | 
 | 		set_error(ENOSYS, "Device %s does not handle taps", | 
 | 				  devtab[chan->type].name); | 
 | 		goto out_with_lock; | 
 | 	} | 
 | 	/* need to keep chan alive for our call to the device.  someone else | 
 | 	 * could come in and close the FD and the chan, once we unlock */ | 
 | 	chan_incref(chan); | 
 | 	tap->chan = chan; | 
 | 	/* One for the FD table, one for us to keep the removal of *this* tap | 
 | 	 * from happening until we've attempted to register with the device. */ | 
 | 	kref_init(&tap->kref, tap_full_release, 2); | 
 | 	fdt->fd[fd].fd_tap = tap; | 
 | 	/* As soon as we unlock, another thread can come in and remove our old | 
 | 	 * tap from the table and decref it.  Our ref keeps us from removing it | 
 | 	 * yet, as well as keeps the memory safe.  However, a new tap can be | 
 | 	 * installed and registered with the device before we even attempt to | 
 | 	 * register.  The devices should be able to handle multiple, distinct | 
 | 	 * taps, even if they happen to have the same {proc, fd} tuple. */ | 
 | 	spin_unlock(&fdt->lock); | 
 | 	/* For refcnting fans, the tap ref is weak/uncounted.  We'll protect the | 
 | 	 * memory and call the device when tap is being released. */ | 
 | 	ret = devtab[chan->type].tapfd(chan, tap, FDTAP_CMD_ADD); | 
 | 	if (ret) { | 
 | 		/* we failed, so we need to make sure *our* tap is removed.  We | 
 | 		 * haven't decreffed, so we know our tap pointer is unique. */ | 
 | 		spin_lock(&fdt->lock); | 
 | 		if (fdt->fd[fd].fd_tap == tap) { | 
 | 			fdt->fd[fd].fd_tap = 0; | 
 | 			/* normally we can't decref a tap while holding a lock, | 
 | 			 * but we know we have another reference so this won't | 
 | 			 * trigger a release */ | 
 | 			kref_put(&tap->kref); | 
 | 		} | 
 | 		spin_unlock(&fdt->lock); | 
 | 		/* Regardless of whether someone else removed it or not, *we* | 
 | 		 * are the only ones that know that registration failed and that | 
 | 		 * we shouldn't remove it.  Since we still hold a ref, we can | 
 | 		 * change the release method to skip the device dereg. */ | 
 | 		tap->kref.release = tap_min_release; | 
 | 	} | 
 | 	kref_put(&tap->kref); | 
 | 	return ret; | 
 | out_with_lock: | 
 | 	spin_unlock(&fdt->lock); | 
 | 	kfree(tap); | 
 | 	return -1; | 
 | } | 
 |  | 
 | /* Removes the FD tap associated with FD.  Returns 0 on success, -1 with | 
 |  * errno/errstr on failure. */ | 
 | int remove_fd_tap(struct proc *p, int fd) | 
 | { | 
 | 	struct fd_table *fdt = &p->open_files; | 
 | 	struct fd_tap *tap; | 
 |  | 
 | 	if (fd < 0) { | 
 | 		set_errno(EBADF); | 
 | 		return -1; | 
 | 	} | 
 | 	spin_lock(&fdt->lock); | 
 | 	if (fd >= fdt->max_fdset) { | 
 | 		set_errno(ENFILE); | 
 | 		goto err_with_lock; | 
 | 	} | 
 | 	tap = fdt->fd[fd].fd_tap; | 
 | 	if (!tap) { | 
 | 		set_error(EBADF, "FD %d was not tapped", fd); | 
 | 		goto err_with_lock; | 
 | 	} | 
 | 	fdt->fd[fd].fd_tap = 0; | 
 | 	spin_unlock(&fdt->lock); | 
 | 	kref_put(&tap->kref); | 
 | 	return 0; | 
 | err_with_lock: | 
 | 	spin_unlock(&fdt->lock); | 
 | 	return -1; | 
 | } | 
 |  | 
 | /* Fires off tap, with the events of filter having occurred.  Returns -1 on | 
 |  * error, though this need a little more thought. | 
 |  * | 
 |  * Some callers may require this to not block. */ | 
 | int fire_tap(struct fd_tap *tap, int filter) | 
 | { | 
 | 	ERRSTACK(1); | 
 | 	struct event_msg ev_msg = {0}; | 
 | 	int fire_filt = tap->filter & filter; | 
 |  | 
 | 	if (!fire_filt) | 
 | 		return 0; | 
 | 	if (waserror()) { | 
 | 		/* The process owning the tap could trigger a kernel PF, as with | 
 | 		 * any send_event() call.  Eventually we'll catch that with | 
 | 		 * waserror. */ | 
 | 		warn("Tap for proc %d, fd %d, threw %s", tap->proc->pid, | 
 | 		     tap->fd, current_errstr()); | 
 | 		poperror(); | 
 | 		return -1; | 
 | 	} | 
 | 	ev_msg.ev_type = tap->ev_id;	/* e.g. CEQ idx */ | 
 | 	ev_msg.ev_arg2 = fire_filt;	/* e.g. CEQ coalesce */ | 
 | 	ev_msg.ev_arg3 = tap->data;	/* e.g. CEQ data */ | 
 | 	send_event(tap->proc, tap->ev_q, &ev_msg, 0); | 
 | 	poperror(); | 
 | 	return 0; | 
 | } |