cbdma: overhauled #cbdma

This uses the dmaengine API, and the Linux IOAT driver beneath that.

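Kernel-side, a copy is the standard dmaengine sequence; the guts of
issue_dma() below boil down to:

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	dc = __dma_request_channel(&mask, filter_pci_dev, pdev);
	tx = dmaengine_prep_dma_memcpy(dc, dst, src, len, flags);
	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(dc);
	/* then wait, via a completion callback or dma_wait_for_async_tx() */
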
For whatever reason, we auto-assign 0:4.3 to the kernel, and that is the
only device used by #cbdma/ktest.

	cat cbdma/ktest			# tests 0:4.3 in the kernel
	echo 00:04.3 0 > iommu/detach	# detaches from the kernel
	echo 00:04.4 PID > iommu/attach	# attaches to PID
	ucbdma 0:4.3			# attach, test, detach
	ucbdma 0:4.4

ucbdma spits out all of the iommu mappings when it runs.  It's basically
the userspace version of ktest: a "do a single memcpy and don't break
anything" type of test.  That is, it doesn't test stuff like "can I do a
ucbdma without assigning the device" or whatnot.
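
A ucopy request is a single write() of a packed struct; tests/ucbdma.c
and the kernel each keep a copy of the layout.  A minimal sketch of a
user, with error handling elided:

	struct ucbdma {
		uint64_t dst_addr;	/* user vaddrs: with the device */
		uint64_t src_addr;	/* attached, they're dev addrs too */
		uint32_t xfer_size;
		char	 bdf_str[10];	/* e.g. "0:4.3" */
	} __attribute__((packed));

	struct ucbdma u = { .dst_addr = (uint64_t)dst,
			    .src_addr = (uint64_t)src,
			    .xfer_size = len };

	strncpy(u.bdf_str, "0:4.3", sizeof(u.bdf_str) - 1);
	int fd = open("#cbdma/ucopy", O_RDWR);
	/* the kernel rejects anything but exactly sizeof(struct ucbdma) */
	write(fd, &u, sizeof(u));
	close(fd);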

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dev/Kbuild b/kern/drivers/dev/Kbuild
index 77e8b78..ca1a5b4 100644
--- a/kern/drivers/dev/Kbuild
+++ b/kern/drivers/dev/Kbuild
@@ -11,7 +11,7 @@
 obj-y						+= mem.o
 obj-y						+= mnt.o
 obj-y						+= pci.o
-obj-n						+= cbdma.o
+obj-y						+= cbdma.o
 obj-y						+= iommu.o
 obj-y						+= pipe.o
 obj-y						+= proc.o
diff --git a/kern/drivers/dev/cbdma.c b/kern/drivers/dev/cbdma.c
index fdd1a52..8e3bbf8 100644
--- a/kern/drivers/dev/cbdma.c
+++ b/kern/drivers/dev/cbdma.c
@@ -1,5 +1,6 @@
-/* Copyright (c) 2019 Google Inc
+/* Copyright (c) 2019-2020 Google Inc
  * Aditya Basu <mitthu@google.com>
+ * Barret Rhoden <brho@google.com>
  * See LICENSE for details.
  *
  * Useful resources:
@@ -10,38 +11,22 @@
  *   - IOAT: (Intel) I/O Acceleration Technology
  *   - CDMA: Crystal Beach DMA
  *
- * CBDMA Notes
- * ===========
- * Every CBDMA PCI function has one MMIO address space (so only BAR0). Each
- * function can have multiple channels. Currently these devices only have one
- * channel per function. This can be read from the CHANCNT register (8-bit)
- * at offset 0x0.
- *
- * Each channel be independently configured for DMA. The MMIO config space of
- * every channel is 0x80 bytes. The first channel (or CHANNEL_0) starts at 0x80
- * offset.
- *
- * CHAINADDR points to a descriptor (desc) ring buffer. More precisely it points
- * to the first desc in the ring buffer. Each desc represents a single DMA
- * operation. Look at "struct desc" for it's structure.
- *
- * Each desc is 0x40 bytes (or 64 bytes) in size. A 4k page will be able to hold
- * 4k/64 = 64 entries. Note that the lower 6 bits of CHANADDR should be zero. So
- * the first desc's address needs to be aligned accordingly. Page-aligning the
- * first desc address will work because 4k page-aligned addresses will have
- * the last 12 bits as zero.
- *
  * TODO
- * ====
- * *MAJOR*
- *   - Update to the correct struct desc (from Linux kernel)
- *   - Make the status field embedded in the channel struct (no ptr business)
- *   - Add file for errors
- *   - Add locks to guard desc access
- *   - Freeze VA->PA page mappings till DMA is completed (esp. for ucbdma)
- * *MINOR*
- *   - Replace all CBDMA_* constants with IOAT_*
- *   - Initializes only the first found CBDMA device
+ * - Consider something lighter-weight than the qlock for ensuring the device
+ * doesn't get detached during operation.  kref, perhaps.  There's also an
+ * element of "stop new people from coming in", like we do with closing FDs.
+ * There's also stuff that the dmaengine does in linux.  See dma_chan_get().
+ * - Freeze or handle faults with VA->PA page mappings, till DMA is completed.
+ * Right now, we could get iommu faults, which was the purpose of this whole
+ * thing.
+ *	- The dmaengine has helpers for some of this.  dma_set_unmap() is an
+ *	"unmap all these things when you're done" approach, called by __cleanup
+ *	-> dma_descriptor_unmap().  The unmap struct is basically a todo list;
+ *	see the sketch at the end of this comment.
+ * - There's a lot of stuff we could do with the DMA engine to reduce the
+ * amount of device touches, contention, and other inefficiencies.
+ * issue_dma() is a minimalist one.  No batching, etc.  And with the pdev
+ * qlock, we have only a single request per PCI device, though there may be
+ * numerous channels.
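+ *
+ *	A rough sketch of the unmap approach, assuming Linux's helpers
+ *	(dmaengine_get_unmap_data() and dma_set_unmap()) get pulled in:
+ *
+ *		struct dmaengine_unmap_data *unmap;
+ *
+ *		unmap = dmaengine_get_unmap_data(dev, 2, MEM_WAIT);
+ *		unmap->addr[0] = src;
+ *		unmap->addr[1] = dst;
+ *		unmap->len = len;
+ *		dma_set_unmap(tx, unmap);
+ *
+ *	and __cleanup -> dma_descriptor_unmap() walks that todo list once
+ *	the DMA completes.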
  */
 
 #include <kmalloc.h>
@@ -54,136 +39,237 @@
 #include <arch/pci.h>
 #include <page_alloc.h>
 #include <pmap.h>
-#include <cbdma_regs.h>
 #include <arch/pci_regs.h>
 
-#define NDESC 1 // initialize these many descs
-#define BUFFERSZ 8192
-
-struct dev                cbdmadevtab;
-static struct pci_device  *pci;
-static void               *mmio;
-static uint8_t            chancnt; /* Total number of channels per function */
-static bool               iommu_enabled;
-static bool               cbdma_break_loop; /* toggle_foo functionality */
-
-/* PCIe Config Space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 */
-enum {
-	DEVSTS = 0x9a, // 16-bit
-	PMCSR  = 0xe4, // 32-bit
-
-	DMAUNCERRSTS = 0x148, // 32-bit (DMA Cluster Uncorrectable Error Status)
-	DMAUNCERRMSK = 0x14c, // 32-bit
-	DMAUNCERRSEV = 0x150, // 32-bit
-	DMAUNCERRPTR = 0x154, // 8-bit
-	DMAGLBERRPTR = 0x160, // 8-bit
-
-	CHANERR_INT    = 0x180, // 32-bit
-	CHANERRMSK_INT = 0x184, // 32-bit
-	CHANERRSEV_INT = 0x188, // 32-bit
-	CHANERRPTR     = 0x18c, // 8-bit
-};
+#include <linux/dmaengine.h>
 
 /* QID Path */
 enum {
 	Qdir           = 0,
 	Qcbdmaktest    = 1,
-	Qcbdmastats    = 2,
-	Qcbdmareset    = 3,
-	Qcbdmaucopy    = 4,
-	Qcbdmaiommu    = 5,
-};
-
-/* supported ioat devices */
-enum {
-	ioat2021 = (0x2021 << 16) | 0x8086,
-	ioat2f20 = (0x2f20 << 16) | 0x8086,
+	Qcbdmaucopy    = 2,
 };
 
 static struct dirtab cbdmadir[] = {
 	{".",         {Qdir, 0, QTDIR}, 0, 0555},
 	{"ktest",     {Qcbdmaktest, 0, QTFILE}, 0, 0555},
-	{"stats",     {Qcbdmastats, 0, QTFILE}, 0, 0555},
-	{"reset",     {Qcbdmareset, 0, QTFILE}, 0, 0755},
 	{"ucopy",     {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
-	{"iommu",     {Qcbdmaiommu, 0, QTFILE}, 0, 0755},
 };
 
-/* Descriptor structue as defined in the programmer's guide.
- * It describes a single DMA transfer
- */
-struct desc {
-	uint32_t  xfer_size;
-	uint32_t  descriptor_control;
-	uint64_t  src_addr;
-	uint64_t  dest_addr;
-	uint64_t  next_desc_addr;
-	uint64_t  next_source_address;
-	uint64_t  next_destination_address;
-	uint64_t  reserved0;
-	uint64_t  reserved1;
+/* TODO: this is a device/kernel ABI.  ucbdma.c has a copy.  It's probably not
+ * worth putting in its own header, since this is really cheap test code. */
+struct ucbdma {
+	uint64_t		dst_addr;
+	uint64_t		src_addr;
+	uint32_t		xfer_size;
+	char			bdf_str[10];
 } __attribute__((packed));
 
-/* The channels are indexed starting from 0 */
-static struct channel {
-	uint8_t                number; // channel number
-	struct desc            *pdesc; // desc ptr
-	int                    ndesc;  // num. of desc
-	uint64_t               *status; // reg: CHANSTS, needs to be 64B aligned
-	uint8_t                ver;    // reg: CBVER
-
-/* DEPRECATED */
-/* MMIO address space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
- * Every channel 0x80 bytes in size.
- */
-	uint8_t  chancmd;
-	uint8_t  xrefcap;
-	uint16_t chanctrl;
-	uint16_t dmacount;
-	uint32_t chanerr;
-	uint64_t chansts;
-	uint64_t chainaddr;
-} cbdmadev, channel0;
-
 #define KTEST_SIZE 64
 static struct {
-	char    printbuf[4096];
 	char    src[KTEST_SIZE];
 	char    dst[KTEST_SIZE];
 	char    srcfill;
 	char    dstfill;
-} ktest;    /* TODO: needs locking */
+} ktest = {.srcfill = '0', .dstfill = 'X'};
 
-/* struct passed from the userspace */
-struct ucbdma {
-	struct desc desc;
-	uint64_t    status;
-	uint16_t    ndesc;
-};
-
-/* for debugging via kfunc; break out of infinite polling loops */
-void toggle_cbdma_break_loop(void)
+static inline struct pci_device *dma_chan_to_pci_dev(struct dma_chan *dc)
 {
-	cbdma_break_loop = !cbdma_break_loop;
-	printk("cbdma: cbdma_break_loop = %d\n", cbdma_break_loop);
+	return container_of(dc->device->dev, struct pci_device, linux_dev);
 }
 
-/* Function definitions start here */
-static inline bool is_initialized(void)
+/* Filter function for finding a particular PCI device.  If
+ * __dma_request_channel() asks for a particular device, we'll only give it that
+ * chan.  If you don't care, pass NULL, and you'll get any free chan. */
+static bool filter_pci_dev(struct dma_chan *dc, void *arg)
 {
-	if (!pci || !mmio)
-		return false;
-	else
-		return true;
+	struct pci_device *pdev = dma_chan_to_pci_dev(dc);
+
+	if (arg)
+		return arg == pdev;
+	return true;
 }
 
-static void *get_register(struct channel *c, int offset)
+/* Addresses are device-physical.  Caller holds the pdev qlock. */
+static void issue_dma(struct pci_device *pdev, physaddr_t dst, physaddr_t src,
+		      size_t len, bool async)
 {
-	uint64_t base = (c->number + 1) * IOAT_CHANNEL_MMIO_SIZE;
+	ERRSTACK(1);
+	struct dma_chan *dc;
+	dma_cap_mask_t mask;
+	struct dma_async_tx_descriptor *tx;
+	int flags;
 
-	return (char *) mmio + base + offset;
+	struct completion cmp;
+	unsigned long tmo;
+	dma_cookie_t cookie;
+
+	/* dmaengine_get works for the non-DMA_PRIVATE devices.  A lot
+	 * of devices turn on DMA_PRIVATE, in which case they won't be in the
+	 * general pool available to the dmaengine.  Instead, we directly
+	 * request DMA channels - particularly since we want specific devices to
+	 * use with the IOMMU. */
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+	dc = __dma_request_channel(&mask, filter_pci_dev, pdev);
+	if (!dc)
+		error(EFAIL, "Couldn't get a DMA channel");
+	if (waserror()) {
+		dma_release_channel(dc);
+		nexterror();
+	}
+
+	flags = 0;
+	if (async)
+		flags |= DMA_PREP_INTERRUPT;
+
+	if (!is_dma_copy_aligned(dc->device, dst, src, len))
+		error(EINVAL, "Bad copy alignment: %p %p %lu", dst, src, len);
+
+	tx = dmaengine_prep_dma_memcpy(dc, dst, src, len, flags);
+	if (!tx)
+		error(EFAIL, "Couldn't prep the memcpy!");
+
+	if (async) {
+		async_tx_ack(tx);
+		init_completion(&cmp);
+		tx->callback = (dma_async_tx_callback)complete;
+		tx->callback_param = &cmp;
+	}
+
+	cookie = dmaengine_submit(tx);
+	if (cookie < 0)
+		error(EFAIL, "Failed to submit the DMA...");
+
+	/* You can poke this.  dma_sync_wait() also calls this. */
+	dma_async_issue_pending(dc);
+
+	if (async) {
+		/* Giant warning: the polling methods, like
+		 * dmaengine_tx_status(), might actually trigger the
+		 * tx->callback.  At least the IOAT driver does this. */
+		tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+		if (tmo == 0 || dmaengine_tx_status(dc, cookie, NULL)
+					!= DMA_COMPLETE) {
+			error(ETIMEDOUT, "timeout or related spurious failure");
+		}
+	} else {
+		dma_wait_for_async_tx(tx);
+	}
+
+	dma_release_channel(dc);
+	poperror();
 }
 
+static void issue_dma_ucbdma(struct ucbdma *u)
+{
+	ERRSTACK(1);
+	struct pci_device *pdev;
+
+	pdev = pci_match_string(u->bdf_str);
+	if (!pdev)
+		error(ENODEV, "No device %s", u->bdf_str);
+	/* The qlock prevents unassignment from happening during an operation.
+	 * If that happened, the driver's reset method would be called while the
+	 * op is ongoing.  The driver might be able to handle that.  Though when
+	 * the iommu mappings are destroyed, the driver is likely to get wedged.
+	 *
+	 * A kref or something else might work better here, to allow multiple
+	 * DMAs at a time. */
+	qlock(&pdev->qlock);
+	if (waserror()) {
+		qunlock(&pdev->qlock);
+		nexterror();
+	}
+	if (pdev->proc_owner != current)
+		error(EINVAL, "wrong proc_owner");
+	issue_dma(pdev, u->dst_addr, u->src_addr, u->xfer_size, true);
+	qunlock(&pdev->qlock);
+	poperror();
+}
+
+/* Runs a basic test from within the kernel on 0:4.3.
+ *
+ * One option would be to have write() set the sza buffer.  It won't be static
+ * through the chan's lifetime (so you'd need to deal with syncing), but it'd
+ * let you set things.  Another would be to have another chan/file for the BDF
+ * (and you'd sync on that). */
+static struct sized_alloc *open_ktest(void)
+{
+	ERRSTACK(2);
+	struct pci_device *pdev = pci_match_tbdf(MKBUS(0, 0, 4, 3));
+	struct sized_alloc *sza;
+	physaddr_t dst, src;	/* device addrs */
+	char *dst_d, *src_d;	/* driver addrs */
+	uintptr_t prev;
+
+	if (!pdev)
+		error(EINVAL, "no 00:04.3");
+
+	qlock(&pdev->qlock);
+	/* We need to get into the address space of the device's owner, which
+	 * might be NULL if the device is the kernel's or unassigned. */
+	prev = switch_to(pdev->proc_owner);
+	if (waserror()) {
+		switch_back(pdev->proc_owner, prev);
+		qunlock(&pdev->qlock);
+		nexterror();
+	}
+
+	if (pdev->state != DEV_STATE_ASSIGNED_KERNEL &&
+	    pdev->state != DEV_STATE_ASSIGNED_USER)
+		error(EINVAL, "00:04.3 is unassigned (%d)", pdev->state);
+
+	dst_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &dst,
+				   MEM_WAIT);
+	src_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &src,
+				   MEM_WAIT);
+
+	if (waserror()) {
+		dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
+		dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
+		nexterror();
+	}
+
+	ktest.srcfill += 1;
+	/* initialize src and dst address */
+	memset(src_d, ktest.srcfill, KTEST_SIZE);
+	memset(dst_d, ktest.dstfill, KTEST_SIZE);
+	src_d[KTEST_SIZE-1] = '\0';
+	dst_d[KTEST_SIZE-1] = '\0';
+
+	issue_dma(pdev, dst, src, KTEST_SIZE, true);
+
+	sza = sized_kzmalloc(1024, MEM_WAIT);
+	sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
+	sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
+	sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
+
+	/* %s on a uptr causes a printfmt warning.  Stop at 20 chars, for sanity. */
+	sza_printf(sza, "\tsrc_str (after copy): ");
+	for (int i = 0; i < 20; i++)
+		sza_printf(sza, "%c", src_d[i]);
+	sza_printf(sza, "\n");
+
+	sza_printf(sza, "\tdst_str (after copy): ");
+	for (int i = 0; i < 20; i++)
+		sza_printf(sza, "%c", dst_d[i]);
+	sza_printf(sza, "\n");
+
+	dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
+	dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
+	poperror();
+
+	switch_back(pdev->proc_owner, prev);
+	qunlock(&pdev->qlock);
+	poperror();
+
+	return sza;
+}
+
+struct dev cbdmadevtab;
+
 static char *devname(void)
 {
 	return cbdmadevtab.name;
@@ -191,8 +277,6 @@
 
 static struct chan *cbdmaattach(char *spec)
 {
-	if (!is_initialized())
-		error(ENODEV, "no cbdma device detected");
 	return devattach(devname(), spec);
 }
 
@@ -208,514 +292,9 @@
 	return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
 }
 
-/* return string representation of chansts */
-char *cbdma_str_chansts(uint64_t chansts)
-{
-	char *status = "unrecognized status";
-
-	switch (chansts & IOAT_CHANSTS_STATUS) {
-	case IOAT_CHANSTS_ACTIVE:
-		status = "ACTIVE";
-		break;
-	case IOAT_CHANSTS_DONE:
-		status = "DONE";
-		break;
-	case IOAT_CHANSTS_SUSPENDED:
-		status = "SUSPENDED";
-		break;
-	case IOAT_CHANSTS_HALTED:
-		status = "HALTED";
-		break;
-	case IOAT_CHANSTS_ARMED:
-		status = "ARMED";
-		break;
-	default:
-		break;
-	}
-	return status;
-}
-
-/* print descriptors on console (for debugging) */
-static void dump_desc(struct desc *d, int count)
-{
-	printk("dumping descriptors (count = %d):\n", count);
-
-	while (count > 0) {
-		printk("desc: 0x%x, size: %d bytes\n",
-			d, sizeof(struct desc));
-		printk("[32] desc->xfer_size: 0x%x\n",
-			d->xfer_size);
-		printk("[32] desc->descriptor_control: 0x%x\n",
-			d->descriptor_control);
-		printk("[64] desc->src_addr: %p\n",
-			d->src_addr);
-		printk("[64] desc->dest_addr: %p\n",
-			d->dest_addr);
-		printk("[64] desc->next_desc_addr: %p\n",
-			d->next_desc_addr);
-		printk("[64] desc->next_source_address: %p\n",
-			d->next_source_address);
-		printk("[64] desc->next_destination_address: %p\n",
-			d->next_destination_address);
-		printk("[64] desc->reserved0: %p\n",
-			d->reserved0);
-		printk("[64] desc->reserved1: %p\n",
-			d->reserved1);
-
-		count--;
-		if (count > 0)
-			d = (struct desc *) KADDR(d->next_desc_addr);
-		printk("\n");
-	}
-}
-
-/* initialize desc ring
- *
- - Can be called multiple times, with different "ndesc" values.
- - NOTE: We only create _one_ valid desc. The next field points back itself
-	 (ring buffer).
- */
-static void init_desc(struct channel *c, int ndesc)
-{
-	struct desc *d, *tmp;
-	int i;
-	const int max_ndesc = PGSIZE / sizeof(struct desc);
-
-	/* sanity checks */
-	if (ndesc > max_ndesc) {
-		printk("cbdma: allocating only %d desc instead of %d desc\n",
-			max_ndesc, ndesc);
-		ndesc = max_ndesc;
-	}
-
-	c->ndesc = ndesc;
-
-	/* allocate pages for descriptors, last 6-bits must be zero */
-	if (!c->pdesc)
-		c->pdesc = kpage_zalloc_addr();
-
-	if (!c->pdesc) { /* error does not return */
-		printk("cbdma: cannot alloc page for desc\n");
-		return; /* TODO: return "false" */
-	}
-
-	/* preparing descriptors */
-	d = c->pdesc;
-	d->xfer_size = 1;
-	d->descriptor_control = CBDMA_DESC_CTRL_NULL_DESC;
-	d->next_desc_addr = PADDR(d);
-}
-
-/* struct channel is only used for get_register */
-static inline void cleanup_post_copy(struct channel *c)
-{
-	uint64_t value;
-
-	/* mmio_reg: DMACOUNT */
-	value = read16(get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
-	if (value != 0) {
-		printk("cbdma: info: DMACOUNT = %d\n", value); /* should be 0 */
-		write16(0, mmio + CBDMA_DMACOUNT_OFFSET);
-	}
-
-	/* mmio_reg: CHANERR */
-	value = read32(get_register(c, IOAT_CHANERR_OFFSET));
-	if (value != 0) {
-		printk("cbdma: error: CHANERR = 0x%x\n", value);
-		write32(value, get_register(c, IOAT_CHANERR_OFFSET));
-	}
-
-	/* ack errors */
-	if (ACCESS_PCIE_CONFIG_SPACE) {
-		/* PCIe_reg: CHANERR_INT */
-		value = pcidev_read32(pci, CHANERR_INT);
-		if (value != 0) {
-			printk("cbdma: error: CHANERR_INT = 0x%x\n", value);
-			pcidev_write32(pci, CHANERR_INT, value);
-		}
-
-		/* PCIe_reg: DMAUNCERRSTS */
-		value = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
-		if (value != 0) {
-			printk("cbdma: error: DMAUNCERRSTS = 0x%x\n", value);
-			pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET,
-				       value);
-		}
-	}
-}
-
-/* struct channel is only used for get_register */
-static inline void perform_dma(struct channel *c, physaddr_t completion_sts,
-			       physaddr_t desc, uint16_t count)
-{
-	void __iomem *offset;
-
-	/* Set channel completion register where CBDMA will write content of
-	 * CHANSTS register upon successful DMA completion or error condition
-	 */
-	offset = get_register(c, IOAT_CHANCMP_OFFSET);
-	write64(completion_sts,	offset);
-
-	/* write locate of first desc to register CHAINADDR */
-	offset = get_register(c, IOAT_CHAINADDR_OFFSET(c->ver));
-	write64(desc, offset);
-	wmb_f();
-
-	/* writing valid number of descs: starts the DMA */
-	offset = get_register(c, IOAT_CHAN_DMACOUNT_OFFSET);
-	write16(count, offset);
-}
-
-static inline void wait_for_dma_completion(uint64_t *cmpsts)
-{
-	uint64_t sts;
-
-	do {
-		cpu_relax();
-		sts = *cmpsts;
-		if (cbdma_break_loop) {
-			printk("cbdma: cmpsts: %p = 0x%llx\n", cmpsts, sts);
-			break;
-		}
-	} while ((sts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
-}
-
-/* cbdma_ktest: performs functional test on CBDMA
- *
- - Allocates 2 kernel pages: ktest_src and ktest_dst.
- - memsets the ktest_src page
- - Prepare descriptors for DMA transfer (need to be aligned)
- - Initiate the transfer
- - Prints results
- */
-static void cbdma_ktest(void)
-{
-	static struct desc *d;
-	uint64_t value;
-	struct channel *c = &channel0;
-
-	/* initialize src and dst address */
-	memset(ktest.src, ktest.srcfill, KTEST_SIZE);
-	memset(ktest.dst, ktest.dstfill, KTEST_SIZE);
-	ktest.src[KTEST_SIZE-1] = '\0';
-	ktest.dst[KTEST_SIZE-1] = '\0';
-
-	/* for subsequent ktests */
-	ktest.srcfill += 1;
-
-	/* preparing descriptors */
-	d = channel0.pdesc;
-	d->xfer_size            = (uint32_t) KTEST_SIZE;
-	d->src_addr             = (uint64_t) PADDR(ktest.src);
-	d->dest_addr            = (uint64_t) PADDR(ktest.dst);
-	d->descriptor_control   = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
-				  CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
-
-	memset((uint64_t *)c->status, 0, sizeof(c->status));
-
-	/* perform actual DMA */
-	perform_dma(c, PADDR(c->status), PADDR(c->pdesc), 1);
-	wait_for_dma_completion(c->status);
-	cleanup_post_copy(c);
-}
-
-/* convert a userspace pointer to kaddr based pointer
- * TODO: this is dangerous and the pages are not pinned. Debugging only! */
-static inline void *uptr_to_kptr(void *ptr)
-{
-	return (void *) uva2kva(current, ptr, 1, PROT_WRITE);
-}
-
-/* function that uses kernel addresses to perform DMA.
- * Note: does not perform error checks for src / dest addr.
- * TODO: this only works if ktest is not run. Still it fails on alternate runs.
- *       Likely some error in setting up the desc from userspace.
- */
-static void issue_dma_kaddr(struct ucbdma *u)
-{
-	struct ucbdma *u_kaddr = uptr_to_kptr(u);
-	/* first field is struct desc */
-	struct desc *d = (struct desc *) u_kaddr;
-	struct channel *c = &channel0;
-	uint64_t value;
-
-	if (!u_kaddr) {
-		printk("[kern] cannot get kaddr for useraddr: %p\n", u);
-		return;
-	}
-	printk("[kern] ucbdma: user: %p kern: %p\n", u, u_kaddr);
-
-	/* preparing descriptors */
-	d->src_addr   = (uint64_t) PADDR(uptr_to_kptr((void*) d->src_addr));
-	d->dest_addr  = (uint64_t) PADDR(uptr_to_kptr((void*) d->dest_addr));
-	d->next_desc_addr = (uint64_t)
-			    PADDR(uptr_to_kptr((void*) d->next_desc_addr));
-
-	/* perform actual DMA */
-	perform_dma(c, PADDR(&u_kaddr->status), PADDR(d), u_kaddr->ndesc);
-	wait_for_dma_completion(&u_kaddr->status);
-	cleanup_post_copy(c);
-}
-
-/* function that uses virtual (process) addresses to perform DMA; IOMMU = ON
- * TODO: Verify once the IOMMU is setup and enabled.
- */
-static void issue_dma_vaddr(struct ucbdma *u)
-{
-	struct ucbdma *u_kaddr = uptr_to_kptr(u);
-	struct channel *c = &channel0;
-	uint64_t value;
-
-	printk("[kern] IOMMU = ON\n");
-	printk("[kern] ucbdma: user: %p kern: %p ndesc: %d\n", u,
-		&u_kaddr->desc, u_kaddr->ndesc);
-
-	/* perform actual DMA */
-	perform_dma(c, (physaddr_t) &u->status, (physaddr_t) &u->desc,
-		    u_kaddr->ndesc);
-	wait_for_dma_completion(&u_kaddr->status);
-	cleanup_post_copy(&channel0);
-}
-
-/* cbdma_stats: get stats about the device and driver
- */
-static struct sized_alloc *open_stats(void)
-{
-	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-	uint64_t value;
-
-	sza_printf(sza,
-		"Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
-		pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
-
-	/* driver info. */
-	sza_printf(sza, "    Driver Information:\n");
-	sza_printf(sza,
-		"\tmmio: %p\n"
-		"\ttotal_channels: %d\n"
-		"\tdesc_kaddr: %p\n"
-		"\tdesc_paddr: %p\n"
-		"\tdesc_num: %d\n"
-		"\tver: 0x%x\n"
-		"\tstatus_kaddr: %p\n"
-		"\tstatus_paddr: %p\n"
-		"\tstatus_value: 0x%x\n",
-		mmio, chancnt,
-		channel0.pdesc, PADDR(channel0.pdesc), channel0.ndesc,
-		channel0.ver, channel0.status, PADDR(channel0.status),
-		*(uint64_t *)channel0.status);
-
-	/* print the PCI registers */
-	sza_printf(sza, "    PCIe Config Registers:\n");
-
-	value = pcidev_read16(pci, PCI_CMD_REG);
-	sza_printf(sza, "\tPCICMD: 0x%x\n", value);
-
-	value = pcidev_read16(pci, PCI_STATUS_REG);
-	sza_printf(sza, "\tPCISTS: 0x%x\n", value);
-
-	value = pcidev_read16(pci, PCI_REVID_REG);
-	sza_printf(sza, "\tRID: 0x%x\n", value);
-
-	value = pcidev_read32(pci, PCI_BAR0_STD);
-	sza_printf(sza, "\tCB_BAR: 0x%x\n", value);
-
-	value = pcidev_read16(pci, DEVSTS);
-	sza_printf(sza, "\tDEVSTS: 0x%x\n", value);
-
-	value = pcidev_read32(pci, PMCSR);
-	sza_printf(sza, "\tPMCSR: 0x%x\n", value);
-
-	value = pcidev_read32(pci, DMAUNCERRSTS);
-	sza_printf(sza, "\tDMAUNCERRSTS: 0x%x\n", value);
-
-	value = pcidev_read32(pci, DMAUNCERRMSK);
-	sza_printf(sza, "\tDMAUNCERRMSK: 0x%x\n", value);
-
-	value = pcidev_read32(pci, DMAUNCERRSEV);
-	sza_printf(sza, "\tDMAUNCERRSEV: 0x%x\n", value);
-
-	value = pcidev_read8(pci, DMAUNCERRPTR);
-	sza_printf(sza, "\tDMAUNCERRPTR: 0x%x\n", value);
-
-	value = pcidev_read8(pci, DMAGLBERRPTR);
-	sza_printf(sza, "\tDMAGLBERRPTR: 0x%x\n", value);
-
-	value = pcidev_read32(pci, CHANERR_INT);
-	sza_printf(sza, "\tCHANERR_INT: 0x%x\n", value);
-
-	value = pcidev_read32(pci, CHANERRMSK_INT);
-	sza_printf(sza, "\tCHANERRMSK_INT: 0x%x\n", value);
-
-	value = pcidev_read32(pci, CHANERRSEV_INT);
-	sza_printf(sza, "\tCHANERRSEV_INT: 0x%x\n", value);
-
-	value = pcidev_read8(pci, CHANERRPTR);
-	sza_printf(sza, "\tCHANERRPTR: 0x%x\n", value);
-
-	sza_printf(sza, "    CHANNEL_0 MMIO Registers:\n");
-
-	value = read8(mmio + CBDMA_CHANCMD_OFFSET);
-	sza_printf(sza, "\tCHANCMD: 0x%x\n", value);
-
-	value = read8(mmio + IOAT_VER_OFFSET);
-	sza_printf(sza, "\tCBVER: 0x%x major=%d minor=%d\n",
-		   value,
-		   GET_IOAT_VER_MAJOR(value),
-		   GET_IOAT_VER_MINOR(value));
-
-	value = read16(mmio + CBDMA_CHANCTRL_OFFSET);
-	sza_printf(sza, "\tCHANCTRL: 0x%llx\n", value);
-
-	value = read64(mmio + CBDMA_CHANSTS_OFFSET);
-	sza_printf(sza, "\tCHANSTS: 0x%x [%s], desc_addr: %p, raw: 0x%llx\n",
-		   (value & IOAT_CHANSTS_STATUS),
-		   cbdma_str_chansts(value),
-		   (value & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR),
-		   value);
-
-	value = read64(mmio + CBDMA_CHAINADDR_OFFSET);
-	sza_printf(sza, "\tCHAINADDR: %p\n", value);
-
-	value = read64(mmio + CBDMA_CHANCMP_OFFSET);
-	sza_printf(sza, "\tCHANCMP: %p\n", value);
-
-	value = read16(mmio + CBDMA_DMACOUNT_OFFSET);
-	sza_printf(sza, "\tDMACOUNT: %d\n", value);
-
-	value = read32(mmio + CBDMA_CHANERR_OFFSET);
-	sza_printf(sza, "\tCHANERR: 0x%x\n", value);
-
-	return sza;
-}
-
-static struct sized_alloc *open_reset(void)
-{
-	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
-	if (cbdma_is_reset_pending())
-		sza_printf(sza, "Status: Reset is pending\n");
-	else
-		sza_printf(sza, "Status: No pending reset\n");
-
-	sza_printf(sza, "Write '1' to perform reset!\n");
-
-	return sza;
-}
-
-static struct sized_alloc *open_iommu(void)
-{
-	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
-	sza_printf(sza, "IOMMU enabled = %s\n", iommu_enabled ? "yes":"no");
-	sza_printf(sza, "Write '0' to disable or '1' to enable the IOMMU\n");
-
-	return sza;
-}
-
-/* targets channel0 */
-static struct sized_alloc *open_ktest(void)
-{
-	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
-	/* run the test */
-	cbdma_ktest();
-
-	sza_printf(sza,
-	   "Self-test Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
-	   pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
-
-	sza_printf(sza, "\tChannel Status: %s (raw: 0x%x)\n",
-		cbdma_str_chansts(*((uint64_t *)channel0.status)),
-		(*((uint64_t *)channel0.status) & IOAT_CHANSTS_STATUS));
-
-	sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
-	sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
-	sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
-	sza_printf(sza, "\tsrc_str (after copy): %s\n", ktest.src);
-	sza_printf(sza, "\tdst_str (after copy): %s\n", ktest.dst);
-
-	return sza;
-}
-
-/* cbdma_reset_device: this fixes any programming errors done before
- */
-void cbdma_reset_device(void)
-{
-	int cbdmaver;
-	uint32_t error;
-
-	/* make sure the driver is initialized */
-	if (!mmio)
-		error(EIO, "cbdma: mmio addr not set");
-
-	pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
-							| PCI_COMMAND_MASTER);
-	/* fetch version */
-	cbdmaver = read8(mmio + IOAT_VER_OFFSET);
-
-	/* ack channel errros */
-	error = read32(mmio + CBDMA_CHANERR_OFFSET);
-	write32(error, mmio + CBDMA_CHANERR_OFFSET);
-
-	if (ACCESS_PCIE_CONFIG_SPACE) {
-		/* ack pci device level errros */
-		/* clear DMA Cluster Uncorrectable Error Status */
-		error = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
-		pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET, error);
-
-		/* clear DMA Channel Error Status */
-		error = pcidev_read32(pci, IOAT_PCI_CHANERR_INT_OFFSET);
-		pcidev_write32(pci, IOAT_PCI_CHANERR_INT_OFFSET, error);
-	}
-
-	/* reset */
-	write8(IOAT_CHANCMD_RESET, mmio
-				   + IOAT_CHANNEL_MMIO_SIZE
-				   + IOAT_CHANCMD_OFFSET(cbdmaver));
-
-	pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
-			| PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
-
-	printk("cbdma: reset performed\n");
-}
-
-/* cbdma_is_reset_pending: returns true if reset is pending
- */
-bool cbdma_is_reset_pending(void)
-{
-	int cbdmaver;
-	int status;
-
-	/* make sure the driver is initialized */
-	if (!mmio) {
-		error(EPERM, "cbdma: mmio addr not set");
-		return false; /* does not reach */
-	}
-
-	/* fetch version */
-	cbdmaver = read8(mmio + IOAT_VER_OFFSET);
-
-	status = read8(mmio + IOAT_CHANNEL_MMIO_SIZE
-			+ IOAT_CHANCMD_OFFSET(cbdmaver));
-
-	return (status & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
-}
-
-///////// SYS INTERFACE ////////////////////////////////////////////////////////
-
 static struct chan *cbdmaopen(struct chan *c, int omode)
 {
 	switch (c->qid.path) {
-	case Qcbdmastats:
-		c->synth_buf = open_stats();
-		break;
-	case Qcbdmareset:
-		c->synth_buf = open_reset();
-		break;
-	case Qcbdmaiommu:
-		c->synth_buf = open_iommu();
-		break;
 	case Qcbdmaktest:
 		c->synth_buf = open_ktest();
 		break;
@@ -732,9 +311,6 @@
 static void cbdmaclose(struct chan *c)
 {
 	switch (c->qid.path) {
-	case Qcbdmastats:
-	case Qcbdmareset:
-	case Qcbdmaiommu:
 	case Qcbdmaktest:
 		kfree(c->synth_buf);
 		c->synth_buf = NULL;
@@ -753,9 +329,6 @@
 
 	switch (c->qid.path) {
 	case Qcbdmaktest:
-	case Qcbdmastats:
-	case Qcbdmareset:
-	case Qcbdmaiommu:
 		return readstr(offset, va, n, sza->buf);
 	case Qcbdmaucopy:
 		return readstr(offset, va, n,
@@ -770,63 +343,22 @@
 	return -1;      /* not reached */
 }
 
-static void init_channel(struct channel *c, int cnum, int ndesc)
-{
-	c->number = cnum;
-	c->pdesc = NULL;
-	init_desc(c, ndesc);
-
-	/* this is a writeback field; the hardware will update this value */
-	if (c->status == 0)
-		c->status = kmalloc_align(sizeof(uint64_t), MEM_WAIT, 8);
-	assert(c->status != 0);
-
-	/* cbdma version */
-	c->ver = read8(mmio + IOAT_VER_OFFSET);
-
-	/* Set "Any Error Abort Enable": enables abort for any error encountered
-	 * Set "Error Completion Enable": enables completion write to address in
-					  CHANCMP for any error
-	 * Reset "Interrupt Disable": W1C, when clear enables interrupt to fire
-				    for next descriptor that specifies interrupt
-	*/
-	write8(IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN,
-	       get_register(c, IOAT_CHANCTRL_OFFSET));
-}
-
 static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
 {
+	struct ucbdma ucbdma[1];
+
 	switch (c->qid.path) {
 	case Qdir:
 		error(EPERM, "writing not permitted");
 	case Qcbdmaktest:
-	case Qcbdmastats:
 		error(EPERM, ERROR_FIXME);
-	case Qcbdmareset:
-		if (offset == 0 && n > 0 && *(char *)va == '1') {
-			cbdma_reset_device();
-			init_channel(&channel0, 0, NDESC);
-		} else {
-			error(EINVAL, "cannot be empty string");
-		}
-		return n;
 	case Qcbdmaucopy:
-		if (offset == 0 && n > 0) {
-			printk("[kern] value from userspace: %p\n", va);
-			if (iommu_enabled)
-				issue_dma_vaddr(va);
-			else
-				issue_dma_kaddr(va);
-			return sizeof(8);
-		}
-		return 0;
-	case Qcbdmaiommu:
-		if (offset == 0 && n > 0 && *(char *)va == '1')
-			iommu_enabled = true;
-		else if (offset == 0 && n > 0 && *(char *)va == '0')
-			iommu_enabled = false;
-		else
-			error(EINVAL, "cannot be empty string");
+		if (n != sizeof(struct ucbdma))
+			error(EINVAL, "Bad ucbdma size %u (%u)", n,
+			      sizeof(struct ucbdma));
+		if (copy_from_user(ucbdma, va, sizeof(struct ucbdma)))
+			error(EINVAL, "Bad ucbdma pointer");
+		/* Don't trust userspace to null-terminate the BDF string */
+		ucbdma->bdf_str[sizeof(ucbdma->bdf_str) - 1] = '\0';
+		issue_dma_ucbdma(ucbdma);
 		return n;
 	default:
 		error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
@@ -835,97 +367,10 @@
 	return -1;      /* not reached */
 }
 
-static void cbdma_interrupt(struct hw_trapframe *hw_tf, void *arg)
-{
-	uint16_t value;
-
-	value = read16(get_register(&channel0, IOAT_CHANCTRL_OFFSET));
-	write16(value | IOAT_CHANCTRL_INT_REARM,
-		get_register(&channel0, IOAT_CHANCTRL_OFFSET));
-}
-
-void cbdmainit(void)
-{
-	int tbdf;
-	int i;
-	int id;
-	struct pci_device *pci_iter;
-
-	printk("cbdma: skipping it\n");
-	return;
-
-	/* assigning global variables */
-	pci             = NULL;
-	mmio            = NULL;
-
-	/* initialize cbdmadev */
-	memset(&cbdmadev, 0x0, sizeof(cbdmadev));
-
-	/* search for the device 00:04.0 */
-	STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) {
-		id = pci_iter->dev_id << 16 | pci_iter->ven_id;
-		switch (id) {
-		default:
-			continue;
-		case ioat2021:
-		case ioat2f20:
-			/* hack: bus 0 is the PCI_ALL iommu.
-			 * Can remove this once we add code for scoped IOMMU */
-			if (pci_iter->bus != 0)
-				continue;
-			pci = pci_iter;
-			break;
-		}
-	}
-
-	if (pci == NULL) {
-		printk("cbdma: no Intel CBDMA device found\n");
-		return;
-	}
-
-	/* search and find the mapped mmio region */
-	for (i = 0; i < COUNT_OF(pci->bar); i++) {
-		mmio = pci_get_mmio_bar_kva(pci, i);
-		if (!mmio)
-			continue;
-		break;
-	}
-
-	if (mmio == NULL) {
-		printk("cbdma: cannot map any bars!\n");
-		return;
-	}
-
-	/* performance related stuff */
-	pci_set_cacheline_size(pci);
-
-	/* Get the channel count. Top 3 bits of the register are reserved. */
-	chancnt = read8(mmio + IOAT_CHANCNT_OFFSET) & 0x1F;
-
-	/* initialization successful; print stats */
-	printk("cbdma: registered [%x:%x] at %02x:%02x.%x // "
-	       "mmio:%p\n",
-	       pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func,
-	       mmio);
-
-	tbdf = MKBUS(BusPCI, pci->bus, pci->dev, pci->func);
-	register_irq(pci->irqline, cbdma_interrupt, NULL, tbdf);
-
-	/* reset device */
-	cbdma_reset_device();
-
-	/* initialize channel(s) */
-	init_channel(&channel0, 0, NDESC);
-
-	/* setup ktest struct */
-	ktest.srcfill = '1';
-	ktest.dstfill = '0';
-}
-
 struct dev cbdmadevtab __devtab = {
 	.name       = "cbdma",
 	.reset      = devreset,
-	.init       = cbdmainit,
+	.init       = devinit,
 	.shutdown   = devshutdown,
 	.attach     = cbdmaattach,
 	.walk       = cbdmawalk,
diff --git a/kern/include/cbdma_regs.h b/kern/include/cbdma_regs.h
deleted file mode 100644
index 6c8ec3d..0000000
--- a/kern/include/cbdma_regs.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright (c) 2019 Google Inc
- * Aditya Basu <mitthu@google.com>
- * See LICENSE for details.
- *
- * Copy of CBDMA register definitions from Linux kernel (around v5.1)
- * drivers/dma/ioat/registers.h
- */
-#ifndef _IOAT_REGISTERS_H_
-#define _IOAT_REGISTERS_H_
-
-#define ACCESS_PCIE_CONFIG_SPACE 1
-
-bool cbdma_is_reset_pending(void);
-void cbdma_reset_device(void);
-
-/* file: drivers/dma/ioat/hw.h */
-#define IOAT_VER_1_2            0x12    /* Version 1.2 */
-#define IOAT_VER_2_0            0x20    /* Version 2.0 */
-#define IOAT_VER_3_0            0x30    /* Version 3.0 */
-#define IOAT_VER_3_2            0x32    /* Version 3.2 */
-#define IOAT_VER_3_3            0x33    /* Version 3.3 */
-#define IOAT_VER_3_4		0x34	/* Version 3.4 */
-/* -------------------------------------- */
-
-#define IOAT_PCI_DMACTRL_OFFSET			0x48
-#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
-#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
-
-#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
-#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
-#define IOAT_PCI_CHANERR_INT_OFFSET		0x180
-#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
-
-/* MMIO Device Registers */
-#define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
-
-#define IOAT_XFERCAP_OFFSET			0x01	/*  8-bit */
-#define IOAT_XFERCAP_4KB			12
-#define IOAT_XFERCAP_8KB			13
-#define IOAT_XFERCAP_16KB			14
-#define IOAT_XFERCAP_32KB			15
-#define IOAT_XFERCAP_32GB			0
-
-#define IOAT_GENCTRL_OFFSET			0x02	/*  8-bit */
-#define IOAT_GENCTRL_DEBUG_EN			0x01
-
-#define IOAT_INTRCTRL_OFFSET			0x03	/*  8-bit */
-#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
-#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
-#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */
-
-#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
-
-#define IOAT_VER_OFFSET				0x08	/*  8-bit */
-#define IOAT_VER_MAJOR_MASK			0xF0
-#define IOAT_VER_MINOR_MASK			0x0F
-#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
-#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)
-
-#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
-
-#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
-#define IOAT_INTRDELAY_MASK			0x3FFF	/* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
-
-#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
-#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
-#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
-#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
-#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008
-
-#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
-#define IOAT_CAP_PAGE_BREAK			0x00000001
-#define IOAT_CAP_CRC				0x00000002
-#define IOAT_CAP_SKIP_MARKER			0x00000004
-#define IOAT_CAP_DCA				0x00000010
-#define IOAT_CAP_CRC_MOVE			0x00000020
-#define IOAT_CAP_FILL_BLOCK			0x00000040
-#define IOAT_CAP_APIC				0x00000080
-#define IOAT_CAP_XOR				0x00000100
-#define IOAT_CAP_PQ				0x00000200
-#define IOAT_CAP_DWBES				0x00002000
-#define IOAT_CAP_RAID16SS			0x00020000
-
-#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
-
-/* DMA Channel Registers */
-#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
-#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
-#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
-#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
-#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
-#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
-#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
-#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
-#define IOAT_CHANCTRL_INT_REARM			0x0001
-#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
-						 IOAT_CHANCTRL_ERR_INT_EN |\
-						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
-						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
-
-#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
-#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
-#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
-
-
-#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW	0x04
-#define IOAT2_CHANSTS_OFFSET_LOW	0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
-#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
-#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
-#define IOAT_CHANSTS_STATUS	0x7ULL
-#define IOAT_CHANSTS_ACTIVE	0x0
-#define IOAT_CHANSTS_DONE	0x1
-#define IOAT_CHANSTS_SUSPENDED	0x2
-#define IOAT_CHANSTS_HALTED	0x3
-
-
-
-#define IOAT_CHAN_DMACOUNT_OFFSET	0x06    /* 16-bit DMA Count register */
-
-#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
-#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
-#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
-
-/* CB DCA Memory Space Registers */
-#define IOAT_DCAOFFSET_OFFSET       0x14
-/* CB_BAR + IOAT_DCAOFFSET value */
-#define IOAT_DCA_VER_OFFSET         0x00
-#define IOAT_DCA_VER_MAJOR_MASK     0xF0
-#define IOAT_DCA_VER_MINOR_MASK     0x0F
-
-#define IOAT_DCA_COMP_OFFSET        0x02
-#define IOAT_DCA_COMP_V1            0x1
-
-#define IOAT_FSB_CAPABILITY_OFFSET  0x04
-#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
-
-#define IOAT_PCI_CAPABILITY_OFFSET  0x06
-#define IOAT_PCI_CAPABILITY_MEMWR   0x1
-
-#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
-#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
-
-#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
-#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
-
-#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
-#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
-#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
-#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
-#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
-#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
-#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
-#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
-#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
-#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
-#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
-#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
-
-#define IOAT_DCA_GREQID_OFFSET      0x10
-#define IOAT_DCA_GREQID_SIZE        0x04
-#define IOAT_DCA_GREQID_MASK        0xFFFF
-#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
-#define IOAT_DCA_GREQID_VALID       0x20000000
-#define IOAT_DCA_GREQID_LASTID      0x80000000
-
-#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
-#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
-
-#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
-#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
-
-#define IOAT3_CSI_CONTROL_OFFSET    0x0C
-#define IOAT3_CSI_CONTROL_PREFETCH  0x1
-
-#define IOAT3_PCI_CONTROL_OFFSET    0x0E
-#define IOAT3_PCI_CONTROL_MEMWR     0x1
-
-#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
-
-#define IOAT3_DCA_GREQID_OFFSET     0x02
-
-#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
-#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
-#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
-#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
-#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
-#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
-#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
-#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
-
-#define IOAT1_CHANCMD_OFFSET		0x14	/*  8-bit DMA Channel Command Register */
-#define IOAT2_CHANCMD_OFFSET		0x04	/*  8-bit DMA Channel Command Register */
-#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
-						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
-#define IOAT_CHANCMD_RESET			0x20
-#define IOAT_CHANCMD_RESUME			0x10
-#define IOAT_CHANCMD_ABORT			0x08
-#define IOAT_CHANCMD_SUSPEND			0x04
-#define IOAT_CHANCMD_APPEND			0x02
-#define IOAT_CHANCMD_START			0x01
-
-#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
-#define IOAT_CHANCMP_OFFSET_LOW			0x18
-#define IOAT_CHANCMP_OFFSET_HIGH		0x1C
-
-#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
-#define IOAT_CDAR_OFFSET_LOW			0x20
-#define IOAT_CDAR_OFFSET_HIGH			0x24
-
-#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
-#define IOAT_CHANERR_SRC_ADDR_ERR	0x0001
-#define IOAT_CHANERR_DEST_ADDR_ERR	0x0002
-#define IOAT_CHANERR_NEXT_ADDR_ERR	0x0004
-#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
-#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
-#define IOAT_CHANERR_CHANCMD_ERR		0x0020
-#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
-#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
-#define IOAT_CHANERR_READ_DATA_ERR		0x0100
-#define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
-#define IOAT_CHANERR_CONTROL_ERR	0x0400
-#define IOAT_CHANERR_LENGTH_ERR	0x0800
-#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
-#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
-#define IOAT_CHANERR_SOFT_ERR			0x4000
-#define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
-#define IOAT_CHANERR_XOR_P_OR_CRC_ERR		0x10000
-#define IOAT_CHANERR_XOR_Q_ERR			0x20000
-#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
-
-#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
-
-#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
-
-/* Extras: Added by Aditya Basu <mitthu@google.com> */
-#define CBDMA_CHANCMD_OFFSET	 			0x84
-#define CBDMA_CHANSTS_OFFSET	 			0x88
-#define CBDMA_CHANCTRL_OFFSET	 			0x80
-#define CBDMA_DMACOUNT_OFFSET				0x86
-#define CBDMA_CHAINADDR_OFFSET				0x90
-#define CBDMA_CHANCMP_OFFSET				0x98
-#define CBDMA_CHANERR_OFFSET				0xa8
-#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION		0x01 /* 32-bit field */
-#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION	0x08
-#define CBDMA_DESC_CTRL_NULL_DESC			0x20
-
-#define IOAT_CHANSTS_ARMED				0x4
-
-#endif /* _IOAT_REGISTERS_H_ */
diff --git a/tests/ucbdma.c b/tests/ucbdma.c
index c49f22b..2efb750 100644
--- a/tests/ucbdma.c
+++ b/tests/ucbdma.c
@@ -1,66 +1,45 @@
 /* Copyright (c) 2019 Google Inc
  * Aditya Basu <mitthu@google.com>
+ * Barret Rhoden <brho@google.com>
  * See LICENSE for details.
-
- * For kernel space
- * ----------------
- * uintptr_t uva2kva(struct proc *p, void *uva, size_t len, int prot)
- * prot is e.g. PROT_WRITE (writable by userspace).
- * returns a KVA, which you can convert to a phys addr with PADDR().
- *
- * TODO:
- *   - Bypass DMA re-mapping if iommu is not turned on (in #cbdma/iommu).
  */
 
-#include <stdio.h>
+#include <parlib/stdio.h>
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <string.h>
 #include <inttypes.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <parlib/assert.h>
-
-#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION              0x00000001
-#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION     0x00000008
-#define CBDMA_DESC_CTRL_NULL_DESC                       0x20
 
 #define BUFFERSIZE 20
 
+#define error_exit(s)		\
+do {				\
+	perror((s));		\
+	exit(-1);		\
+} while (0)
+
-/* Descriptor structue as defined in the programmer's guide.
- * It describes a single DMA transfer
- */
+/* A single DMA request, written to #cbdma/ucopy.  Must match the kernel's
+ * struct ucbdma in kern/drivers/dev/cbdma.c. */
-struct desc {
-	uint32_t  xfer_size;
-	uint32_t  descriptor_control;
-	uint64_t  src_addr;
-	uint64_t  dest_addr;
-	uint64_t  next_desc_addr;
-	uint64_t  next_source_address;
-	uint64_t  next_destination_address;
-	uint64_t  reserved0;
-	uint64_t  reserved1;
-} __attribute__((packed));
-
-/* describe a DMA */
 struct ucbdma {
-	struct desc    desc;
-	uint64_t       status;
-	uint16_t       ndesc;
-};
+	uint64_t		dst_addr;
+	uint64_t		src_addr;
+	uint32_t		xfer_size;
+	char			bdf_str[10];
+} __attribute__((packed));
 
 static void *map_page(void)
 {
 	void *region;
 	size_t pagesize = getpagesize();
 
-	printf("[user] page size: %zu bytes\n", pagesize);
-
 	region = mmap(0, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC,
 			MAP_ANON | MAP_PRIVATE, 0, 0);
 
 	if (region == MAP_FAILED)
-		panic("cannot mmap");
+		error_exit("cannot mmap");
 
 	return region;
 }
@@ -72,7 +51,7 @@
 
 	err = munmap(region, pagesize);
 	if (err)
-		panic("cannot munmap");
+		error_exit("cannot munmap");
 }
 
 static void issue_dma(struct ucbdma *ptr)
@@ -80,10 +59,11 @@
 	int fd = open("#cbdma/ucopy", O_RDWR);
 
 	if (fd < 0)
-		panic("open failed: #cbdma/ucopy");
+		error_exit("open failed: #cbdma/ucopy");
 
 	printf("[user] ucbdma ptr: %p\n", ptr);
-	write(fd, ptr, sizeof(struct ucbdma *));
+	if (write(fd, ptr, sizeof(struct ucbdma)) < 0)
+		error_exit("write ucbdma");
 
 	close(fd);
 }
@@ -98,13 +78,9 @@
 {
 	printf("[user] ucbdma: %p, size: %d (or 0x%x)\n", ucbdma,
 		sizeof(struct ucbdma), sizeof(struct ucbdma));
-	printf("[user] \tdesc->xref_size: %d\n", ucbdma->desc.xfer_size);
-	printf("[user] \tdesc->src_addr: %p\n", ucbdma->desc.src_addr);
-	printf("[user] \tdesc->dest_addr: %p\n", ucbdma->desc.dest_addr);
-	printf("[user] \tdesc->next_desc_addr: %p\n",
-		ucbdma->desc.next_desc_addr);
-	printf("[user] \tndesc: %d\n", ucbdma->ndesc);
-	printf("[user] \tstatus: 0x%llx\n", ucbdma->status);
+	printf("[user] \txfer_size: %d\n", ucbdma->xfer_size);
+	printf("[user] \tsrc_addr: %p\n", ucbdma->src_addr);
+	printf("[user] \tdst_addr: %p\n", ucbdma->dst_addr);
 }
 
 static void attach_device(char *pcistr)
@@ -113,11 +89,11 @@
 	int fd = open("#iommu/attach", O_RDWR);
 
 	if (fd < 0)
-		panic("open failed: #iommu/attach");
+		error_exit("open failed: #iommu/attach");
 
 	sprintf(buf, "%s %d\n", pcistr, getpid());
-	write(fd, buf, strlen(buf));
-
+	if (write(fd, buf, strlen(buf)) < 0)
+		error_exit("attach");
 	close(fd);
 
 	system("cat \\#iommu/mappings");
@@ -125,19 +101,34 @@
 
 static void detach_device(char *pcistr)
 {
+	char buf[1024];
 	int fd = open("#iommu/detach", O_RDWR);
 
 	if (fd < 0)
-		panic("open failed: #iommu/detach");
+		error_exit("open failed: #iommu/detach");
 
-	write(fd, pcistr, strlen(pcistr));
-
+	sprintf(buf, "%s %d\n", pcistr, getpid());
+	if (write(fd, buf, strlen(buf)) < 0)
+		error_exit("detach");
 	close(fd);
 }
 
-int main(int argc, char **argv)
+static void showmapping(pid_t pid, char *dst)
 {
-	char *region;
+	/* One could imagine typeof-based macros that create a string of the
+	 * right size and snprintf variables with %d, %p, whatever... */
+	char pid_s[20];
+	char addr_s[20];
+	char *argv[] = { "m", "showmapping", pid_s, addr_s, NULL };
+
+	snprintf(pid_s, sizeof(pid_s), "%d", pid);
+	snprintf(addr_s, sizeof(addr_s), "%p", dst);
+
+	run_and_wait(argv[0], sizeof(argv), argv);
+}
+
+int main(int argc, char *argv[])
+{
 	struct ucbdma *ucbdma;
 	char *src, *dst;
 	char *pcistr;
@@ -151,39 +142,40 @@
 
 	attach_device(pcistr);
 
-	/* map page for placing ucbdma */
-	region = map_page();
-
-	/* setup src and dst buffers; 100 is random padding */
+	/* setup src and dst buffers */
-	src = region + sizeof(struct ucbdma) + 100;
-	dst = region + sizeof(struct ucbdma) + 100 + BUFFERSIZE;
+	src = map_page();
+	dst = map_page();
+	printf("[user] mmaped src %p\n", src);
+	printf("[user] mmaped dst %p\n", dst);
+	/* No need to fill dst, it is all zeros (\0, not '0') from the OS */
 	fill_buffer(src, '1', BUFFERSIZE);
-	fill_buffer(dst, '0', BUFFERSIZE);
 	printf("[user] src: %s\n", src);
 	printf("[user] dst: %s\n", dst);
 
 	/* setup ucbdma*/
-	ucbdma = (struct ucbdma *) region;
-	ucbdma->status = 0;
-	ucbdma->desc.descriptor_control
-		= CBDMA_DESC_CTRL_INTR_ON_COMPLETION
-		| CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
-	ucbdma->desc.xfer_size = BUFFERSIZE;
-	ucbdma->desc.src_addr  = (uint64_t) src;
-	ucbdma->desc.dest_addr = (uint64_t) dst;
-	ucbdma->desc.next_desc_addr = (uint64_t) &ucbdma->desc;
-	ucbdma->ndesc = 1;
+	ucbdma = malloc(sizeof(struct ucbdma));
+	ucbdma->xfer_size = BUFFERSIZE;
+	ucbdma->src_addr  = (uint64_t) src;
+	ucbdma->dst_addr = (uint64_t) dst;
+	/* strncpy, not memcpy: pcistr may be shorter than bdf_str */
+	strncpy(ucbdma->bdf_str, pcistr, sizeof(ucbdma->bdf_str) - 1);
+	ucbdma->bdf_str[sizeof(ucbdma->bdf_str) - 1] = '\0';
 
-	dump_ucbdma(ucbdma);
 	issue_dma(ucbdma);
-	dump_ucbdma(ucbdma);
 
-	printf("[user] channel_status: %llx\n", ucbdma->status);
 	printf("[user] src: %s\n", src);
 	printf("[user] dst: %s\n", dst);
 
+	/* Force an IOTLB flush by mmapping/munmapping an arbitrary page */
+	unmap_page(map_page());
+
+	/* Ideally, we'd see the dirty bit set in the PTE.  But we probably
+	 * won't.  The user would have to dirty the page to tell the OS it was
+	 * dirtied, which is really nasty. */
+	printf("[user] Asking the kernel to show the PTE for %p\n", dst);
+	showmapping(getpid(), dst);
+
 	/* cleanup */
-	unmap_page(region);
+	unmap_page(src);
+	unmap_page(dst);
 
 	detach_device(pcistr);