cbdma: add support for Intel CBDMA/IOAT

* Creates the #cbdma device with a minimal hierarchy of files:
    ktest - run the self-test
    stats - dump register values and driver information
    reset - write '1' to reset the cbdma
    ucopy - write the address of a struct ucbdma to issue DMA
    iommu - turn IOMMU support on/off

* Searches all PCI devices for the following device IDs. If a matching
device is found, only a single function is registered:
    * Vendor ID: 0x8086, Device ID: 0x2021 (Skylake)
    * Vendor ID: 0x8086, Device ID: 0x2f20 (Haswell)
* If no cbdma device is found, the device will not attach (bind).

* The PCI BAR0 register pages are remapped uncached
* A desc (descriptor) ring is populated; each desc describes one DMA transfer
* On MSI interrupts, the driver acks and re-enables interrupts

* User-Space CBDMA (ucbdma); see the sketch below
    * desc addresses are converted to kaddrs before issue (IOMMU = off)
    * desc addresses are issued untranslated (IOMMU = on)
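
A minimal user-space sketch (hypothetical: it assumes struct ucbdma and the
CBDMA_DESC_CTRL_* flags are exposed to userspace, and that the buffer written
to ucopy is itself the struct ucbdma, which is how the driver interprets it):

    static char src[64], dst[64];
    struct ucbdma u = {0};
    int fd;

    u.desc.xfer_size = sizeof(src);
    u.desc.src_addr = (uint64_t) src;
    u.desc.dest_addr = (uint64_t) dst;
    u.desc.next_desc_addr = (uint64_t) &u.desc; /* one-desc ring */
    u.desc.descriptor_control = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
                CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
    u.ndesc = 1;

    fd = open("#cbdma/ucopy", O_RDWR);
    write(fd, &u, sizeof(u)); /* returns after the DMA completes */
    close(fd);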

Signed-off-by: Aditya Basu <mitthu@google.com>
[minor formatting touchups]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dev/Kbuild b/kern/drivers/dev/Kbuild
index b936073..9359432 100644
--- a/kern/drivers/dev/Kbuild
+++ b/kern/drivers/dev/Kbuild
@@ -11,6 +11,7 @@
 obj-y						+= mem.o
 obj-y						+= mnt.o
 obj-y						+= pci.o
+obj-y						+= cbdma.o
 obj-y						+= pipe.o
 obj-y						+= proc.o
 obj-y						+= random.o
diff --git a/kern/drivers/dev/cbdma.c b/kern/drivers/dev/cbdma.c
new file mode 100644
index 0000000..9ec2ce0
--- /dev/null
+++ b/kern/drivers/dev/cbdma.c
@@ -0,0 +1,954 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Useful resources:
+ *   - Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
+ *   - Purley Programmer's Guide
+ *
+ * Acronyms:
+ *   - IOAT: (Intel) I/O Acceleration Technology
+ *   - CBDMA: Crystal Beach DMA
+ *
+ * CBDMA Notes
+ * ===========
+ * Every CBDMA PCI function has one MMIO address space (so only BAR0). Each
+ * function can have multiple channels. Currently these devices only have one
+ * channel per function. This can be read from the CHANCNT register (8-bit)
+ * at offset 0x0.
+ *
+ * Each channel can be independently configured for DMA. The MMIO config space
+ * of every channel is 0x80 bytes. The first channel (CHANNEL_0) starts at
+ * offset 0x80.
+ *
+ * CHAINADDR points to a descriptor (desc) ring buffer; more precisely, it
+ * points to the first desc in the ring buffer. Each desc represents a single
+ * DMA operation. Look at "struct desc" for its layout.
+ *
+ * Each desc is 0x40 (64) bytes in size, so a 4k page can hold 4k/64 = 64
+ * descs. Note that the lower 6 bits of CHAINADDR must be zero, so the first
+ * desc needs to be 64-byte aligned. Page-aligning the first desc works
+ * because a 4k page-aligned address has its low 12 bits clear.
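+ *
+ * For example, the minimal ring this driver builds (see init_desc()) is a
+ * single desc whose next pointer refers back to itself:
+ *
+ *   CHAINADDR --> [desc 0] --next_desc_addr--> [desc 0] (self-loop)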
+ *
+ * TODO
+ * ====
+ * *MAJOR*
+ *   - Update to the correct struct desc (from Linux kernel)
+ *   - Make the status field embedded in the channel struct (no ptr business)
+ *   - Add file for errors
+ *   - Add locks to guard desc access
+ *   - Freeze VA->PA page mappings till DMA is completed (esp. for ucbdma)
+ * *MINOR*
+ *   - Replace all CBDMA_* constants with IOAT_*
+ *   - Support multiple devices (only the first CBDMA device found is used)
+ */
+
+#include <kmalloc.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <net/ip.h>
+#include <linux_compat.h>
+#include <arch/pci.h>
+#include <page_alloc.h>
+#include <pmap.h>
+#include <cbdma_regs.h>
+#include <arch/pci_regs.h>
+
+#define NDESC 1 // number of descs to initialize
+#define BUFFERSZ 8192
+
+struct dev                cbdmadevtab;
+static struct pci_device  *pci;
+static void               *mmio;
+static uint64_t           mmio_phy; /* physical addr */
+static uint32_t           mmio_sz;
+static uint8_t            chancnt; /* Total number of channels per function */
+static bool               iommu_enabled;
+static bool               cbdma_break_loop; /* see toggle_cbdma_break_loop() */
+
+/* PCIe Config Space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 */
+enum {
+	DEVSTS = 0x9a, // 16-bit
+	PMCSR  = 0xe4, // 32-bit
+
+	DMAUNCERRSTS = 0x148, // 32-bit (DMA Cluster Uncorrectable Error Status)
+	DMAUNCERRMSK = 0x14c, // 32-bit
+	DMAUNCERRSEV = 0x150, // 32-bit
+	DMAUNCERRPTR = 0x154, // 8-bit
+	DMAGLBERRPTR = 0x160, // 8-bit
+
+	CHANERR_INT    = 0x180, // 32-bit
+	CHANERRMSK_INT = 0x184, // 32-bit
+	CHANERRSEV_INT = 0x188, // 32-bit
+	CHANERRPTR     = 0x18c, // 8-bit
+};
+
+/* QID Path */
+enum {
+	Qdir           = 0,
+	Qcbdmaktest    = 1,
+	Qcbdmastats    = 2,
+	Qcbdmareset    = 3,
+	Qcbdmaucopy    = 4,
+	Qcbdmaiommu    = 5,
+};
+
+/* supported ioat devices */
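+/* keyed as (dev_id << 16) | ven_id, matching the id computed in cbdmainit() */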
+enum {
+	ioat2021 = (0x2021 << 16) | 0x8086,
+	ioat2f20 = (0x2f20 << 16) | 0x8086,
+};
+
+static struct dirtab cbdmadir[] = {
+	{".",         {Qdir, 0, QTDIR}, 0, 0555},
+	{"ktest",     {Qcbdmaktest, 0, QTFILE}, 0, 0555},
+	{"stats",     {Qcbdmastats, 0, QTFILE}, 0, 0555},
+	{"reset",     {Qcbdmareset, 0, QTFILE}, 0, 0755},
+	{"ucopy",     {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
+	{"iommu",     {Qcbdmaiommu, 0, QTFILE}, 0, 0755},
+};
+
+/* Descriptor structure as defined in the programmer's guide.
+ * It describes a single DMA transfer.
+ */
+struct desc {
+	uint32_t  xfer_size;
+	uint32_t  descriptor_control;
+	uint64_t  src_addr;
+	uint64_t  dest_addr;
+	uint64_t  next_desc_addr;
+	uint64_t  next_source_address;
+	uint64_t  next_destination_address;
+	uint64_t  reserved0;
+	uint64_t  reserved1;
+} __attribute__((packed));
+
+/* The channels are indexed starting from 0 */
+static struct channel {
+	uint8_t                number; // channel number
+	struct desc            *pdesc; // desc ptr
+	int                    ndesc;  // num. of desc
+	uint64_t               *status; // reg: CHANSTS, needs to be 64B aligned
+	uint8_t                ver;    // reg: CBVER
+
+	/* DEPRECATED */
+	/* MMIO address space; from Intel Xeon E7 2800/4800/8800 Datasheet
+	 * Vol. 2. Every channel is 0x80 bytes in size.
+	 */
+	uint8_t  chancmd;
+	uint8_t  xrefcap;
+	uint16_t chanctrl;
+	uint16_t dmacount;
+	uint32_t chanerr;
+	uint64_t chansts;
+	uint64_t chainaddr;
+} cbdmadev, channel0;
+
+#define KTEST_SIZE 64
+static struct {
+	char    printbuf[4096];
+	char    src[KTEST_SIZE];
+	char    dst[KTEST_SIZE];
+	char    srcfill;
+	char    dstfill;
+} ktest;    /* TODO: needs locking */
+
+/* struct passed from the userspace */
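+/* NOTE: desc must remain the first member; issue_dma_kaddr() treats a
+ * struct ucbdma * as a struct desc *. */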
+struct ucbdma {
+	struct desc desc;
+	uint64_t    status;
+	uint16_t    ndesc;
+};
+
+/* for debugging via kfunc; break out of infinite polling loops */
+void toggle_cbdma_break_loop(void)
+{
+	cbdma_break_loop = !cbdma_break_loop;
+	printk("cbdma: cbdma_break_loop = %d\n", cbdma_break_loop);
+}
+
+/* Function definitions start here */
+static inline bool is_initialized(void)
+{
+	return pci && mmio;
+}
+
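+/* Per-channel register lookup: device-level registers occupy the first 0x80
+ * bytes of BAR0, so channel c's registers start at (c->number + 1) * 0x80. */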
+static void *get_register(struct channel *c, int offset)
+{
+	uint64_t base = (c->number + 1) * IOAT_CHANNEL_MMIO_SIZE;
+
+	return (char *) mmio + base + offset;
+}
+
+static char *devname(void)
+{
+	return cbdmadevtab.name;
+}
+
+static struct chan *cbdmaattach(char *spec)
+{
+	if (!is_initialized())
+		error(ENODEV, "no cbdma device detected");
+	return devattach(devname(), spec);
+}
+
+struct walkqid *cbdmawalk(struct chan *c, struct chan *nc, char **name,
+			 unsigned int nname)
+{
+	return devwalk(c, nc, name, nname, cbdmadir,
+		       ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static size_t cbdmastat(struct chan *c, uint8_t *dp, size_t n)
+{
+	return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+/* return string representation of chansts */
+char *cbdma_str_chansts(uint64_t chansts)
+{
+	char *status = "unrecognized status";
+
+	switch (chansts & IOAT_CHANSTS_STATUS) {
+	case IOAT_CHANSTS_ACTIVE:
+		status = "ACTIVE";
+		break;
+	case IOAT_CHANSTS_DONE:
+		status = "DONE";
+		break;
+	case IOAT_CHANSTS_SUSPENDED:
+		status = "SUSPENDED";
+		break;
+	case IOAT_CHANSTS_HALTED:
+		status = "HALTED";
+		break;
+	case IOAT_CHANSTS_ARMED:
+		status = "ARMED";
+		break;
+	default:
+		break;
+	}
+	return status;
+}
+
+/* print descriptors on console (for debugging) */
+static void dump_desc(struct desc *d, int count)
+{
+	printk("dumping descriptors (count = %d):\n", count);
+
+	while (count > 0) {
+		printk("desc: 0x%x, size: %d bytes\n",
+			d, sizeof(struct desc));
+		printk("[32] desc->xfer_size: 0x%x\n",
+			d->xfer_size);
+		printk("[32] desc->descriptor_control: 0x%x\n",
+			d->descriptor_control);
+		printk("[64] desc->src_addr: %p\n",
+			d->src_addr);
+		printk("[64] desc->dest_addr: %p\n",
+			d->dest_addr);
+		printk("[64] desc->next_desc_addr: %p\n",
+			d->next_desc_addr);
+		printk("[64] desc->next_source_address: %p\n",
+			d->next_source_address);
+		printk("[64] desc->next_destination_address: %p\n",
+			d->next_destination_address);
+		printk("[64] desc->reserved0: %p\n",
+			d->reserved0);
+		printk("[64] desc->reserved1: %p\n",
+			d->reserved1);
+
+		count--;
+		if (count > 0)
+			d = (struct desc *) KADDR(d->next_desc_addr);
+		printk("\n");
+	}
+}
+
+/* Initialize the desc ring.
+ *
+ * - Can be called multiple times, with different "ndesc" values.
+ * - NOTE: We only create _one_ valid desc. Its next field points back to
+ *   itself (ring buffer).
+ */
+static void init_desc(struct channel *c, int ndesc)
+{
+	struct desc *d, *tmp;
+	int i;
+	const int max_ndesc = PGSIZE / sizeof(struct desc);
+
+	/* sanity checks */
+	if (ndesc > max_ndesc) {
+		printk("cbdma: allocating only %d desc instead of %d desc\n",
+			max_ndesc, ndesc);
+		ndesc = max_ndesc;
+	}
+
+	c->ndesc = ndesc;
+
+	/* allocate pages for descriptors, last 6-bits must be zero */
+	if (!c->pdesc)
+		c->pdesc = kpage_zalloc_addr();
+
+	if (!c->pdesc) {
+		printk("cbdma: cannot alloc page for desc\n");
+		return; /* TODO: return "false" */
+	}
+
+	/* preparing descriptors */
+	d = c->pdesc;
+	d->xfer_size = 1;
+	d->descriptor_control = CBDMA_DESC_CTRL_NULL_DESC;
+	d->next_desc_addr = PADDR(d);
+}
+
+/* struct channel is only used for get_register */
+static inline void cleanup_post_copy(struct channel *c)
+{
+	uint64_t value;
+
+	/* mmio_reg: DMACOUNT */
+	value = read16(get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+	if (value != 0) {
+		printk("cbdma: info: DMACOUNT = %d\n", value); /* should be 0 */
+		write16(0, get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+	}
+
+	/* mmio_reg: CHANERR */
+	value = read32(get_register(c, IOAT_CHANERR_OFFSET));
+	if (value != 0) {
+		printk("cbdma: error: CHANERR = 0x%x\n", value);
+		write32(value, get_register(c, IOAT_CHANERR_OFFSET));
+	}
+
+	/* ack errors */
+	if (ACCESS_PCIE_CONFIG_SPACE) {
+		/* PCIe_reg: CHANERR_INT */
+		value = pcidev_read32(pci, CHANERR_INT);
+		if (value != 0) {
+			printk("cbdma: error: CHANERR_INT = 0x%x\n", value);
+			pcidev_write32(pci, CHANERR_INT, value);
+		}
+
+		/* PCIe_reg: DMAUNCERRSTS */
+		value = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+		if (value != 0) {
+			printk("cbdma: error: DMAUNCERRSTS = 0x%x\n", value);
+			pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET,
+				       value);
+		}
+	}
+}
+
+/* struct channel is only used for get_register */
+static inline void perform_dma(struct channel *c, physaddr_t completion_sts,
+			       physaddr_t desc, uint16_t count)
+{
+	void __iomem *offset;
+
+	/* Set the channel completion register: the CBDMA writes the content of
+	 * CHANSTS there upon successful DMA completion or an error condition.
+	 */
+	offset = get_register(c, IOAT_CHANCMP_OFFSET);
+	write64(completion_sts, offset);
+
+	/* write the location of the first desc to the CHAINADDR register */
+	offset = get_register(c, IOAT_CHAINADDR_OFFSET(c->ver));
+	write64(desc, offset);
+	wmb_f();
+
+	/* writing a valid number of descs starts the DMA */
+	offset = get_register(c, IOAT_CHAN_DMACOUNT_OFFSET);
+	write16(count, offset);
+}
+
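+/* Poll the in-memory completion status that the hardware writes back (the
+ * address programmed into CHANCMP by perform_dma()) until the channel leaves
+ * the ACTIVE state. */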
+static inline void wait_for_dma_completion(uint64_t *cmpsts)
+{
+	uint64_t sts;
+
+	do {
+		cpu_relax();
+		sts = *cmpsts;
+		if (cbdma_break_loop) {
+			printk("cbdma: cmpsts: %p = 0x%llx\n", cmpsts, sts);
+			break;
+		}
+	} while ((sts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+/* cbdma_ktest: performs a functional test on the CBDMA
+ *
+ * - Fills the static ktest.src and ktest.dst buffers
+ * - Prepares a desc for the DMA transfer (must be aligned)
+ * - Initiates the transfer
+ * - Results are reported via the ktest file (see open_ktest())
+ */
+static void cbdma_ktest(void)
+{
+	struct desc *d;
+	struct channel *c = &channel0;
+
+	/* initialize src and dst address */
+	memset(ktest.src, ktest.srcfill, KTEST_SIZE);
+	memset(ktest.dst, ktest.dstfill, KTEST_SIZE);
+	ktest.src[KTEST_SIZE-1] = '\0';
+	ktest.dst[KTEST_SIZE-1] = '\0';
+
+	/* for subsequent ktests */
+	ktest.srcfill += 1;
+
+	/* preparing descriptors */
+	d = channel0.pdesc;
+	d->xfer_size            = (uint32_t) KTEST_SIZE;
+	d->src_addr             = (uint64_t) PADDR(ktest.src);
+	d->dest_addr            = (uint64_t) PADDR(ktest.dst);
+	d->descriptor_control   = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
+				  CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
+
+	memset(c->status, 0, sizeof(*c->status));
+
+	/* perform actual DMA */
+	perform_dma(c, PADDR(c->status), PADDR(c->pdesc), 1);
+	wait_for_dma_completion(c->status);
+	cleanup_post_copy(c);
+}
+
+/* convert a userspace pointer to kaddr based pointer
+ * TODO: this is dangerous and the pages are not pinned. Debugging only! */
+static inline void *uptr_to_kptr(void *ptr)
+{
+	return (void *) uva2kva(current, ptr, 1, PROT_WRITE);
+}
+
+/* Performs the DMA using kernel addresses (IOMMU = off).
+ * Note: does not perform error checks for src / dest addresses.
+ * TODO: this only works if ktest is not run, and even then it fails on
+ *       alternate runs. Likely some error in setting up the desc from
+ *       userspace.
+ */
+static void issue_dma_kaddr(struct ucbdma *u)
+{
+	struct ucbdma *u_kaddr = uptr_to_kptr(u);
+	/* first field is struct desc */
+	struct desc *d = (struct desc *) u_kaddr;
+	struct channel *c = &channel0;
+
+	if (!u_kaddr) {
+		printk("[kern] cannot get kaddr for useraddr: %p\n", u);
+		return;
+	}
+	printk("[kern] ucbdma: user: %p kern: %p\n", u, u_kaddr);
+
+	/* preparing descriptors */
+	d->src_addr   = (uint64_t) PADDR(uptr_to_kptr((void*) d->src_addr));
+	d->dest_addr  = (uint64_t) PADDR(uptr_to_kptr((void*) d->dest_addr));
+	d->next_desc_addr = (uint64_t)
+			    PADDR(uptr_to_kptr((void*) d->next_desc_addr));
+
+	/* perform actual DMA */
+	perform_dma(c, PADDR(&u_kaddr->status), PADDR(d), u_kaddr->ndesc);
+	wait_for_dma_completion(&u_kaddr->status);
+	cleanup_post_copy(c);
+}
+
+/* Performs the DMA using virtual (process) addresses (IOMMU = on).
+ * TODO: verify once the IOMMU is set up and enabled.
+ */
+static void issue_dma_vaddr(struct ucbdma *u)
+{
+	struct ucbdma *u_kaddr = uptr_to_kptr(u);
+	struct channel *c = &channel0;
+
+	if (!u_kaddr) {
+		printk("[kern] cannot get kaddr for useraddr: %p\n", u);
+		return;
+	}
+
+	printk("[kern] IOMMU = ON\n");
+	printk("[kern] ucbdma: user: %p kern: %p ndesc: %d\n", u,
+		&u_kaddr->desc, u_kaddr->ndesc);
+
+	/* perform actual DMA */
+	perform_dma(c, (physaddr_t) &u->status, (physaddr_t) &u->desc,
+		    u_kaddr->ndesc);
+	wait_for_dma_completion(&u_kaddr->status);
+	cleanup_post_copy(c);
+}
+
+/* cbdma_stats: get stats about the device and driver
+ */
+static struct sized_alloc *open_stats(void)
+{
+	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+	uint64_t value;
+
+	sza_printf(sza,
+		"Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+		pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+	/* driver info. */
+	sza_printf(sza, "    Driver Information:\n");
+	sza_printf(sza,
+		"\tmmio: %p\n"
+		"\tmmio_phy: 0x%x\n"
+		"\tmmio_sz: %lu\n"
+		"\ttotal_channels: %d\n"
+		"\tdesc_kaddr: %p\n"
+		"\tdesc_paddr: %p\n"
+		"\tdesc_num: %d\n"
+		"\tver: 0x%x\n"
+		"\tstatus_kaddr: %p\n"
+		"\tstatus_paddr: %p\n"
+		"\tstatus_value: 0x%x\n",
+		mmio, mmio_phy, mmio_sz, chancnt,
+		channel0.pdesc, PADDR(channel0.pdesc), channel0.ndesc,
+		channel0.ver, channel0.status, PADDR(channel0.status),
+		*(uint64_t *)channel0.status);
+
+	/* print the PCI registers */
+	sza_printf(sza, "    PCIe Config Registers:\n");
+
+	value = pcidev_read16(pci, PCI_CMD_REG);
+	sza_printf(sza, "\tPCICMD: 0x%x\n", value);
+
+	value = pcidev_read16(pci, PCI_STATUS_REG);
+	sza_printf(sza, "\tPCISTS: 0x%x\n", value);
+
+	value = pcidev_read16(pci, PCI_REVID_REG);
+	sza_printf(sza, "\tRID: 0x%x\n", value);
+
+	value = pcidev_read32(pci, PCI_BAR0_STD);
+	sza_printf(sza, "\tCB_BAR: 0x%x\n", value);
+
+	value = pcidev_read16(pci, DEVSTS);
+	sza_printf(sza, "\tDEVSTS: 0x%x\n", value);
+
+	value = pcidev_read32(pci, PMCSR);
+	sza_printf(sza, "\tPMCSR: 0x%x\n", value);
+
+	value = pcidev_read32(pci, DMAUNCERRSTS);
+	sza_printf(sza, "\tDMAUNCERRSTS: 0x%x\n", value);
+
+	value = pcidev_read32(pci, DMAUNCERRMSK);
+	sza_printf(sza, "\tDMAUNCERRMSK: 0x%x\n", value);
+
+	value = pcidev_read32(pci, DMAUNCERRSEV);
+	sza_printf(sza, "\tDMAUNCERRSEV: 0x%x\n", value);
+
+	value = pcidev_read8(pci, DMAUNCERRPTR);
+	sza_printf(sza, "\tDMAUNCERRPTR: 0x%x\n", value);
+
+	value = pcidev_read8(pci, DMAGLBERRPTR);
+	sza_printf(sza, "\tDMAGLBERRPTR: 0x%x\n", value);
+
+	value = pcidev_read32(pci, CHANERR_INT);
+	sza_printf(sza, "\tCHANERR_INT: 0x%x\n", value);
+
+	value = pcidev_read32(pci, CHANERRMSK_INT);
+	sza_printf(sza, "\tCHANERRMSK_INT: 0x%x\n", value);
+
+	value = pcidev_read32(pci, CHANERRSEV_INT);
+	sza_printf(sza, "\tCHANERRSEV_INT: 0x%x\n", value);
+
+	value = pcidev_read8(pci, CHANERRPTR);
+	sza_printf(sza, "\tCHANERRPTR: 0x%x\n", value);
+
+	sza_printf(sza, "    CHANNEL_0 MMIO Registers:\n");
+
+	value = read8(mmio + CBDMA_CHANCMD_OFFSET);
+	sza_printf(sza, "\tCHANCMD: 0x%x\n", value);
+
+	value = read8(mmio + IOAT_VER_OFFSET);
+	sza_printf(sza, "\tCBVER: 0x%x major=%d minor=%d\n",
+		   value,
+		   GET_IOAT_VER_MAJOR(value),
+		   GET_IOAT_VER_MINOR(value));
+
+	value = read16(mmio + CBDMA_CHANCTRL_OFFSET);
+	sza_printf(sza, "\tCHANCTRL: 0x%llx\n", value);
+
+	value = read64(mmio + CBDMA_CHANSTS_OFFSET);
+	sza_printf(sza, "\tCHANSTS: 0x%x [%s], desc_addr: %p, raw: 0x%llx\n",
+		   (value & IOAT_CHANSTS_STATUS),
+		   cbdma_str_chansts(value),
+		   (value & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR),
+		   value);
+
+	value = read64(mmio + CBDMA_CHAINADDR_OFFSET);
+	sza_printf(sza, "\tCHAINADDR: %p\n", value);
+
+	value = read64(mmio + CBDMA_CHANCMP_OFFSET);
+	sza_printf(sza, "\tCHANCMP: %p\n", value);
+
+	value = read16(mmio + CBDMA_DMACOUNT_OFFSET);
+	sza_printf(sza, "\tDMACOUNT: %d\n", value);
+
+	value = read32(mmio + CBDMA_CHANERR_OFFSET);
+	sza_printf(sza, "\tCHANERR: 0x%x\n", value);
+
+	return sza;
+}
+
+static struct sized_alloc *open_reset(void)
+{
+	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+	if (cbdma_is_reset_pending())
+		sza_printf(sza, "Status: Reset is pending\n");
+	else
+		sza_printf(sza, "Status: No pending reset\n");
+
+	sza_printf(sza, "Write '1' to perform reset!\n");
+
+	return sza;
+}
+
+static struct sized_alloc *open_iommu(void)
+{
+	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+	sza_printf(sza, "IOMMU enabled = %s\n", iommu_enabled ? "yes":"no");
+	sza_printf(sza, "Write '0' to disable or '1' to enable the IOMMU\n");
+
+	return sza;
+}
+
+/* targets channel0 */
+static struct sized_alloc *open_ktest(void)
+{
+	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+	/* run the test */
+	cbdma_ktest();
+
+	sza_printf(sza,
+	   "Self-test Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+	   pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+	sza_printf(sza, "\tChannel Status: %s (raw: 0x%x)\n",
+		cbdma_str_chansts(*((uint64_t *)channel0.status)),
+		(*((uint64_t *)channel0.status) & IOAT_CHANSTS_STATUS));
+
+	sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
+	sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
+	sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
+	sza_printf(sza, "\tsrc_str (after copy): %s\n", ktest.src);
+	sza_printf(sza, "\tdst_str (after copy): %s\n", ktest.dst);
+
+	return sza;
+}
+
+/* cbdma_reset_device: resets the channel; this recovers from any earlier
+ * programming errors.
+ */
+void cbdma_reset_device(void)
+{
+	int cbdmaver;
+	uint32_t error;
+
+	/* make sure the driver is initialized */
+	if (!mmio)
+		error(EIO, "cbdma: mmio addr not set");
+
+	pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+							| PCI_COMMAND_MASTER);
+	/* fetch version */
+	cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+	/* ack channel errors */
+	error = read32(mmio + CBDMA_CHANERR_OFFSET);
+	write32(error, mmio + CBDMA_CHANERR_OFFSET);
+
+	if (ACCESS_PCIE_CONFIG_SPACE) {
+		/* ack pci device-level errors */
+		/* clear DMA Cluster Uncorrectable Error Status */
+		error = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+		pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET, error);
+
+		/* clear DMA Channel Error Status */
+		error = pcidev_read32(pci, IOAT_PCI_CHANERR_INT_OFFSET);
+		pcidev_write32(pci, IOAT_PCI_CHANERR_INT_OFFSET, error);
+	}
+
+	/* reset */
+	write8(IOAT_CHANCMD_RESET, mmio
+				   + IOAT_CHANNEL_MMIO_SIZE
+				   + IOAT_CHANCMD_OFFSET(cbdmaver));
+
+	pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+			| PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
+
+	printk("cbdma: reset performed\n");
+}
+
+/* cbdma_is_reset_pending: returns true if reset is pending
+ */
+bool cbdma_is_reset_pending(void)
+{
+	int cbdmaver;
+	int status;
+
+	/* make sure the driver is initialized */
+	if (!mmio) {
+		error(EPERM, "cbdma: mmio addr not set");
+		return false; /* does not reach */
+	}
+
+	/* fetch version */
+	cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+	status = read8(mmio + IOAT_CHANNEL_MMIO_SIZE
+			+ IOAT_CHANCMD_OFFSET(cbdmaver));
+
+	return (status & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+///////// SYS INTERFACE ////////////////////////////////////////////////////////
+
+static struct chan *cbdmaopen(struct chan *c, int omode)
+{
+	switch (c->qid.path) {
+	case Qcbdmastats:
+		c->synth_buf = open_stats();
+		break;
+	case Qcbdmareset:
+		c->synth_buf = open_reset();
+		break;
+	case Qcbdmaiommu:
+		c->synth_buf = open_iommu();
+		break;
+	case Qcbdmaktest:
+		c->synth_buf = open_ktest();
+		break;
+	case Qdir:
+	case Qcbdmaucopy:
+		break;
+	default:
+		error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+	}
+
+	return devopen(c, omode, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static void cbdmaclose(struct chan *c)
+{
+	switch (c->qid.path) {
+	case Qcbdmastats:
+	case Qcbdmareset:
+	case Qcbdmaiommu:
+	case Qcbdmaktest:
+		kfree(c->synth_buf);
+		c->synth_buf = NULL;
+		break;
+	case Qdir:
+	case Qcbdmaucopy:
+		break;
+	default:
+		error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+	}
+}
+
+static size_t cbdmaread(struct chan *c, void *va, size_t n, off64_t offset)
+{
+	struct sized_alloc *sza = c->synth_buf;
+
+	switch (c->qid.path) {
+	case Qcbdmaktest:
+	case Qcbdmastats:
+	case Qcbdmareset:
+	case Qcbdmaiommu:
+		return readstr(offset, va, n, sza->buf);
+	case Qcbdmaucopy:
+		return readstr(offset, va, n,
+			"Write address of struct ucopy to issue DMA\n");
+	case Qdir:
+		return devdirread(c, va, n, cbdmadir, ARRAY_SIZE(cbdmadir),
+					devgen);
+	default:
+		error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+	}
+
+	return -1;      /* not reached */
+}
+
+static void init_channel(struct channel *c, int cnum, int ndesc)
+{
+	c->number = cnum;
+	c->pdesc = NULL;
+	init_desc(c, ndesc);
+
+	/* this is a writeback field; the hardware will update this value */
+	if (!c->status)
+		c->status = kmalloc_align(sizeof(uint64_t), MEM_WAIT, 64);
+	assert(c->status != 0);
+
+	/* cbdma version */
+	c->ver = read8(mmio + IOAT_VER_OFFSET);
+
+	/* Set "Any Error Abort Enable": enables abort for any error encountered
+	 * Set "Error Completion Enable": enables completion write to address in
+					  CHANCMP for any error
+	 * Reset "Interrupt Disable": W1C, when clear enables interrupt to fire
+				    for next descriptor that specifies interrupt
+	*/
+	write8(IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN,
+	       get_register(c, IOAT_CHANCTRL_OFFSET));
+}
+
+static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
+{
+	switch (c->qid.path) {
+	case Qdir:
+		error(EPERM, "writing not permitted");
+	case Qcbdmaktest:
+	case Qcbdmastats:
+		error(EPERM, ERROR_FIXME);
+	case Qcbdmareset:
+		if (offset == 0 && n > 0 && *(char *)va == '1') {
+			cbdma_reset_device();
+			init_channel(&channel0, 0, NDESC);
+		} else {
+			error(EINVAL, "write '1' to perform reset");
+		}
+		return n;
+	case Qcbdmaucopy:
+		if (offset == 0 && n > 0) {
+			printk("[kern] value from userspace: %p\n", va);
+			if (iommu_enabled)
+				issue_dma_vaddr(va);
+			else
+				issue_dma_kaddr(va);
+			return n;
+		}
+		return 0;
+	case Qcbdmaiommu:
+		if (offset == 0 && n > 0 && *(char *)va == '1')
+			iommu_enabled = true;
+		else if (offset == 0 && n > 0 && *(char *)va == '0')
+			iommu_enabled = false;
+		else
+			error(EINVAL, "cannot be empty string");
+		return n;
+	default:
+		error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+	}
+
+	return -1;      /* not reached */
+}
+
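+/* MSI handler: writing IOAT_CHANCTRL_INT_REARM acks the interrupt and
+ * re-arms it to fire for the next descriptor that requests one. */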
+static void cbdma_interrupt(struct hw_trapframe *hw_tf, void *arg)
+{
+	uint16_t value;
+
+	value = read16(get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+	write16(value | IOAT_CHANCTRL_INT_REARM,
+		get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+}
+
+void cbdmainit(void)
+{
+	int tbdf;
+	int i;
+	int id;
+	struct pci_device *pci_iter;
+
+	/* assigning global variables */
+	pci             = NULL;
+	mmio            = NULL;
+	mmio_sz         = -1;
+
+	/* initialize cbdmadev */
+	memset(&cbdmadev, 0x0, sizeof(cbdmadev));
+
+	/* search for a supported IOAT device */
+	STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) {
+		id = pci_iter->dev_id << 16 | pci_iter->ven_id;
+		switch (id) {
+		default:
+			continue;
+		case ioat2021:
+		case ioat2f20:
+			/* hack: bus 0 is the PCI_ALL iommu.
+			 * Can remove this once we add code for scoped IOMMU */
+			if (pci_iter->bus != 0)
+				continue;
+			pci = pci_iter;
+			break;
+		}
+		if (pci) /* stop at the first matching function */
+			break;
+	}
+
+	if (pci == NULL) {
+		printk("cbdma: no Intel CBDMA device found\n");
+		return;
+	}
+
+	/* search and find the mapped mmio region */
+	for (i = 0; i < COUNT_OF(pci->bar); i++) {
+		if (pci->bar[i].mmio_sz == 0)
+			continue;
+		mmio_phy = (pci->bar[i].mmio_base32
+			 ? pci->bar[i].mmio_base32
+			 : pci->bar[i].mmio_base64);
+		mmio_sz  = pci->bar[i].mmio_sz;
+		mmio     = (void *) vmap_pmem_nocache(mmio_phy, mmio_sz);
+		break;
+	}
+
+	/* handle any errors */
+	if (mmio_sz == -1) {
+		printk("cbdma: invalid mmio_sz\n");
+		return;
+	}
+
+	if (mmio == NULL) {
+		printk("cbdma: cannot map %p\n", mmio_phy);
+		return;
+	}
+
+	/* performance related stuff */
+	pci_set_cacheline_size(pci);
+
+	/* Get the channel count. Top 3 bits of the register are reserved. */
+	chancnt = read8(mmio + IOAT_CHANCNT_OFFSET) & 0x1F;
+
+	/* initialization successful; print stats */
+	printk("cbdma: registered [%x:%x] at %02x:%02x.%x // "
+	       "mmio:%p mmio_sz:%lu\n",
+	       pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func,
+	       mmio, mmio_sz);
+
+	tbdf = MKBUS(BusPCI, pci->bus, pci->dev, pci->func);
+	register_irq(pci->irqline, cbdma_interrupt, NULL, tbdf);
+
+	/* reset device */
+	cbdma_reset_device();
+
+	/* initialize channel(s) */
+	init_channel(&channel0, 0, NDESC);
+
+	/* setup ktest struct */
+	ktest.srcfill = '1';
+	ktest.dstfill = '0';
+}
+
+struct dev cbdmadevtab __devtab = {
+	.name       = "cbdma",
+	.reset      = devreset,
+	.init       = cbdmainit,
+	.shutdown   = devshutdown,
+	.attach     = cbdmaattach,
+	.walk       = cbdmawalk,
+	.stat       = cbdmastat,
+	.open       = cbdmaopen,
+	.create     = devcreate,
+	.close      = cbdmaclose,
+	.read       = cbdmaread,
+	.bread      = devbread,
+	.write      = cbdmawrite,
+	.bwrite     = devbwrite,
+	.remove     = devremove,
+	.wstat      = devwstat,
+};
diff --git a/kern/include/cbdma_regs.h b/kern/include/cbdma_regs.h
new file mode 100644
index 0000000..6c8ec3d
--- /dev/null
+++ b/kern/include/cbdma_regs.h
@@ -0,0 +1,268 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Copy of CBDMA register definitions from Linux kernel (around v5.1)
+ * drivers/dma/ioat/registers.h
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define ACCESS_PCIE_CONFIG_SPACE 1
+
+bool cbdma_is_reset_pending(void);
+void cbdma_reset_device(void);
+
+/* file: drivers/dma/ioat/hw.h */
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
+#define IOAT_VER_3_4		0x34	/* Version 3.4 */
+/* -------------------------------------- */
+
+#define IOAT_PCI_DMACTRL_OFFSET			0x48
+#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET		0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET			0x01	/*  8-bit */
+#define IOAT_XFERCAP_4KB			12
+#define IOAT_XFERCAP_8KB			13
+#define IOAT_XFERCAP_16KB			14
+#define IOAT_XFERCAP_32KB			15
+#define IOAT_XFERCAP_32GB			0
+
+#define IOAT_GENCTRL_OFFSET			0x02	/*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN			0x01
+
+#define IOAT_INTRCTRL_OFFSET			0x03	/*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
+
+#define IOAT_VER_OFFSET				0x08	/*  8-bit */
+#define IOAT_VER_MAJOR_MASK			0xF0
+#define IOAT_VER_MINOR_MASK			0x0F
+#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
+#define IOAT_INTRDELAY_MASK			0x3FFF	/* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008
+
+#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
+#define IOAT_CAP_PAGE_BREAK			0x00000001
+#define IOAT_CAP_CRC				0x00000002
+#define IOAT_CAP_SKIP_MARKER			0x00000004
+#define IOAT_CAP_DCA				0x00000010
+#define IOAT_CAP_CRC_MOVE			0x00000020
+#define IOAT_CAP_FILL_BLOCK			0x00000040
+#define IOAT_CAP_APIC				0x00000080
+#define IOAT_CAP_XOR				0x00000100
+#define IOAT_CAP_PQ				0x00000200
+#define IOAT_CAP_DWBES				0x00002000
+#define IOAT_CAP_RAID16SS			0x00020000
+
+#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define IOAT_CHANCTRL_INT_REARM			0x0001
+#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ERR_INT_EN |\
+						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW	0x04
+#define IOAT2_CHANSTS_OFFSET_LOW	0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
+#define IOAT_CHANSTS_STATUS	0x7ULL
+#define IOAT_CHANSTS_ACTIVE	0x0
+#define IOAT_CHANSTS_DONE	0x1
+#define IOAT_CHANSTS_SUSPENDED	0x2
+#define IOAT_CHANSTS_HALTED	0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET	0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET		0x14	/*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET		0x04	/*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET			0x20
+#define IOAT_CHANCMD_RESUME			0x10
+#define IOAT_CHANCMD_ABORT			0x08
+#define IOAT_CHANCMD_SUSPEND			0x04
+#define IOAT_CHANCMD_APPEND			0x02
+#define IOAT_CHANCMD_START			0x01
+
+#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW			0x18
+#define IOAT_CHANCMP_OFFSET_HIGH		0x1C
+
+#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW			0x20
+#define IOAT_CDAR_OFFSET_HIGH			0x24
+
+#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR	0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR	0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR	0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
+#define IOAT_CHANERR_CHANCMD_ERR		0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
+#define IOAT_CHANERR_READ_DATA_ERR		0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
+#define IOAT_CHANERR_CONTROL_ERR	0x0400
+#define IOAT_CHANERR_LENGTH_ERR	0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
+#define IOAT_CHANERR_SOFT_ERR			0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR		0x10000
+#define IOAT_CHANERR_XOR_Q_ERR			0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
+
+/* Extras: Added by Aditya Basu <mitthu@google.com> */
+#define CBDMA_CHANCMD_OFFSET	 			0x84
+#define CBDMA_CHANSTS_OFFSET	 			0x88
+#define CBDMA_CHANCTRL_OFFSET	 			0x80
+#define CBDMA_DMACOUNT_OFFSET				0x86
+#define CBDMA_CHAINADDR_OFFSET				0x90
+#define CBDMA_CHANCMP_OFFSET				0x98
+#define CBDMA_CHANERR_OFFSET				0xa8
+#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION		0x01 /* 32-bit field */
+#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION	0x08
+#define CBDMA_DESC_CTRL_NULL_DESC			0x20
+
+#define IOAT_CHANSTS_ARMED				0x4
+
+#endif /* _IOAT_REGISTERS_H_ */