cbdma: add support for Intel CBDMA/IOAT
* Creates the #cbdma device and a minimal hierarchy with these files:
ktest - run the self-test
stats - dump register values and driver information
reset - write '1' to reset the cbdma
ucopy - issue a DMA described by a user-provided struct ucbdma
iommu - turn on/off IOMMU support
* Searches through all PCI devices, looking for the following devices.
If any device is found, only a single function is registered.
* Vendor ID: 0x8086, Device ID: 0x2021 (Skylake)
* Vendor ID: 0x8086, Device ID: 0x2f20 (Haswell)
* If no cbdma device is found, the device will not attach (bind).
* The PCI BAR register pages are remapped with nocache.
* A desc chain is populated, which describes the DMA transfers.
* On an MSI interrupt, the driver acks and re-arms the interrupt.
* User-Space CBDMA (ucbdma)
* desc addresses are converted to kaddrs and issued (IOMMU = off)
* desc addresses are issued untranslated (IOMMU = on)
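For reference, a rough sketch of the userspace side of ucopy (untested;
assumes the struct ucbdma/desc layouts from cbdma.c, the CBDMA_* constants
from cbdma_regs.h, and that the driver's directory is attached):

	/* the desc feeds CHAINADDR, whose low 6 bits must be zero */
	struct ucbdma u __attribute__((aligned(64))) = {0};
	char src[64], dst[64];
	int fd;

	u.desc.xfer_size = sizeof(src);
	u.desc.src_addr = (uint64_t)src;		/* user VAs */
	u.desc.dest_addr = (uint64_t)dst;
	u.desc.next_desc_addr = (uint64_t)&u.desc;	/* ring of one */
	u.desc.descriptor_control =
		CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
	u.ndesc = 1;

	fd = open("#cbdma/ucopy", O_RDWR);
	/* the driver uses the written buffer's address as the ucbdma ptr */
	write(fd, &u, sizeof(u));
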
Signed-off-by: Aditya Basu <mitthu@google.com>
[minor formatting touchups]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dev/Kbuild b/kern/drivers/dev/Kbuild
index b936073..9359432 100644
--- a/kern/drivers/dev/Kbuild
+++ b/kern/drivers/dev/Kbuild
@@ -11,6 +11,7 @@
obj-y += mem.o
obj-y += mnt.o
obj-y += pci.o
+obj-y += cbdma.o
obj-y += pipe.o
obj-y += proc.o
obj-y += random.o
diff --git a/kern/drivers/dev/cbdma.c b/kern/drivers/dev/cbdma.c
new file mode 100644
index 0000000..9ec2ce0
--- /dev/null
+++ b/kern/drivers/dev/cbdma.c
@@ -0,0 +1,954 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Useful resources:
+ * - Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
+ * - Purley Programmer's Guide
+ *
+ * Acronyms:
+ * - IOAT: (Intel) I/O Acceleration Technology
+ * - CBDMA: Crystal Beach DMA
+ *
+ * CBDMA Notes
+ * ===========
+ * Every CBDMA PCI function has one MMIO address space (so only BAR0). Each
+ * function can have multiple channels. Currently these devices only have one
+ * channel per function. This can be read from the CHANCNT register (8-bit)
+ * at offset 0x0.
+ *
+ * Each channel can be independently configured for DMA. The MMIO config
+ * space of every channel is 0x80 bytes. The first channel (or CHANNEL_0)
+ * starts at offset 0x80.
+ *
+ * CHAINADDR points to a descriptor (desc) ring buffer. More precisely, it
+ * points to the first desc in the ring buffer. Each desc represents a single
+ * DMA operation. Look at "struct desc" for its structure.
+ *
+ * Each desc is 0x40 (64) bytes in size, so a 4k page can hold 4k/64 = 64
+ * descs. Note that the lower 6 bits of CHAINADDR must be zero, so the first
+ * desc's address needs to be aligned accordingly. Page-aligning the first
+ * desc's address works because 4k page-aligned addresses have their last 12
+ * bits as zero.
+ *
+ * TODO
+ * ====
+ * *MAJOR*
+ * - Update to the correct struct desc (from Linux kernel)
+ * - Make the status field embedded in the channel struct (no ptr business)
+ * - Add file for errors
+ * - Add locks to guard desc access
+ * - Freeze VA->PA page mappings till DMA is completed (esp. for ucbdma)
+ * *MINOR*
+ * - Replace all CBDMA_* constants with IOAT_*
+ * - Support multiple CBDMA devices (only a single function is used now)
+ */
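+
+/* Worked example of the layout above (for reference): with
+ * IOAT_CHANNEL_MMIO_SIZE = 0x80, channel N's registers start at
+ * mmio + (N + 1) * 0x80. CHANNEL_0's v2 CHANCMD (offset 0x04) therefore
+ * lives at mmio + 0x84, which is the CBDMA_CHANCMD_OFFSET constant in
+ * cbdma_regs.h; get_register() below computes the same addresses.
+ */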
+
+#include <kmalloc.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <net/ip.h>
+#include <linux_compat.h>
+#include <arch/pci.h>
+#include <page_alloc.h>
+#include <pmap.h>
+#include <cbdma_regs.h>
+#include <arch/pci_regs.h>
+
+#define NDESC 1 // number of descs to initialize
+#define BUFFERSZ 8192
+
+struct dev cbdmadevtab;
+static struct pci_device *pci;
+static void *mmio;
+static uint64_t mmio_phy; /* physical addr */
+static uint32_t mmio_sz;
+static uint8_t chancnt; /* Total number of channels per function */
+static bool iommu_enabled;
+static bool cbdma_break_loop; /* set via toggle_cbdma_break_loop() */
+
+/* PCIe Config Space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 */
+enum {
+ DEVSTS = 0x9a, // 16-bit
+ PMCSR = 0xe4, // 32-bit
+
+ DMAUNCERRSTS = 0x148, // 32-bit (DMA Cluster Uncorrectable Error Status)
+ DMAUNCERRMSK = 0x14c, // 32-bit
+ DMAUNCERRSEV = 0x150, // 32-bit
+ DMAUNCERRPTR = 0x154, // 8-bit
+ DMAGLBERRPTR = 0x160, // 8-bit
+
+ CHANERR_INT = 0x180, // 32-bit
+ CHANERRMSK_INT = 0x184, // 32-bit
+ CHANERRSEV_INT = 0x188, // 32-bit
+ CHANERRPTR = 0x18c, // 8-bit
+};
+
+/* QID Path */
+enum {
+ Qdir = 0,
+ Qcbdmaktest = 1,
+ Qcbdmastats = 2,
+ Qcbdmareset = 3,
+ Qcbdmaucopy = 4,
+ Qcbdmaiommu = 5,
+};
+
+/* supported ioat devices */
+enum {
+ ioat2021 = (0x2021 << 16) | 0x8086,
+ ioat2f20 = (0x2f20 << 16) | 0x8086,
+};
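+
+/* e.g., ioat2021 = (0x2021 << 16) | 0x8086 = 0x20218086; cbdmainit()
+ * builds the same dev_id/ven_id packing when matching devices */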
+
+static struct dirtab cbdmadir[] = {
+ {".", {Qdir, 0, QTDIR}, 0, 0555},
+ {"ktest", {Qcbdmaktest, 0, QTFILE}, 0, 0555},
+ {"stats", {Qcbdmastats, 0, QTFILE}, 0, 0555},
+ {"reset", {Qcbdmareset, 0, QTFILE}, 0, 0755},
+ {"ucopy", {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
+ {"iommu", {Qcbdmaiommu, 0, QTFILE}, 0, 0755},
+};
+
+/* Descriptor structure as defined in the programmer's guide.
+ * It describes a single DMA transfer and is 64 bytes in size.
+ */
+struct desc {
+ uint32_t xfer_size;
+ uint32_t descriptor_control;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next_desc_addr;
+ uint64_t next_source_address;
+ uint64_t next_destination_address;
+ uint64_t reserved0;
+ uint64_t reserved1;
+} __attribute__((packed));
+
+/* The channels are indexed starting from 0 */
+static struct channel {
+ uint8_t number; // channel number
+ struct desc *pdesc; // desc ptr
+ int ndesc; // num. of desc
+ uint64_t *status; // reg: CHANSTS, needs to be 64B aligned
+ uint8_t ver; // reg: CBVER
+
+	/* DEPRECATED: MMIO address space; from Intel Xeon E7 2800/4800/8800
+	 * Datasheet Vol. 2. Every channel is 0x80 bytes in size.
+	 */
+ uint8_t chancmd;
+ uint8_t xrefcap;
+ uint16_t chanctrl;
+ uint16_t dmacount;
+ uint32_t chanerr;
+ uint64_t chansts;
+ uint64_t chainaddr;
+} cbdmadev, channel0;
+
+#define KTEST_SIZE 64
+static struct {
+ char printbuf[4096];
+ char src[KTEST_SIZE];
+ char dst[KTEST_SIZE];
+ char srcfill;
+ char dstfill;
+} ktest; /* TODO: needs locking */
+
+/* struct passed in from userspace via the ucopy file */
+struct ucbdma {
+ struct desc desc;
+ uint64_t status;
+ uint16_t ndesc;
+};
+
+/* for debugging via kfunc; break out of infinite polling loops */
+void toggle_cbdma_break_loop(void)
+{
+ cbdma_break_loop = !cbdma_break_loop;
+ printk("cbdma: cbdma_break_loop = %d\n", cbdma_break_loop);
+}
+
+/* Function definitions start here */
+static inline bool is_initialized(void)
+{
+	return pci && mmio;
+}
+
+static void *get_register(struct channel *c, int offset)
+{
+ uint64_t base = (c->number + 1) * IOAT_CHANNEL_MMIO_SIZE;
+
+ return (char *) mmio + base + offset;
+}
+
+static char *devname(void)
+{
+ return cbdmadevtab.name;
+}
+
+static struct chan *cbdmaattach(char *spec)
+{
+ if (!is_initialized())
+ error(ENODEV, "no cbdma device detected");
+ return devattach(devname(), spec);
+}
+
+struct walkqid *cbdmawalk(struct chan *c, struct chan *nc, char **name,
+ unsigned int nname)
+{
+ return devwalk(c, nc, name, nname, cbdmadir,
+ ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static size_t cbdmastat(struct chan *c, uint8_t *dp, size_t n)
+{
+ return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+/* return string representation of chansts */
+char *cbdma_str_chansts(uint64_t chansts)
+{
+ char *status = "unrecognized status";
+
+ switch (chansts & IOAT_CHANSTS_STATUS) {
+ case IOAT_CHANSTS_ACTIVE:
+ status = "ACTIVE";
+ break;
+ case IOAT_CHANSTS_DONE:
+ status = "DONE";
+ break;
+ case IOAT_CHANSTS_SUSPENDED:
+ status = "SUSPENDED";
+ break;
+ case IOAT_CHANSTS_HALTED:
+ status = "HALTED";
+ break;
+ case IOAT_CHANSTS_ARMED:
+ status = "ARMED";
+ break;
+ default:
+ break;
+ }
+ return status;
+}
+
+/* print descriptors on console (for debugging) */
+static void dump_desc(struct desc *d, int count)
+{
+ printk("dumping descriptors (count = %d):\n", count);
+
+ while (count > 0) {
+		printk("desc: %p, size: %lu bytes\n",
+		       d, sizeof(struct desc));
+ printk("[32] desc->xfer_size: 0x%x\n",
+ d->xfer_size);
+ printk("[32] desc->descriptor_control: 0x%x\n",
+ d->descriptor_control);
+ printk("[64] desc->src_addr: %p\n",
+ d->src_addr);
+ printk("[64] desc->dest_addr: %p\n",
+ d->dest_addr);
+ printk("[64] desc->next_desc_addr: %p\n",
+ d->next_desc_addr);
+ printk("[64] desc->next_source_address: %p\n",
+ d->next_source_address);
+ printk("[64] desc->next_destination_address: %p\n",
+ d->next_destination_address);
+ printk("[64] desc->reserved0: %p\n",
+ d->reserved0);
+ printk("[64] desc->reserved1: %p\n",
+ d->reserved1);
+
+ count--;
+ if (count > 0)
+ d = (struct desc *) KADDR(d->next_desc_addr);
+ printk("\n");
+ }
+}
+
+/* Initialize the desc ring.
+ *
+ * - Can be called multiple times, with different "ndesc" values.
+ * - NOTE: We only create _one_ valid desc. Its next field points back to
+ *   itself (a ring buffer).
+ */
+static void init_desc(struct channel *c, int ndesc)
+{
+ struct desc *d, *tmp;
+ int i;
+ const int max_ndesc = PGSIZE / sizeof(struct desc);
+
+ /* sanity checks */
+ if (ndesc > max_ndesc) {
+ printk("cbdma: allocating only %d desc instead of %d desc\n",
+ max_ndesc, ndesc);
+ ndesc = max_ndesc;
+ }
+
+ c->ndesc = ndesc;
+
+	/* allocate a page for descriptors; the last 6 bits must be zero */
+ if (!c->pdesc)
+ c->pdesc = kpage_zalloc_addr();
+
+	if (!c->pdesc) {
+ printk("cbdma: cannot alloc page for desc\n");
+ return; /* TODO: return "false" */
+ }
+
+ /* preparing descriptors */
+ d = c->pdesc;
+ d->xfer_size = 1;
+ d->descriptor_control = CBDMA_DESC_CTRL_NULL_DESC;
+ d->next_desc_addr = PADDR(d);
+}
+
+/* struct channel is only used for get_register */
+static inline void cleanup_post_copy(struct channel *c)
+{
+ uint64_t value;
+
+ /* mmio_reg: DMACOUNT */
+ value = read16(get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+ if (value != 0) {
+ printk("cbdma: info: DMACOUNT = %d\n", value); /* should be 0 */
+		write16(0, get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
+ }
+
+ /* mmio_reg: CHANERR */
+ value = read32(get_register(c, IOAT_CHANERR_OFFSET));
+ if (value != 0) {
+ printk("cbdma: error: CHANERR = 0x%x\n", value);
+ write32(value, get_register(c, IOAT_CHANERR_OFFSET));
+ }
+
+ /* ack errors */
+ if (ACCESS_PCIE_CONFIG_SPACE) {
+ /* PCIe_reg: CHANERR_INT */
+ value = pcidev_read32(pci, CHANERR_INT);
+ if (value != 0) {
+ printk("cbdma: error: CHANERR_INT = 0x%x\n", value);
+ pcidev_write32(pci, CHANERR_INT, value);
+ }
+
+ /* PCIe_reg: DMAUNCERRSTS */
+ value = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ if (value != 0) {
+ printk("cbdma: error: DMAUNCERRSTS = 0x%x\n", value);
+ pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET,
+ value);
+ }
+ }
+}
+
+/* struct channel is only used for get_register */
+static inline void perform_dma(struct channel *c, physaddr_t completion_sts,
+ physaddr_t desc, uint16_t count)
+{
+ void __iomem *offset;
+
+ /* Set channel completion register where CBDMA will write content of
+ * CHANSTS register upon successful DMA completion or error condition
+ */
+ offset = get_register(c, IOAT_CHANCMP_OFFSET);
+ write64(completion_sts, offset);
+
+	/* write location of first desc to register CHAINADDR */
+ offset = get_register(c, IOAT_CHAINADDR_OFFSET(c->ver));
+ write64(desc, offset);
+ wmb_f();
+
+ /* writing valid number of descs: starts the DMA */
+ offset = get_register(c, IOAT_CHAN_DMACOUNT_OFFSET);
+ write16(count, offset);
+}
+
+static inline void wait_for_dma_completion(uint64_t *cmpsts)
+{
+ uint64_t sts;
+
+ do {
+ cpu_relax();
+ sts = *cmpsts;
+ if (cbdma_break_loop) {
+ printk("cbdma: cmpsts: %p = 0x%llx\n", cmpsts, sts);
+ break;
+ }
+ } while ((sts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+/* cbdma_ktest: performs a functional test of the CBDMA
+ *
+ * - Fills the static ktest.src and ktest.dst buffers
+ * - Prepares a descriptor for the DMA transfer (must be aligned)
+ * - Initiates the transfer and waits for completion
+ * - Results are reported by reads of the ktest file (open_ktest)
+ */
+static void cbdma_ktest(void)
+{
+	struct desc *d;
+ struct channel *c = &channel0;
+
+ /* initialize src and dst address */
+ memset(ktest.src, ktest.srcfill, KTEST_SIZE);
+ memset(ktest.dst, ktest.dstfill, KTEST_SIZE);
+ ktest.src[KTEST_SIZE-1] = '\0';
+ ktest.dst[KTEST_SIZE-1] = '\0';
+
+ /* for subsequent ktests */
+ ktest.srcfill += 1;
+
+ /* preparing descriptors */
+ d = channel0.pdesc;
+ d->xfer_size = (uint32_t) KTEST_SIZE;
+ d->src_addr = (uint64_t) PADDR(ktest.src);
+ d->dest_addr = (uint64_t) PADDR(ktest.dst);
+ d->descriptor_control = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
+ CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
+
+	memset(c->status, 0, sizeof(*c->status));
+
+ /* perform actual DMA */
+ perform_dma(c, PADDR(c->status), PADDR(c->pdesc), 1);
+ wait_for_dma_completion(c->status);
+ cleanup_post_copy(c);
+}
+
+/* convert a userspace pointer to kaddr based pointer
+ * TODO: this is dangerous and the pages are not pinned. Debugging only! */
+static inline void *uptr_to_kptr(void *ptr)
+{
+ return (void *) uva2kva(current, ptr, 1, PROT_WRITE);
+}
+
+/* Performs DMA using kernel addresses (IOMMU = off).
+ * Note: does not perform error checks on src/dest addrs.
+ * TODO: this only works if ktest has not been run, and it still fails on
+ * alternate runs. Likely some error in setting up the desc from userspace.
+ */
+static void issue_dma_kaddr(struct ucbdma *u)
+{
+ struct ucbdma *u_kaddr = uptr_to_kptr(u);
+ /* first field is struct desc */
+ struct desc *d = (struct desc *) u_kaddr;
+ struct channel *c = &channel0;
+
+ if (!u_kaddr) {
+ printk("[kern] cannot get kaddr for useraddr: %p\n", u);
+ return;
+ }
+ printk("[kern] ucbdma: user: %p kern: %p\n", u, u_kaddr);
+
+ /* preparing descriptors */
+ d->src_addr = (uint64_t) PADDR(uptr_to_kptr((void*) d->src_addr));
+ d->dest_addr = (uint64_t) PADDR(uptr_to_kptr((void*) d->dest_addr));
+ d->next_desc_addr = (uint64_t)
+ PADDR(uptr_to_kptr((void*) d->next_desc_addr));
+
+ /* perform actual DMA */
+ perform_dma(c, PADDR(&u_kaddr->status), PADDR(d), u_kaddr->ndesc);
+ wait_for_dma_completion(&u_kaddr->status);
+ cleanup_post_copy(c);
+}
+
+/* Performs DMA using virtual (process) addresses (IOMMU = on).
+ * TODO: verify once the IOMMU is set up and enabled.
+ */
+static void issue_dma_vaddr(struct ucbdma *u)
+{
+ struct ucbdma *u_kaddr = uptr_to_kptr(u);
+ struct channel *c = &channel0;
+
+ printk("[kern] IOMMU = ON\n");
+ printk("[kern] ucbdma: user: %p kern: %p ndesc: %d\n", u,
+ &u_kaddr->desc, u_kaddr->ndesc);
+
+ /* perform actual DMA */
+ perform_dma(c, (physaddr_t) &u->status, (physaddr_t) &u->desc,
+ u_kaddr->ndesc);
+ wait_for_dma_completion(&u_kaddr->status);
+	cleanup_post_copy(c);
+}
+
+/* open_stats: returns stats about the device and driver */
+static struct sized_alloc *open_stats(void)
+{
+ struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+ uint64_t value;
+
+ sza_printf(sza,
+ "Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+ pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+ /* driver info. */
+ sza_printf(sza, " Driver Information:\n");
+	sza_printf(sza,
+		   "\tmmio: %p\n"
+		   "\tmmio_phy: 0x%llx\n"
+		   "\tmmio_sz: %u\n"
+		   "\ttotal_channels: %d\n"
+		   "\tdesc_kaddr: %p\n"
+		   "\tdesc_paddr: %p\n"
+		   "\tdesc_num: %d\n"
+		   "\tver: 0x%x\n"
+		   "\tstatus_kaddr: %p\n"
+		   "\tstatus_paddr: %p\n"
+		   "\tstatus_value: 0x%llx\n",
+		   mmio, mmio_phy, mmio_sz, chancnt,
+		   channel0.pdesc, PADDR(channel0.pdesc), channel0.ndesc,
+		   channel0.ver, channel0.status, PADDR(channel0.status),
+		   *(uint64_t *)channel0.status);
+
+ /* print the PCI registers */
+ sza_printf(sza, " PCIe Config Registers:\n");
+
+ value = pcidev_read16(pci, PCI_CMD_REG);
+ sza_printf(sza, "\tPCICMD: 0x%x\n", value);
+
+ value = pcidev_read16(pci, PCI_STATUS_REG);
+ sza_printf(sza, "\tPCISTS: 0x%x\n", value);
+
+ value = pcidev_read16(pci, PCI_REVID_REG);
+ sza_printf(sza, "\tRID: 0x%x\n", value);
+
+ value = pcidev_read32(pci, PCI_BAR0_STD);
+ sza_printf(sza, "\tCB_BAR: 0x%x\n", value);
+
+ value = pcidev_read16(pci, DEVSTS);
+ sza_printf(sza, "\tDEVSTS: 0x%x\n", value);
+
+ value = pcidev_read32(pci, PMCSR);
+ sza_printf(sza, "\tPMCSR: 0x%x\n", value);
+
+ value = pcidev_read32(pci, DMAUNCERRSTS);
+ sza_printf(sza, "\tDMAUNCERRSTS: 0x%x\n", value);
+
+ value = pcidev_read32(pci, DMAUNCERRMSK);
+ sza_printf(sza, "\tDMAUNCERRMSK: 0x%x\n", value);
+
+ value = pcidev_read32(pci, DMAUNCERRSEV);
+ sza_printf(sza, "\tDMAUNCERRSEV: 0x%x\n", value);
+
+ value = pcidev_read8(pci, DMAUNCERRPTR);
+ sza_printf(sza, "\tDMAUNCERRPTR: 0x%x\n", value);
+
+ value = pcidev_read8(pci, DMAGLBERRPTR);
+ sza_printf(sza, "\tDMAGLBERRPTR: 0x%x\n", value);
+
+ value = pcidev_read32(pci, CHANERR_INT);
+ sza_printf(sza, "\tCHANERR_INT: 0x%x\n", value);
+
+ value = pcidev_read32(pci, CHANERRMSK_INT);
+ sza_printf(sza, "\tCHANERRMSK_INT: 0x%x\n", value);
+
+ value = pcidev_read32(pci, CHANERRSEV_INT);
+ sza_printf(sza, "\tCHANERRSEV_INT: 0x%x\n", value);
+
+ value = pcidev_read8(pci, CHANERRPTR);
+ sza_printf(sza, "\tCHANERRPTR: 0x%x\n", value);
+
+ sza_printf(sza, " CHANNEL_0 MMIO Registers:\n");
+
+ value = read8(mmio + CBDMA_CHANCMD_OFFSET);
+ sza_printf(sza, "\tCHANCMD: 0x%x\n", value);
+
+ value = read8(mmio + IOAT_VER_OFFSET);
+ sza_printf(sza, "\tCBVER: 0x%x major=%d minor=%d\n",
+ value,
+ GET_IOAT_VER_MAJOR(value),
+ GET_IOAT_VER_MINOR(value));
+
+ value = read16(mmio + CBDMA_CHANCTRL_OFFSET);
+ sza_printf(sza, "\tCHANCTRL: 0x%llx\n", value);
+
+ value = read64(mmio + CBDMA_CHANSTS_OFFSET);
+ sza_printf(sza, "\tCHANSTS: 0x%x [%s], desc_addr: %p, raw: 0x%llx\n",
+ (value & IOAT_CHANSTS_STATUS),
+ cbdma_str_chansts(value),
+ (value & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR),
+ value);
+
+ value = read64(mmio + CBDMA_CHAINADDR_OFFSET);
+ sza_printf(sza, "\tCHAINADDR: %p\n", value);
+
+ value = read64(mmio + CBDMA_CHANCMP_OFFSET);
+ sza_printf(sza, "\tCHANCMP: %p\n", value);
+
+ value = read16(mmio + CBDMA_DMACOUNT_OFFSET);
+ sza_printf(sza, "\tDMACOUNT: %d\n", value);
+
+ value = read32(mmio + CBDMA_CHANERR_OFFSET);
+ sza_printf(sza, "\tCHANERR: 0x%x\n", value);
+
+ return sza;
+}
+
+static struct sized_alloc *open_reset(void)
+{
+ struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+ if (cbdma_is_reset_pending())
+ sza_printf(sza, "Status: Reset is pending\n");
+ else
+ sza_printf(sza, "Status: No pending reset\n");
+
+ sza_printf(sza, "Write '1' to perform reset!\n");
+
+ return sza;
+}
+
+static struct sized_alloc *open_iommu(void)
+{
+ struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+ sza_printf(sza, "IOMMU enabled = %s\n", iommu_enabled ? "yes":"no");
+ sza_printf(sza, "Write '0' to disable or '1' to enable the IOMMU\n");
+
+ return sza;
+}
+
+/* targets channel0 */
+static struct sized_alloc *open_ktest(void)
+{
+ struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
+
+ /* run the test */
+ cbdma_ktest();
+
+ sza_printf(sza,
+ "Self-test Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
+ pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
+
+ sza_printf(sza, "\tChannel Status: %s (raw: 0x%x)\n",
+ cbdma_str_chansts(*((uint64_t *)channel0.status)),
+ (*((uint64_t *)channel0.status) & IOAT_CHANSTS_STATUS));
+
+ sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
+ sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
+ sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
+ sza_printf(sza, "\tsrc_str (after copy): %s\n", ktest.src);
+ sza_printf(sza, "\tdst_str (after copy): %s\n", ktest.dst);
+
+ return sza;
+}
+
+/* cbdma_reset_device: resets the device, which clears any earlier
+ * programming errors
+ */
+void cbdma_reset_device(void)
+{
+ int cbdmaver;
+ uint32_t error;
+
+ /* make sure the driver is initialized */
+ if (!mmio)
+ error(EIO, "cbdma: mmio addr not set");
+
+ pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_MASTER);
+ /* fetch version */
+ cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+	/* ack channel errors */
+ error = read32(mmio + CBDMA_CHANERR_OFFSET);
+ write32(error, mmio + CBDMA_CHANERR_OFFSET);
+
+ if (ACCESS_PCIE_CONFIG_SPACE) {
+		/* ack pci device level errors */
+ /* clear DMA Cluster Uncorrectable Error Status */
+ error = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET, error);
+
+ /* clear DMA Channel Error Status */
+ error = pcidev_read32(pci, IOAT_PCI_CHANERR_INT_OFFSET);
+ pcidev_write32(pci, IOAT_PCI_CHANERR_INT_OFFSET, error);
+ }
+
+ /* reset */
+ write8(IOAT_CHANCMD_RESET, mmio
+ + IOAT_CHANNEL_MMIO_SIZE
+ + IOAT_CHANCMD_OFFSET(cbdmaver));
+
+ pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+ | PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
+
+ printk("cbdma: reset performed\n");
+}
+
+/* cbdma_is_reset_pending: returns true if reset is pending
+ */
+bool cbdma_is_reset_pending(void)
+{
+ int cbdmaver;
+ int status;
+
+ /* make sure the driver is initialized */
+ if (!mmio) {
+ error(EPERM, "cbdma: mmio addr not set");
+ return false; /* does not reach */
+ }
+
+ /* fetch version */
+ cbdmaver = read8(mmio + IOAT_VER_OFFSET);
+
+ status = read8(mmio + IOAT_CHANNEL_MMIO_SIZE
+ + IOAT_CHANCMD_OFFSET(cbdmaver));
+
+ return (status & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+///////// SYS INTERFACE ////////////////////////////////////////////////////////
+
+static struct chan *cbdmaopen(struct chan *c, int omode)
+{
+ switch (c->qid.path) {
+ case Qcbdmastats:
+ c->synth_buf = open_stats();
+ break;
+ case Qcbdmareset:
+ c->synth_buf = open_reset();
+ break;
+ case Qcbdmaiommu:
+ c->synth_buf = open_iommu();
+ break;
+ case Qcbdmaktest:
+ c->synth_buf = open_ktest();
+ break;
+ case Qdir:
+ case Qcbdmaucopy:
+ break;
+ default:
+ error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+ }
+
+ return devopen(c, omode, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
+}
+
+static void cbdmaclose(struct chan *c)
+{
+ switch (c->qid.path) {
+ case Qcbdmastats:
+ case Qcbdmareset:
+ case Qcbdmaiommu:
+ case Qcbdmaktest:
+ kfree(c->synth_buf);
+ c->synth_buf = NULL;
+ break;
+ case Qdir:
+ case Qcbdmaucopy:
+ break;
+ default:
+ error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+ }
+}
+
+static size_t cbdmaread(struct chan *c, void *va, size_t n, off64_t offset)
+{
+ struct sized_alloc *sza = c->synth_buf;
+
+ switch (c->qid.path) {
+ case Qcbdmaktest:
+ case Qcbdmastats:
+ case Qcbdmareset:
+ case Qcbdmaiommu:
+ return readstr(offset, va, n, sza->buf);
+ case Qcbdmaucopy:
+ return readstr(offset, va, n,
+ "Write address of struct ucopy to issue DMA\n");
+ case Qdir:
+ return devdirread(c, va, n, cbdmadir, ARRAY_SIZE(cbdmadir),
+ devgen);
+ default:
+ error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+ }
+
+ return -1; /* not reached */
+}
+
+static void init_channel(struct channel *c, int cnum, int ndesc)
+{
+ c->number = cnum;
+ c->pdesc = NULL;
+ init_desc(c, ndesc);
+
+	/* this is a writeback field; the hardware will update this value.
+	 * Per the comment on struct channel, it needs to be 64B aligned. */
+	if (c->status == 0)
+		c->status = kmalloc_align(sizeof(uint64_t), MEM_WAIT, 64);
+ assert(c->status != 0);
+
+ /* cbdma version */
+ c->ver = read8(mmio + IOAT_VER_OFFSET);
+
+	/* Set "Any Error Abort Enable": enables abort for any error
+	 * encountered.
+	 * Set "Error Completion Enable": enables a completion write to the
+	 * address in CHANCMP for any error.
+	 * Reset "Interrupt Disable": W1C; when clear, enables the interrupt
+	 * to fire for the next descriptor that specifies an interrupt.
+	 */
+ write8(IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN,
+ get_register(c, IOAT_CHANCTRL_OFFSET));
+}
+
+static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
+{
+ switch (c->qid.path) {
+ case Qdir:
+ error(EPERM, "writing not permitted");
+ case Qcbdmaktest:
+ case Qcbdmastats:
+ error(EPERM, ERROR_FIXME);
+ case Qcbdmareset:
+ if (offset == 0 && n > 0 && *(char *)va == '1') {
+ cbdma_reset_device();
+ init_channel(&channel0, 0, NDESC);
+ } else {
+			error(EINVAL, "write '1' to perform a reset");
+ }
+ return n;
+ case Qcbdmaucopy:
+ if (offset == 0 && n > 0) {
+ printk("[kern] value from userspace: %p\n", va);
+ if (iommu_enabled)
+ issue_dma_vaddr(va);
+ else
+ issue_dma_kaddr(va);
+			return n;
+ }
+ return 0;
+ case Qcbdmaiommu:
+ if (offset == 0 && n > 0 && *(char *)va == '1')
+ iommu_enabled = true;
+ else if (offset == 0 && n > 0 && *(char *)va == '0')
+ iommu_enabled = false;
+ else
+			error(EINVAL, "write '0' or '1' to toggle the IOMMU");
+ return n;
+ default:
+ error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
+ }
+
+ return -1; /* not reached */
+}
+
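+/* MSI handler: ack/re-arm the channel interrupt by setting INT_REARM in
+ * CHANCTRL */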
+static void cbdma_interrupt(struct hw_trapframe *hw_tf, void *arg)
+{
+ uint16_t value;
+
+ value = read16(get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+ write16(value | IOAT_CHANCTRL_INT_REARM,
+ get_register(&channel0, IOAT_CHANCTRL_OFFSET));
+}
+
+void cbdmainit(void)
+{
+ int tbdf;
+ int i;
+ int id;
+ struct pci_device *pci_iter;
+
+ /* assigning global variables */
+ pci = NULL;
+ mmio = NULL;
+ mmio_sz = -1;
+
+ /* initialize cbdmadev */
+ memset(&cbdmadev, 0x0, sizeof(cbdmadev));
+
+	/* look for a supported CBDMA/IOAT function */
+ STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) {
+ id = pci_iter->dev_id << 16 | pci_iter->ven_id;
+ switch (id) {
+ default:
+ continue;
+ case ioat2021:
+ case ioat2f20:
+ /* hack: bus 0 is the PCI_ALL iommu.
+ * Can remove this once we add code for scoped IOMMU */
+ if (pci_iter->bus != 0)
+ continue;
+ pci = pci_iter;
+ break;
+ }
+ }
+
+ if (pci == NULL) {
+ printk("cbdma: no Intel CBDMA device found\n");
+ return;
+ }
+
+	/* find the first mapped mmio region */
+	for (i = 0; i < COUNT_OF(pci->bar); i++) {
+		if (pci->bar[i].mmio_sz == 0)
+			continue;
+		mmio_phy = (pci->bar[i].mmio_base32
+			    ? pci->bar[i].mmio_base32
+			    : pci->bar[i].mmio_base64);
+		mmio_sz = pci->bar[i].mmio_sz;
+		mmio = (void *) vmap_pmem_nocache(mmio_phy, mmio_sz);
+		break;
+	}
+
+ /* handle any errors */
+ if (mmio_sz == -1) {
+ printk("cbdma: invalid mmio_sz\n");
+ return;
+ }
+
+ if (mmio == NULL) {
+ printk("cbdma: cannot map %p\n", mmio_phy);
+ return;
+ }
+
+ /* performance related stuff */
+ pci_set_cacheline_size(pci);
+
+ /* Get the channel count. Top 3 bits of the register are reserved. */
+ chancnt = read8(mmio + IOAT_CHANCNT_OFFSET) & 0x1F;
+
+ /* initialization successful; print stats */
+	printk("cbdma: registered [%x:%x] at %02x:%02x.%x // "
+	       "mmio:%p mmio_sz:%u\n",
+	       pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func,
+	       mmio, mmio_sz);
+
+ tbdf = MKBUS(BusPCI, pci->bus, pci->dev, pci->func);
+ register_irq(pci->irqline, cbdma_interrupt, NULL, tbdf);
+
+ /* reset device */
+ cbdma_reset_device();
+
+ /* initialize channel(s) */
+ init_channel(&channel0, 0, NDESC);
+
+ /* setup ktest struct */
+ ktest.srcfill = '1';
+ ktest.dstfill = '0';
+}
+
+struct dev cbdmadevtab __devtab = {
+ .name = "cbdma",
+ .reset = devreset,
+ .init = cbdmainit,
+ .shutdown = devshutdown,
+ .attach = cbdmaattach,
+ .walk = cbdmawalk,
+ .stat = cbdmastat,
+ .open = cbdmaopen,
+ .create = devcreate,
+ .close = cbdmaclose,
+ .read = cbdmaread,
+ .bread = devbread,
+ .write = cbdmawrite,
+ .bwrite = devbwrite,
+ .remove = devremove,
+ .wstat = devwstat,
+};
diff --git a/kern/include/cbdma_regs.h b/kern/include/cbdma_regs.h
new file mode 100644
index 0000000..6c8ec3d
--- /dev/null
+++ b/kern/include/cbdma_regs.h
@@ -0,0 +1,268 @@
+/* Copyright (c) 2019 Google Inc
+ * Aditya Basu <mitthu@google.com>
+ * See LICENSE for details.
+ *
+ * Copy of CBDMA register definitions from Linux kernel (around v5.1)
+ * drivers/dma/ioat/registers.h
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define ACCESS_PCIE_CONFIG_SPACE 1
+
+bool cbdma_is_reset_pending(void);
+void cbdma_reset_device(void);
+
+/* file: drivers/dma/ioat/hw.h */
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+#define IOAT_VER_3_2 0x32 /* Version 3.2 */
+#define IOAT_VER_3_3 0x33 /* Version 3.3 */
+#define IOAT_VER_3_4 0x34 /* Version 3.4 */
+/* -------------------------------------- */
+
+#define IOAT_PCI_DMACTRL_OFFSET 0x48
+#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET 0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
+
+#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
+#define IOAT_XFERCAP_4KB 12
+#define IOAT_XFERCAP_8KB 13
+#define IOAT_XFERCAP_16KB 14
+#define IOAT_XFERCAP_32KB 15
+#define IOAT_XFERCAP_32GB 0
+
+#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
+#define IOAT_GENCTRL_DEBUG_EN 0x01
+
+#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET 0x08 /* 8-bit */
+#define IOAT_VER_MAJOR_MASK 0xF0
+#define IOAT_VER_MINOR_MASK 0x0F
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
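+/* e.g., a CBVER readback of 0x33 (IOAT_VER_3_3) yields major 3, minor 3 */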
+
+#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
+#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
+
+#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK 0x00000001
+#define IOAT_CAP_CRC 0x00000002
+#define IOAT_CAP_SKIP_MARKER 0x00000004
+#define IOAT_CAP_DCA 0x00000010
+#define IOAT_CAP_CRC_MOVE 0x00000020
+#define IOAT_CAP_FILL_BLOCK 0x00000040
+#define IOAT_CAP_APIC 0x00000080
+#define IOAT_CAP_XOR 0x00000100
+#define IOAT_CAP_PQ 0x00000200
+#define IOAT_CAP_DWBES 0x00002000
+#define IOAT_CAP_RAID16SS 0x00020000
+
+#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ERR_INT_EN |\
+ IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW 0x04
+#define IOAT2_CHANSTS_OFFSET_LOW 0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_DONE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET 0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH 0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET 0x0E
+#define IOAT3_PCI_CONTROL_MEMWR 0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET 0x02
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_RESUME 0x10
+#define IOAT_CHANCMD_ABORT 0x08
+#define IOAT_CHANCMD_SUSPEND 0x04
+#define IOAT_CHANCMD_APPEND 0x02
+#define IOAT_CHANCMD_START 0x01
+
+#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW 0x18
+#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
+
+#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW 0x20
+#define IOAT_CDAR_OFFSET_HIGH 0x24
+
+#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
+#define IOAT_CHANERR_CHANCMD_ERR 0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
+#define IOAT_CHANERR_READ_DATA_ERR 0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
+#define IOAT_CHANERR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
+#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
+#define IOAT_CHANERR_XOR_Q_ERR 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
+
+/* Extras: Added by Aditya Basu <mitthu@google.com> */
+#define CBDMA_CHANCMD_OFFSET 0x84
+#define CBDMA_CHANSTS_OFFSET 0x88
+#define CBDMA_CHANCTRL_OFFSET 0x80
+#define CBDMA_DMACOUNT_OFFSET 0x86
+#define CBDMA_CHAINADDR_OFFSET 0x90
+#define CBDMA_CHANCMP_OFFSET 0x98
+#define CBDMA_CHANERR_OFFSET 0xa8
+#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION 0x01 /* 32-bit field */
+#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION 0x08
+#define CBDMA_DESC_CTRL_NULL_DESC 0x20
+
+#define IOAT_CHANSTS_ARMED 0x4
+
+#endif /* _IOAT_REGISTERS_H_ */