cbdma: overhauled #cbdma
This uses the dma engine API, and the Linux IOAT driver beneath that.
For whatever reason, we auto-assign 0:4.3 to the kernel, and that is the
only device used by #cbdma/ktest.
cat cbdma/ktest # tests 0:4.3 in the kernel
echo 00:04.3 0 > iommu/detach # detaches from the kernel
echo 00:04.4 PID > iommu/attach # attaches to PID
ucbdma 0:4.3 # attach, test, detach
ucbdma 0:4.4
ucbdma spits out all of the iommu mappings when it runs. It's basically
the ktest "do a single memcpy and don't break anything" type of test,
i.e. it doesn't test stuff like "can i do a ucbdma without assigning" or
whatnot.
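
For reference, the guts of tests/ucbdma.c boil down to a sketch like the
following: fill in a struct ucbdma (the ABI from this commit) with user
VAs and the device's BDF, then write it to #cbdma/ucopy. Here dst, src,
and len stand for your mmaped buffers and copy size; error handling is
elided, and the kernel rejects writes that aren't exactly sizeof(struct
ucbdma).

	struct ucbdma u = { .dst_addr = (uint64_t)dst,
	                    .src_addr = (uint64_t)src,
	                    .xfer_size = len };
	int fd;

	snprintf(u.bdf_str, sizeof(u.bdf_str), "%s", "00:04.3");
	fd = open("#cbdma/ucopy", O_RDWR);
	write(fd, &u, sizeof(u));
	close(fd);
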
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dev/Kbuild b/kern/drivers/dev/Kbuild
index 77e8b78..ca1a5b4 100644
--- a/kern/drivers/dev/Kbuild
+++ b/kern/drivers/dev/Kbuild
@@ -11,7 +11,7 @@
obj-y += mem.o
obj-y += mnt.o
obj-y += pci.o
-obj-n += cbdma.o
+obj-y += cbdma.o
obj-y += iommu.o
obj-y += pipe.o
obj-y += proc.o
diff --git a/kern/drivers/dev/cbdma.c b/kern/drivers/dev/cbdma.c
index fdd1a52..8e3bbf8 100644
--- a/kern/drivers/dev/cbdma.c
+++ b/kern/drivers/dev/cbdma.c
@@ -1,5 +1,6 @@
-/* Copyright (c) 2019 Google Inc
+/* Copyright (c) 2019-2020 Google Inc
* Aditya Basu <mitthu@google.com>
+ * Barret Rhoden <brho@google.com>
* See LICENSE for details.
*
* Useful resources:
@@ -10,38 +11,22 @@
* - IOAT: (Intel) I/O Acceleration Technology
* - CDMA: Crystal Beach DMA
*
- * CBDMA Notes
- * ===========
- * Every CBDMA PCI function has one MMIO address space (so only BAR0). Each
- * function can have multiple channels. Currently these devices only have one
- * channel per function. This can be read from the CHANCNT register (8-bit)
- * at offset 0x0.
- *
- * Each channel be independently configured for DMA. The MMIO config space of
- * every channel is 0x80 bytes. The first channel (or CHANNEL_0) starts at 0x80
- * offset.
- *
- * CHAINADDR points to a descriptor (desc) ring buffer. More precisely it points
- * to the first desc in the ring buffer. Each desc represents a single DMA
- * operation. Look at "struct desc" for it's structure.
- *
- * Each desc is 0x40 bytes (or 64 bytes) in size. A 4k page will be able to hold
- * 4k/64 = 64 entries. Note that the lower 6 bits of CHANADDR should be zero. So
- * the first desc's address needs to be aligned accordingly. Page-aligning the
- * first desc address will work because 4k page-aligned addresses will have
- * the last 12 bits as zero.
- *
* TODO
- * ====
- * *MAJOR*
- * - Update to the correct struct desc (from Linux kernel)
- * - Make the status field embedded in the channel struct (no ptr business)
- * - Add file for errors
- * - Add locks to guard desc access
- * - Freeze VA->PA page mappings till DMA is completed (esp. for ucbdma)
- * *MINOR*
- * - Replace all CBDMA_* constants with IOAT_*
- * - Initializes only the first found CBDMA device
+ * - Consider something lighter-weight than the qlock for ensuring the device
+ * doesn't get detached during operation. kref, perhaps. There's also an
+ * element of "stop new people from coming in", like we do with closing FDs.
+ * There's also stuff that the dmaengine does in linux. See dma_chan_get().
+ * - Freeze or handle faults with VA->PA page mappings, till DMA is completed.
+ * Right now, we could get iommu faults, which was the purpose of this whole
+ * thing.
+ * - The dmaengine has helpers for some of this. dma_set_unmap() is an
+ * "unmap all these things when you're done" approach, called by __cleanup
+ * -> dma_descriptor_unmap(). The unmap struct is basically a todo list.
+ * - There's a lot of stuff we could do with the DMA engine to reduce the
+ * amount of device touches, contention, and other inefficiencies.
+ * issue_dma() is a minimalist one. No batching, etc. And with the pdev
+ * qlock, we have only a single request per PCI device, though there may be
+ * numerous channels.
*/
#include <kmalloc.h>
@@ -54,136 +39,237 @@
#include <arch/pci.h>
#include <page_alloc.h>
#include <pmap.h>
-#include <cbdma_regs.h>
#include <arch/pci_regs.h>
-#define NDESC 1 // initialize these many descs
-#define BUFFERSZ 8192
-
-struct dev cbdmadevtab;
-static struct pci_device *pci;
-static void *mmio;
-static uint8_t chancnt; /* Total number of channels per function */
-static bool iommu_enabled;
-static bool cbdma_break_loop; /* toggle_foo functionality */
-
-/* PCIe Config Space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 */
-enum {
- DEVSTS = 0x9a, // 16-bit
- PMCSR = 0xe4, // 32-bit
-
- DMAUNCERRSTS = 0x148, // 32-bit (DMA Cluster Uncorrectable Error Status)
- DMAUNCERRMSK = 0x14c, // 32-bit
- DMAUNCERRSEV = 0x150, // 32-bit
- DMAUNCERRPTR = 0x154, // 8-bit
- DMAGLBERRPTR = 0x160, // 8-bit
-
- CHANERR_INT = 0x180, // 32-bit
- CHANERRMSK_INT = 0x184, // 32-bit
- CHANERRSEV_INT = 0x188, // 32-bit
- CHANERRPTR = 0x18c, // 8-bit
-};
+#include <linux/dmaengine.h>
/* QID Path */
enum {
Qdir = 0,
Qcbdmaktest = 1,
- Qcbdmastats = 2,
- Qcbdmareset = 3,
- Qcbdmaucopy = 4,
- Qcbdmaiommu = 5,
-};
-
-/* supported ioat devices */
-enum {
- ioat2021 = (0x2021 << 16) | 0x8086,
- ioat2f20 = (0x2f20 << 16) | 0x8086,
+ Qcbdmaucopy = 2,
};
static struct dirtab cbdmadir[] = {
{".", {Qdir, 0, QTDIR}, 0, 0555},
{"ktest", {Qcbdmaktest, 0, QTFILE}, 0, 0555},
- {"stats", {Qcbdmastats, 0, QTFILE}, 0, 0555},
- {"reset", {Qcbdmareset, 0, QTFILE}, 0, 0755},
{"ucopy", {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
- {"iommu", {Qcbdmaiommu, 0, QTFILE}, 0, 0755},
};
-/* Descriptor structue as defined in the programmer's guide.
- * It describes a single DMA transfer
- */
-struct desc {
- uint32_t xfer_size;
- uint32_t descriptor_control;
- uint64_t src_addr;
- uint64_t dest_addr;
- uint64_t next_desc_addr;
- uint64_t next_source_address;
- uint64_t next_destination_address;
- uint64_t reserved0;
- uint64_t reserved1;
+/* TODO: this is a device/kernel ABI. ucbdma.c has a copy. It's probably not
+ * worth putting in its own header, since this is really cheap test code. */
+struct ucbdma {
+ uint64_t dst_addr;
+ uint64_t src_addr;
+ uint32_t xfer_size;
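+	/* BDF of the device to use, e.g. "00:04.3"; see pci_match_string() */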
+ char bdf_str[10];
} __attribute__((packed));
-/* The channels are indexed starting from 0 */
-static struct channel {
- uint8_t number; // channel number
- struct desc *pdesc; // desc ptr
- int ndesc; // num. of desc
- uint64_t *status; // reg: CHANSTS, needs to be 64B aligned
- uint8_t ver; // reg: CBVER
-
-/* DEPRECATED */
-/* MMIO address space; from Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
- * Every channel 0x80 bytes in size.
- */
- uint8_t chancmd;
- uint8_t xrefcap;
- uint16_t chanctrl;
- uint16_t dmacount;
- uint32_t chanerr;
- uint64_t chansts;
- uint64_t chainaddr;
-} cbdmadev, channel0;
-
#define KTEST_SIZE 64
static struct {
- char printbuf[4096];
char src[KTEST_SIZE];
char dst[KTEST_SIZE];
char srcfill;
char dstfill;
-} ktest; /* TODO: needs locking */
+} ktest = {.srcfill = '0', .dstfill = 'X'};
-/* struct passed from the userspace */
-struct ucbdma {
- struct desc desc;
- uint64_t status;
- uint16_t ndesc;
-};
-
-/* for debugging via kfunc; break out of infinite polling loops */
-void toggle_cbdma_break_loop(void)
+static inline struct pci_device *dma_chan_to_pci_dev(struct dma_chan *dc)
{
- cbdma_break_loop = !cbdma_break_loop;
- printk("cbdma: cbdma_break_loop = %d\n", cbdma_break_loop);
+ return container_of(dc->device->dev, struct pci_device, linux_dev);
}
-/* Function definitions start here */
-static inline bool is_initialized(void)
+/* Filter function for finding a particular PCI device. If
+ * __dma_request_channel() asks for a particular device, we'll only give it that
+ * chan. If you don't care, pass NULL, and you'll get any free chan. */
+static bool filter_pci_dev(struct dma_chan *dc, void *arg)
{
- if (!pci || !mmio)
- return false;
- else
- return true;
+ struct pci_device *pdev = dma_chan_to_pci_dev(dc);
+
+ if (arg)
+ return arg == pdev;
+ return true;
}
-static void *get_register(struct channel *c, int offset)
+/* Addresses are device-physical. Caller holds the pdev qlock. */
+static void issue_dma(struct pci_device *pdev, physaddr_t dst, physaddr_t src,
+ size_t len, bool async)
{
- uint64_t base = (c->number + 1) * IOAT_CHANNEL_MMIO_SIZE;
+ ERRSTACK(1);
+ struct dma_chan *dc;
+ dma_cap_mask_t mask;
+ struct dma_async_tx_descriptor *tx;
+ int flags;
- return (char *) mmio + base + offset;
+ struct completion cmp;
+ unsigned long tmo;
+ dma_cookie_t cookie;
+
+ /* dmaengine_get works for the non-DMA_PRIVATE devices. A lot
+ * of devices turn on DMA_PRIVATE, in which case they won't be in the
+ * general pool available to the dmaengine. Instead, we directly
+ * request DMA channels - particularly since we want specific devices to
+ * use with the IOMMU. */
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+ dc = __dma_request_channel(&mask, filter_pci_dev, pdev);
+ if (!dc)
+ error(EFAIL, "Couldn't get a DMA channel");
+ if (waserror()) {
+ dma_release_channel(dc);
+ nexterror();
+ }
+
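+	/* DMA_PREP_INTERRUPT asks the driver to run tx->callback when the
+	 * descriptor completes; only the async path below needs it. */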
+ flags = 0;
+ if (async)
+ flags |= DMA_PREP_INTERRUPT;
+
+ if (!is_dma_copy_aligned(dc->device, dst, src, len))
+ error(EINVAL, "Bad copy alignment: %p %p %lu", dst, src, len);
+
+ tx = dmaengine_prep_dma_memcpy(dc, dst, src, len, flags);
+ if (!tx)
+ error(EFAIL, "Couldn't prep the memcpy!\n");
+
+ if (async) {
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = (dma_async_tx_callback)complete;
+ tx->callback_param = &cmp;
+ }
+
+ cookie = dmaengine_submit(tx);
+ if (cookie < 0)
+ error(EFAIL, "Failed to submit the DMA...");
+
+ /* You can poke this. dma_sync_wait() also calls this. */
+ dma_async_issue_pending(dc);
+
+ if (async) {
+ /* Giant warning: the polling methods, like
+ * dmaengine_tx_status(), might actually trigger the
+ * tx->callback. At least the IOAT driver does this. */
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+ if (tmo == 0 || dmaengine_tx_status(dc, cookie, NULL)
+ != DMA_COMPLETE) {
+ error(ETIMEDOUT, "timeout or related spurious failure");
+ }
+ } else {
+ dma_wait_for_async_tx(tx);
+ }
+
+ dma_release_channel(dc);
+ poperror();
}
+static void issue_dma_ucbdma(struct ucbdma *u)
+{
+ ERRSTACK(1);
+ struct pci_device *pdev;
+
+ pdev = pci_match_string(u->bdf_str);
+ if (!pdev)
+ error(ENODEV, "No device %s", u->bdf_str);
+ /* The qlock prevents unassignment from happening during an operation.
+ * If that happened, the driver's reset method would be called while the
+ * op is ongoing. The driver might be able to handle that. Though when
+ * the iommu mappings are destroyed, the driver is likely to get wedged.
+ *
+ * A kref or something else might work better here, to allow multiple
+ * DMAs at a time. */
+ qlock(&pdev->qlock);
+ if (waserror()) {
+ qunlock(&pdev->qlock);
+ nexterror();
+ }
+ if (pdev->proc_owner != current)
+ error(EINVAL, "wrong proc_owner");
+ issue_dma(pdev, u->dst_addr, u->src_addr, u->xfer_size, true);
+ qunlock(&pdev->qlock);
+ poperror();
+}
+
+/* Runs a basic test from within the kernel on 0:4.3.
+ *
+ * One option would be to have write() set the sza buffer. It won't be static
+ * through the chan's lifetime (so you'd need to deal with syncing), but it'd
+ * let you set things. Another would be to have another chan/file for the BDF
+ * (and you'd sync on that). */
+static struct sized_alloc *open_ktest(void)
+{
+ ERRSTACK(2);
+ struct pci_device *pdev = pci_match_tbdf(MKBUS(0, 0, 4, 3));
+ struct sized_alloc *sza;
+ physaddr_t dst, src; /* device addrs */
+ char *dst_d, *src_d; /* driver addrs */
+ uintptr_t prev;
+
+ if (!pdev)
+ error(EINVAL, "no 00:04.3");
+
+ qlock(&pdev->qlock);
+ /* We need to get into the address space of the device, which might be
+ * NULL if it's the kernel's or unassigned. */
+ prev = switch_to(pdev->proc_owner);
+ if (waserror()) {
+ switch_back(pdev->proc_owner, prev);
+ qunlock(&pdev->qlock);
+ nexterror();
+ }
+
+ if (pdev->state != DEV_STATE_ASSIGNED_KERNEL &&
+ pdev->state != DEV_STATE_ASSIGNED_USER)
+ error(EINVAL, "00:04.3 is unassigned (%d)", pdev->state);
+
+ dst_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &dst,
+ MEM_WAIT);
+ src_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &src,
+ MEM_WAIT);
+
+ if (waserror()) {
+ dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
+ dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
+ nexterror();
+ }
+
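+	/* Bump the fill char so back-to-back ktests produce different data */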
+ ktest.srcfill += 1;
+ /* initialize src and dst address */
+ memset(src_d, ktest.srcfill, KTEST_SIZE);
+ memset(dst_d, ktest.dstfill, KTEST_SIZE);
+ src_d[KTEST_SIZE-1] = '\0';
+ dst_d[KTEST_SIZE-1] = '\0';
+
+ issue_dma(pdev, dst, src, KTEST_SIZE, true);
+
+ sza = sized_kzmalloc(1024, MEM_WAIT);
+ sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
+ sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
+ sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
+
+	/* %s on a uptr causes a printfmt warning; stop at 20 chars for sanity */
+ sza_printf(sza, "\tsrc_str (after copy): ");
+ for (int i = 0; i < 20; i++)
+ sza_printf(sza, "%c", src_d[i]);
+ sza_printf(sza, "\n");
+
+ sza_printf(sza, "\tdst_str (after copy): ");
+ for (int i = 0; i < 20; i++)
+ sza_printf(sza, "%c", dst_d[i]);
+ sza_printf(sza, "\n");
+
+ dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
+ dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
+ poperror();
+
+ switch_back(pdev->proc_owner, prev);
+ qunlock(&pdev->qlock);
+ poperror();
+
+ return sza;
+}
+
+struct dev cbdmadevtab;
+
static char *devname(void)
{
return cbdmadevtab.name;
@@ -191,8 +277,6 @@
static struct chan *cbdmaattach(char *spec)
{
- if (!is_initialized())
- error(ENODEV, "no cbdma device detected");
return devattach(devname(), spec);
}
@@ -208,514 +292,9 @@
return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
}
-/* return string representation of chansts */
-char *cbdma_str_chansts(uint64_t chansts)
-{
- char *status = "unrecognized status";
-
- switch (chansts & IOAT_CHANSTS_STATUS) {
- case IOAT_CHANSTS_ACTIVE:
- status = "ACTIVE";
- break;
- case IOAT_CHANSTS_DONE:
- status = "DONE";
- break;
- case IOAT_CHANSTS_SUSPENDED:
- status = "SUSPENDED";
- break;
- case IOAT_CHANSTS_HALTED:
- status = "HALTED";
- break;
- case IOAT_CHANSTS_ARMED:
- status = "ARMED";
- break;
- default:
- break;
- }
- return status;
-}
-
-/* print descriptors on console (for debugging) */
-static void dump_desc(struct desc *d, int count)
-{
- printk("dumping descriptors (count = %d):\n", count);
-
- while (count > 0) {
- printk("desc: 0x%x, size: %d bytes\n",
- d, sizeof(struct desc));
- printk("[32] desc->xfer_size: 0x%x\n",
- d->xfer_size);
- printk("[32] desc->descriptor_control: 0x%x\n",
- d->descriptor_control);
- printk("[64] desc->src_addr: %p\n",
- d->src_addr);
- printk("[64] desc->dest_addr: %p\n",
- d->dest_addr);
- printk("[64] desc->next_desc_addr: %p\n",
- d->next_desc_addr);
- printk("[64] desc->next_source_address: %p\n",
- d->next_source_address);
- printk("[64] desc->next_destination_address: %p\n",
- d->next_destination_address);
- printk("[64] desc->reserved0: %p\n",
- d->reserved0);
- printk("[64] desc->reserved1: %p\n",
- d->reserved1);
-
- count--;
- if (count > 0)
- d = (struct desc *) KADDR(d->next_desc_addr);
- printk("\n");
- }
-}
-
-/* initialize desc ring
- *
- - Can be called multiple times, with different "ndesc" values.
- - NOTE: We only create _one_ valid desc. The next field points back itself
- (ring buffer).
- */
-static void init_desc(struct channel *c, int ndesc)
-{
- struct desc *d, *tmp;
- int i;
- const int max_ndesc = PGSIZE / sizeof(struct desc);
-
- /* sanity checks */
- if (ndesc > max_ndesc) {
- printk("cbdma: allocating only %d desc instead of %d desc\n",
- max_ndesc, ndesc);
- ndesc = max_ndesc;
- }
-
- c->ndesc = ndesc;
-
- /* allocate pages for descriptors, last 6-bits must be zero */
- if (!c->pdesc)
- c->pdesc = kpage_zalloc_addr();
-
- if (!c->pdesc) { /* error does not return */
- printk("cbdma: cannot alloc page for desc\n");
- return; /* TODO: return "false" */
- }
-
- /* preparing descriptors */
- d = c->pdesc;
- d->xfer_size = 1;
- d->descriptor_control = CBDMA_DESC_CTRL_NULL_DESC;
- d->next_desc_addr = PADDR(d);
-}
-
-/* struct channel is only used for get_register */
-static inline void cleanup_post_copy(struct channel *c)
-{
- uint64_t value;
-
- /* mmio_reg: DMACOUNT */
- value = read16(get_register(c, IOAT_CHAN_DMACOUNT_OFFSET));
- if (value != 0) {
- printk("cbdma: info: DMACOUNT = %d\n", value); /* should be 0 */
- write16(0, mmio + CBDMA_DMACOUNT_OFFSET);
- }
-
- /* mmio_reg: CHANERR */
- value = read32(get_register(c, IOAT_CHANERR_OFFSET));
- if (value != 0) {
- printk("cbdma: error: CHANERR = 0x%x\n", value);
- write32(value, get_register(c, IOAT_CHANERR_OFFSET));
- }
-
- /* ack errors */
- if (ACCESS_PCIE_CONFIG_SPACE) {
- /* PCIe_reg: CHANERR_INT */
- value = pcidev_read32(pci, CHANERR_INT);
- if (value != 0) {
- printk("cbdma: error: CHANERR_INT = 0x%x\n", value);
- pcidev_write32(pci, CHANERR_INT, value);
- }
-
- /* PCIe_reg: DMAUNCERRSTS */
- value = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
- if (value != 0) {
- printk("cbdma: error: DMAUNCERRSTS = 0x%x\n", value);
- pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET,
- value);
- }
- }
-}
-
-/* struct channel is only used for get_register */
-static inline void perform_dma(struct channel *c, physaddr_t completion_sts,
- physaddr_t desc, uint16_t count)
-{
- void __iomem *offset;
-
- /* Set channel completion register where CBDMA will write content of
- * CHANSTS register upon successful DMA completion or error condition
- */
- offset = get_register(c, IOAT_CHANCMP_OFFSET);
- write64(completion_sts, offset);
-
- /* write locate of first desc to register CHAINADDR */
- offset = get_register(c, IOAT_CHAINADDR_OFFSET(c->ver));
- write64(desc, offset);
- wmb_f();
-
- /* writing valid number of descs: starts the DMA */
- offset = get_register(c, IOAT_CHAN_DMACOUNT_OFFSET);
- write16(count, offset);
-}
-
-static inline void wait_for_dma_completion(uint64_t *cmpsts)
-{
- uint64_t sts;
-
- do {
- cpu_relax();
- sts = *cmpsts;
- if (cbdma_break_loop) {
- printk("cbdma: cmpsts: %p = 0x%llx\n", cmpsts, sts);
- break;
- }
- } while ((sts & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
-}
-
-/* cbdma_ktest: performs functional test on CBDMA
- *
- - Allocates 2 kernel pages: ktest_src and ktest_dst.
- - memsets the ktest_src page
- - Prepare descriptors for DMA transfer (need to be aligned)
- - Initiate the transfer
- - Prints results
- */
-static void cbdma_ktest(void)
-{
- static struct desc *d;
- uint64_t value;
- struct channel *c = &channel0;
-
- /* initialize src and dst address */
- memset(ktest.src, ktest.srcfill, KTEST_SIZE);
- memset(ktest.dst, ktest.dstfill, KTEST_SIZE);
- ktest.src[KTEST_SIZE-1] = '\0';
- ktest.dst[KTEST_SIZE-1] = '\0';
-
- /* for subsequent ktests */
- ktest.srcfill += 1;
-
- /* preparing descriptors */
- d = channel0.pdesc;
- d->xfer_size = (uint32_t) KTEST_SIZE;
- d->src_addr = (uint64_t) PADDR(ktest.src);
- d->dest_addr = (uint64_t) PADDR(ktest.dst);
- d->descriptor_control = CBDMA_DESC_CTRL_INTR_ON_COMPLETION |
- CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
-
- memset((uint64_t *)c->status, 0, sizeof(c->status));
-
- /* perform actual DMA */
- perform_dma(c, PADDR(c->status), PADDR(c->pdesc), 1);
- wait_for_dma_completion(c->status);
- cleanup_post_copy(c);
-}
-
-/* convert a userspace pointer to kaddr based pointer
- * TODO: this is dangerous and the pages are not pinned. Debugging only! */
-static inline void *uptr_to_kptr(void *ptr)
-{
- return (void *) uva2kva(current, ptr, 1, PROT_WRITE);
-}
-
-/* function that uses kernel addresses to perform DMA.
- * Note: does not perform error checks for src / dest addr.
- * TODO: this only works if ktest is not run. Still it fails on alternate runs.
- * Likely some error in setting up the desc from userspace.
- */
-static void issue_dma_kaddr(struct ucbdma *u)
-{
- struct ucbdma *u_kaddr = uptr_to_kptr(u);
- /* first field is struct desc */
- struct desc *d = (struct desc *) u_kaddr;
- struct channel *c = &channel0;
- uint64_t value;
-
- if (!u_kaddr) {
- printk("[kern] cannot get kaddr for useraddr: %p\n", u);
- return;
- }
- printk("[kern] ucbdma: user: %p kern: %p\n", u, u_kaddr);
-
- /* preparing descriptors */
- d->src_addr = (uint64_t) PADDR(uptr_to_kptr((void*) d->src_addr));
- d->dest_addr = (uint64_t) PADDR(uptr_to_kptr((void*) d->dest_addr));
- d->next_desc_addr = (uint64_t)
- PADDR(uptr_to_kptr((void*) d->next_desc_addr));
-
- /* perform actual DMA */
- perform_dma(c, PADDR(&u_kaddr->status), PADDR(d), u_kaddr->ndesc);
- wait_for_dma_completion(&u_kaddr->status);
- cleanup_post_copy(c);
-}
-
-/* function that uses virtual (process) addresses to perform DMA; IOMMU = ON
- * TODO: Verify once the IOMMU is setup and enabled.
- */
-static void issue_dma_vaddr(struct ucbdma *u)
-{
- struct ucbdma *u_kaddr = uptr_to_kptr(u);
- struct channel *c = &channel0;
- uint64_t value;
-
- printk("[kern] IOMMU = ON\n");
- printk("[kern] ucbdma: user: %p kern: %p ndesc: %d\n", u,
- &u_kaddr->desc, u_kaddr->ndesc);
-
- /* perform actual DMA */
- perform_dma(c, (physaddr_t) &u->status, (physaddr_t) &u->desc,
- u_kaddr->ndesc);
- wait_for_dma_completion(&u_kaddr->status);
- cleanup_post_copy(&channel0);
-}
-
-/* cbdma_stats: get stats about the device and driver
- */
-static struct sized_alloc *open_stats(void)
-{
- struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
- uint64_t value;
-
- sza_printf(sza,
- "Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
- pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
-
- /* driver info. */
- sza_printf(sza, " Driver Information:\n");
- sza_printf(sza,
- "\tmmio: %p\n"
- "\ttotal_channels: %d\n"
- "\tdesc_kaddr: %p\n"
- "\tdesc_paddr: %p\n"
- "\tdesc_num: %d\n"
- "\tver: 0x%x\n"
- "\tstatus_kaddr: %p\n"
- "\tstatus_paddr: %p\n"
- "\tstatus_value: 0x%x\n",
- mmio, chancnt,
- channel0.pdesc, PADDR(channel0.pdesc), channel0.ndesc,
- channel0.ver, channel0.status, PADDR(channel0.status),
- *(uint64_t *)channel0.status);
-
- /* print the PCI registers */
- sza_printf(sza, " PCIe Config Registers:\n");
-
- value = pcidev_read16(pci, PCI_CMD_REG);
- sza_printf(sza, "\tPCICMD: 0x%x\n", value);
-
- value = pcidev_read16(pci, PCI_STATUS_REG);
- sza_printf(sza, "\tPCISTS: 0x%x\n", value);
-
- value = pcidev_read16(pci, PCI_REVID_REG);
- sza_printf(sza, "\tRID: 0x%x\n", value);
-
- value = pcidev_read32(pci, PCI_BAR0_STD);
- sza_printf(sza, "\tCB_BAR: 0x%x\n", value);
-
- value = pcidev_read16(pci, DEVSTS);
- sza_printf(sza, "\tDEVSTS: 0x%x\n", value);
-
- value = pcidev_read32(pci, PMCSR);
- sza_printf(sza, "\tPMCSR: 0x%x\n", value);
-
- value = pcidev_read32(pci, DMAUNCERRSTS);
- sza_printf(sza, "\tDMAUNCERRSTS: 0x%x\n", value);
-
- value = pcidev_read32(pci, DMAUNCERRMSK);
- sza_printf(sza, "\tDMAUNCERRMSK: 0x%x\n", value);
-
- value = pcidev_read32(pci, DMAUNCERRSEV);
- sza_printf(sza, "\tDMAUNCERRSEV: 0x%x\n", value);
-
- value = pcidev_read8(pci, DMAUNCERRPTR);
- sza_printf(sza, "\tDMAUNCERRPTR: 0x%x\n", value);
-
- value = pcidev_read8(pci, DMAGLBERRPTR);
- sza_printf(sza, "\tDMAGLBERRPTR: 0x%x\n", value);
-
- value = pcidev_read32(pci, CHANERR_INT);
- sza_printf(sza, "\tCHANERR_INT: 0x%x\n", value);
-
- value = pcidev_read32(pci, CHANERRMSK_INT);
- sza_printf(sza, "\tCHANERRMSK_INT: 0x%x\n", value);
-
- value = pcidev_read32(pci, CHANERRSEV_INT);
- sza_printf(sza, "\tCHANERRSEV_INT: 0x%x\n", value);
-
- value = pcidev_read8(pci, CHANERRPTR);
- sza_printf(sza, "\tCHANERRPTR: 0x%x\n", value);
-
- sza_printf(sza, " CHANNEL_0 MMIO Registers:\n");
-
- value = read8(mmio + CBDMA_CHANCMD_OFFSET);
- sza_printf(sza, "\tCHANCMD: 0x%x\n", value);
-
- value = read8(mmio + IOAT_VER_OFFSET);
- sza_printf(sza, "\tCBVER: 0x%x major=%d minor=%d\n",
- value,
- GET_IOAT_VER_MAJOR(value),
- GET_IOAT_VER_MINOR(value));
-
- value = read16(mmio + CBDMA_CHANCTRL_OFFSET);
- sza_printf(sza, "\tCHANCTRL: 0x%llx\n", value);
-
- value = read64(mmio + CBDMA_CHANSTS_OFFSET);
- sza_printf(sza, "\tCHANSTS: 0x%x [%s], desc_addr: %p, raw: 0x%llx\n",
- (value & IOAT_CHANSTS_STATUS),
- cbdma_str_chansts(value),
- (value & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR),
- value);
-
- value = read64(mmio + CBDMA_CHAINADDR_OFFSET);
- sza_printf(sza, "\tCHAINADDR: %p\n", value);
-
- value = read64(mmio + CBDMA_CHANCMP_OFFSET);
- sza_printf(sza, "\tCHANCMP: %p\n", value);
-
- value = read16(mmio + CBDMA_DMACOUNT_OFFSET);
- sza_printf(sza, "\tDMACOUNT: %d\n", value);
-
- value = read32(mmio + CBDMA_CHANERR_OFFSET);
- sza_printf(sza, "\tCHANERR: 0x%x\n", value);
-
- return sza;
-}
-
-static struct sized_alloc *open_reset(void)
-{
- struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
- if (cbdma_is_reset_pending())
- sza_printf(sza, "Status: Reset is pending\n");
- else
- sza_printf(sza, "Status: No pending reset\n");
-
- sza_printf(sza, "Write '1' to perform reset!\n");
-
- return sza;
-}
-
-static struct sized_alloc *open_iommu(void)
-{
- struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
- sza_printf(sza, "IOMMU enabled = %s\n", iommu_enabled ? "yes":"no");
- sza_printf(sza, "Write '0' to disable or '1' to enable the IOMMU\n");
-
- return sza;
-}
-
-/* targets channel0 */
-static struct sized_alloc *open_ktest(void)
-{
- struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
-
- /* run the test */
- cbdma_ktest();
-
- sza_printf(sza,
- "Self-test Intel CBDMA [%x:%x] registered at %02x:%02x.%x\n",
- pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func);
-
- sza_printf(sza, "\tChannel Status: %s (raw: 0x%x)\n",
- cbdma_str_chansts(*((uint64_t *)channel0.status)),
- (*((uint64_t *)channel0.status) & IOAT_CHANSTS_STATUS));
-
- sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
- sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
- sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);
- sza_printf(sza, "\tsrc_str (after copy): %s\n", ktest.src);
- sza_printf(sza, "\tdst_str (after copy): %s\n", ktest.dst);
-
- return sza;
-}
-
-/* cbdma_reset_device: this fixes any programming errors done before
- */
-void cbdma_reset_device(void)
-{
- int cbdmaver;
- uint32_t error;
-
- /* make sure the driver is initialized */
- if (!mmio)
- error(EIO, "cbdma: mmio addr not set");
-
- pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
- | PCI_COMMAND_MASTER);
- /* fetch version */
- cbdmaver = read8(mmio + IOAT_VER_OFFSET);
-
- /* ack channel errros */
- error = read32(mmio + CBDMA_CHANERR_OFFSET);
- write32(error, mmio + CBDMA_CHANERR_OFFSET);
-
- if (ACCESS_PCIE_CONFIG_SPACE) {
- /* ack pci device level errros */
- /* clear DMA Cluster Uncorrectable Error Status */
- error = pcidev_read32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET);
- pcidev_write32(pci, IOAT_PCI_DMAUNCERRSTS_OFFSET, error);
-
- /* clear DMA Channel Error Status */
- error = pcidev_read32(pci, IOAT_PCI_CHANERR_INT_OFFSET);
- pcidev_write32(pci, IOAT_PCI_CHANERR_INT_OFFSET, error);
- }
-
- /* reset */
- write8(IOAT_CHANCMD_RESET, mmio
- + IOAT_CHANNEL_MMIO_SIZE
- + IOAT_CHANCMD_OFFSET(cbdmaver));
-
- pcidev_write16(pci, PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY
- | PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE);
-
- printk("cbdma: reset performed\n");
-}
-
-/* cbdma_is_reset_pending: returns true if reset is pending
- */
-bool cbdma_is_reset_pending(void)
-{
- int cbdmaver;
- int status;
-
- /* make sure the driver is initialized */
- if (!mmio) {
- error(EPERM, "cbdma: mmio addr not set");
- return false; /* does not reach */
- }
-
- /* fetch version */
- cbdmaver = read8(mmio + IOAT_VER_OFFSET);
-
- status = read8(mmio + IOAT_CHANNEL_MMIO_SIZE
- + IOAT_CHANCMD_OFFSET(cbdmaver));
-
- return (status & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
-}
-
-///////// SYS INTERFACE ////////////////////////////////////////////////////////
-
static struct chan *cbdmaopen(struct chan *c, int omode)
{
switch (c->qid.path) {
- case Qcbdmastats:
- c->synth_buf = open_stats();
- break;
- case Qcbdmareset:
- c->synth_buf = open_reset();
- break;
- case Qcbdmaiommu:
- c->synth_buf = open_iommu();
- break;
case Qcbdmaktest:
c->synth_buf = open_ktest();
break;
@@ -732,9 +311,6 @@
static void cbdmaclose(struct chan *c)
{
switch (c->qid.path) {
- case Qcbdmastats:
- case Qcbdmareset:
- case Qcbdmaiommu:
case Qcbdmaktest:
kfree(c->synth_buf);
c->synth_buf = NULL;
@@ -753,9 +329,6 @@
switch (c->qid.path) {
case Qcbdmaktest:
- case Qcbdmastats:
- case Qcbdmareset:
- case Qcbdmaiommu:
return readstr(offset, va, n, sza->buf);
case Qcbdmaucopy:
return readstr(offset, va, n,
@@ -770,63 +343,22 @@
return -1; /* not reached */
}
-static void init_channel(struct channel *c, int cnum, int ndesc)
-{
- c->number = cnum;
- c->pdesc = NULL;
- init_desc(c, ndesc);
-
- /* this is a writeback field; the hardware will update this value */
- if (c->status == 0)
- c->status = kmalloc_align(sizeof(uint64_t), MEM_WAIT, 8);
- assert(c->status != 0);
-
- /* cbdma version */
- c->ver = read8(mmio + IOAT_VER_OFFSET);
-
- /* Set "Any Error Abort Enable": enables abort for any error encountered
- * Set "Error Completion Enable": enables completion write to address in
- CHANCMP for any error
- * Reset "Interrupt Disable": W1C, when clear enables interrupt to fire
- for next descriptor that specifies interrupt
- */
- write8(IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN,
- get_register(c, IOAT_CHANCTRL_OFFSET));
-}
-
static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
{
+ struct ucbdma ucbdma[1];
+
switch (c->qid.path) {
case Qdir:
error(EPERM, "writing not permitted");
case Qcbdmaktest:
- case Qcbdmastats:
error(EPERM, ERROR_FIXME);
- case Qcbdmareset:
- if (offset == 0 && n > 0 && *(char *)va == '1') {
- cbdma_reset_device();
- init_channel(&channel0, 0, NDESC);
- } else {
- error(EINVAL, "cannot be empty string");
- }
- return n;
case Qcbdmaucopy:
- if (offset == 0 && n > 0) {
- printk("[kern] value from userspace: %p\n", va);
- if (iommu_enabled)
- issue_dma_vaddr(va);
- else
- issue_dma_kaddr(va);
- return sizeof(8);
- }
- return 0;
- case Qcbdmaiommu:
- if (offset == 0 && n > 0 && *(char *)va == '1')
- iommu_enabled = true;
- else if (offset == 0 && n > 0 && *(char *)va == '0')
- iommu_enabled = false;
- else
- error(EINVAL, "cannot be empty string");
+		if (n != sizeof(struct ucbdma))
+			error(EINVAL, "Bad ucbdma size %lu (%lu)", n,
+			      sizeof(struct ucbdma));
+ if (copy_from_user(ucbdma, va, sizeof(struct ucbdma)))
+ error(EINVAL, "Bad ucbdma pointer");
+ issue_dma_ucbdma(ucbdma);
return n;
default:
error(EIO, "cbdma: qid 0x%x is impossible", c->qid.path);
@@ -835,97 +367,10 @@
return -1; /* not reached */
}
-static void cbdma_interrupt(struct hw_trapframe *hw_tf, void *arg)
-{
- uint16_t value;
-
- value = read16(get_register(&channel0, IOAT_CHANCTRL_OFFSET));
- write16(value | IOAT_CHANCTRL_INT_REARM,
- get_register(&channel0, IOAT_CHANCTRL_OFFSET));
-}
-
-void cbdmainit(void)
-{
- int tbdf;
- int i;
- int id;
- struct pci_device *pci_iter;
-
- printk("cbdma: skipping it\n");
- return;
-
- /* assigning global variables */
- pci = NULL;
- mmio = NULL;
-
- /* initialize cbdmadev */
- memset(&cbdmadev, 0x0, sizeof(cbdmadev));
-
- /* search for the device 00:04.0 */
- STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) {
- id = pci_iter->dev_id << 16 | pci_iter->ven_id;
- switch (id) {
- default:
- continue;
- case ioat2021:
- case ioat2f20:
- /* hack: bus 0 is the PCI_ALL iommu.
- * Can remove this once we add code for scoped IOMMU */
- if (pci_iter->bus != 0)
- continue;
- pci = pci_iter;
- break;
- }
- }
-
- if (pci == NULL) {
- printk("cbdma: no Intel CBDMA device found\n");
- return;
- }
-
- /* search and find the mapped mmio region */
- for (i = 0; i < COUNT_OF(pci->bar); i++) {
- mmio = pci_get_mmio_bar_kva(pci, i);
- if (!mmio)
- continue;
- break;
- }
-
- if (mmio == NULL) {
- printk("cbdma: cannot map any bars!\n");
- return;
- }
-
- /* performance related stuff */
- pci_set_cacheline_size(pci);
-
- /* Get the channel count. Top 3 bits of the register are reserved. */
- chancnt = read8(mmio + IOAT_CHANCNT_OFFSET) & 0x1F;
-
- /* initialization successful; print stats */
- printk("cbdma: registered [%x:%x] at %02x:%02x.%x // "
- "mmio:%p\n",
- pci->ven_id, pci->dev_id, pci->bus, pci->dev, pci->func,
- mmio);
-
- tbdf = MKBUS(BusPCI, pci->bus, pci->dev, pci->func);
- register_irq(pci->irqline, cbdma_interrupt, NULL, tbdf);
-
- /* reset device */
- cbdma_reset_device();
-
- /* initialize channel(s) */
- init_channel(&channel0, 0, NDESC);
-
- /* setup ktest struct */
- ktest.srcfill = '1';
- ktest.dstfill = '0';
-}
-
struct dev cbdmadevtab __devtab = {
.name = "cbdma",
.reset = devreset,
- .init = cbdmainit,
+ .init = devinit,
.shutdown = devshutdown,
.attach = cbdmaattach,
.walk = cbdmawalk,
diff --git a/kern/include/cbdma_regs.h b/kern/include/cbdma_regs.h
deleted file mode 100644
index 6c8ec3d..0000000
--- a/kern/include/cbdma_regs.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright (c) 2019 Google Inc
- * Aditya Basu <mitthu@google.com>
- * See LICENSE for details.
- *
- * Copy of CBDMA register definitions from Linux kernel (around v5.1)
- * drivers/dma/ioat/registers.h
- */
-#ifndef _IOAT_REGISTERS_H_
-#define _IOAT_REGISTERS_H_
-
-#define ACCESS_PCIE_CONFIG_SPACE 1
-
-bool cbdma_is_reset_pending(void);
-void cbdma_reset_device(void);
-
-/* file: drivers/dma/ioat/hw.h */
-#define IOAT_VER_1_2 0x12 /* Version 1.2 */
-#define IOAT_VER_2_0 0x20 /* Version 2.0 */
-#define IOAT_VER_3_0 0x30 /* Version 3.0 */
-#define IOAT_VER_3_2 0x32 /* Version 3.2 */
-#define IOAT_VER_3_3 0x33 /* Version 3.3 */
-#define IOAT_VER_3_4 0x34 /* Version 3.4 */
-/* -------------------------------------- */
-
-#define IOAT_PCI_DMACTRL_OFFSET 0x48
-#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
-#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
-
-#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
-#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
-#define IOAT_PCI_CHANERR_INT_OFFSET 0x180
-#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
-
-/* MMIO Device Registers */
-#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
-
-#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
-#define IOAT_XFERCAP_4KB 12
-#define IOAT_XFERCAP_8KB 13
-#define IOAT_XFERCAP_16KB 14
-#define IOAT_XFERCAP_32KB 15
-#define IOAT_XFERCAP_32GB 0
-
-#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
-#define IOAT_GENCTRL_DEBUG_EN 0x01
-
-#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
-#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
-#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
-#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
-
-#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
-
-#define IOAT_VER_OFFSET 0x08 /* 8-bit */
-#define IOAT_VER_MAJOR_MASK 0xF0
-#define IOAT_VER_MINOR_MASK 0x0F
-#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
-#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
-
-#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
-
-#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
-#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
-
-#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
-#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
-#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
-#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
-#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
-
-#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
-#define IOAT_CAP_PAGE_BREAK 0x00000001
-#define IOAT_CAP_CRC 0x00000002
-#define IOAT_CAP_SKIP_MARKER 0x00000004
-#define IOAT_CAP_DCA 0x00000010
-#define IOAT_CAP_CRC_MOVE 0x00000020
-#define IOAT_CAP_FILL_BLOCK 0x00000040
-#define IOAT_CAP_APIC 0x00000080
-#define IOAT_CAP_XOR 0x00000100
-#define IOAT_CAP_PQ 0x00000200
-#define IOAT_CAP_DWBES 0x00002000
-#define IOAT_CAP_RAID16SS 0x00020000
-
-#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
-
-/* DMA Channel Registers */
-#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
-#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
-#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
-#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
-#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
-#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
-#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
-#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
-#define IOAT_CHANCTRL_INT_REARM 0x0001
-#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
- IOAT_CHANCTRL_ERR_INT_EN |\
- IOAT_CHANCTRL_ERR_COMPLETION_EN |\
- IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
-
-#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
-#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
-#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
-
-
-#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW 0x04
-#define IOAT2_CHANSTS_OFFSET_LOW 0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
-#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
-#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
-#define IOAT_CHANSTS_STATUS 0x7ULL
-#define IOAT_CHANSTS_ACTIVE 0x0
-#define IOAT_CHANSTS_DONE 0x1
-#define IOAT_CHANSTS_SUSPENDED 0x2
-#define IOAT_CHANSTS_HALTED 0x3
-
-
-
-#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
-
-#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
-#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
-#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
-
-/* CB DCA Memory Space Registers */
-#define IOAT_DCAOFFSET_OFFSET 0x14
-/* CB_BAR + IOAT_DCAOFFSET value */
-#define IOAT_DCA_VER_OFFSET 0x00
-#define IOAT_DCA_VER_MAJOR_MASK 0xF0
-#define IOAT_DCA_VER_MINOR_MASK 0x0F
-
-#define IOAT_DCA_COMP_OFFSET 0x02
-#define IOAT_DCA_COMP_V1 0x1
-
-#define IOAT_FSB_CAPABILITY_OFFSET 0x04
-#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
-
-#define IOAT_PCI_CAPABILITY_OFFSET 0x06
-#define IOAT_PCI_CAPABILITY_MEMWR 0x1
-
-#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
-#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
-
-#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
-#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
-
-#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
-#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
-#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
-#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
-#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
-#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
-#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
-#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
-#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
-#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
-#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
-#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
-
-#define IOAT_DCA_GREQID_OFFSET 0x10
-#define IOAT_DCA_GREQID_SIZE 0x04
-#define IOAT_DCA_GREQID_MASK 0xFFFF
-#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
-#define IOAT_DCA_GREQID_VALID 0x20000000
-#define IOAT_DCA_GREQID_LASTID 0x80000000
-
-#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
-#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
-
-#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
-#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
-
-#define IOAT3_CSI_CONTROL_OFFSET 0x0C
-#define IOAT3_CSI_CONTROL_PREFETCH 0x1
-
-#define IOAT3_PCI_CONTROL_OFFSET 0x0E
-#define IOAT3_PCI_CONTROL_MEMWR 0x1
-
-#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
-
-#define IOAT3_DCA_GREQID_OFFSET 0x02
-
-#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
-#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
-#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
-#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
-#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
-#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
-#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
-#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
-
-#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
-#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
-#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
- ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
-#define IOAT_CHANCMD_RESET 0x20
-#define IOAT_CHANCMD_RESUME 0x10
-#define IOAT_CHANCMD_ABORT 0x08
-#define IOAT_CHANCMD_SUSPEND 0x04
-#define IOAT_CHANCMD_APPEND 0x02
-#define IOAT_CHANCMD_START 0x01
-
-#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
-#define IOAT_CHANCMP_OFFSET_LOW 0x18
-#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
-
-#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
-#define IOAT_CDAR_OFFSET_LOW 0x20
-#define IOAT_CDAR_OFFSET_HIGH 0x24
-
-#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
-#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
-#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
-#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
-#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
-#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
-#define IOAT_CHANERR_CHANCMD_ERR 0x0020
-#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
-#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
-#define IOAT_CHANERR_READ_DATA_ERR 0x0100
-#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
-#define IOAT_CHANERR_CONTROL_ERR 0x0400
-#define IOAT_CHANERR_LENGTH_ERR 0x0800
-#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
-#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
-#define IOAT_CHANERR_SOFT_ERR 0x4000
-#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
-#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
-#define IOAT_CHANERR_XOR_Q_ERR 0x20000
-#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
-
-#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
-
-#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
-
-/* Extras: Added by Aditya Basu <mitthu@google.com> */
-#define CBDMA_CHANCMD_OFFSET 0x84
-#define CBDMA_CHANSTS_OFFSET 0x88
-#define CBDMA_CHANCTRL_OFFSET 0x80
-#define CBDMA_DMACOUNT_OFFSET 0x86
-#define CBDMA_CHAINADDR_OFFSET 0x90
-#define CBDMA_CHANCMP_OFFSET 0x98
-#define CBDMA_CHANERR_OFFSET 0xa8
-#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION 0x01 /* 32-bit field */
-#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION 0x08
-#define CBDMA_DESC_CTRL_NULL_DESC 0x20
-
-#define IOAT_CHANSTS_ARMED 0x4
-
-#endif /* _IOAT_REGISTERS_H_ */
diff --git a/tests/ucbdma.c b/tests/ucbdma.c
index c49f22b..2efb750 100644
--- a/tests/ucbdma.c
+++ b/tests/ucbdma.c
@@ -1,66 +1,45 @@
/* Copyright (c) 2019 Google Inc
* Aditya Basu <mitthu@google.com>
+ * Barret Rhoden <brho@google.com>
* See LICENSE for details.
-
- * For kernel space
- * ----------------
- * uintptr_t uva2kva(struct proc *p, void *uva, size_t len, int prot)
- * prot is e.g. PROT_WRITE (writable by userspace).
- * returns a KVA, which you can convert to a phys addr with PADDR().
- *
- * TODO:
- * - Bypass DMA re-mapping if iommu is not turned on (in #cbdma/iommu).
*/
-#include <stdio.h>
+#include <parlib/stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include <inttypes.h>
#include <fcntl.h>
#include <unistd.h>
-#include <parlib/assert.h>
-
-#define CBDMA_DESC_CTRL_INTR_ON_COMPLETION 0x00000001
-#define CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION 0x00000008
-#define CBDMA_DESC_CTRL_NULL_DESC 0x20
#define BUFFERSIZE 20
+#define error_exit(s) \
+do { \
+ perror((s)); \
+ exit(-1); \
+} while (0)
+
-/* Descriptor structue as defined in the programmer's guide.
- * It describes a single DMA transfer
- */
+/* Matches the kernel's struct ucbdma (kern/drivers/dev/cbdma.c): the ABI
+ * for writes to #cbdma/ucopy. */
-struct desc {
- uint32_t xfer_size;
- uint32_t descriptor_control;
- uint64_t src_addr;
- uint64_t dest_addr;
- uint64_t next_desc_addr;
- uint64_t next_source_address;
- uint64_t next_destination_address;
- uint64_t reserved0;
- uint64_t reserved1;
-} __attribute__((packed));
-
-/* describe a DMA */
struct ucbdma {
- struct desc desc;
- uint64_t status;
- uint16_t ndesc;
-};
+ uint64_t dst_addr;
+ uint64_t src_addr;
+ uint32_t xfer_size;
+ char bdf_str[10];
+} __attribute__((packed));
static void *map_page(void)
{
void *region;
size_t pagesize = getpagesize();
- printf("[user] page size: %zu bytes\n", pagesize);
-
region = mmap(0, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANON | MAP_PRIVATE, 0, 0);
if (region == MAP_FAILED)
- panic("cannot mmap");
+ error_exit("cannot mmap");
return region;
}
@@ -72,7 +51,7 @@
err = munmap(region, pagesize);
if (err)
- panic("cannot munmap");
+ error_exit("cannot munmap");
}
static void issue_dma(struct ucbdma *ptr)
@@ -80,10 +59,11 @@
int fd = open("#cbdma/ucopy", O_RDWR);
if (fd < 0)
- panic("open failed: #cbdma/ucopy");
+ error_exit("open failed: #cbdma/ucopy");
printf("[user] ucbdma ptr: %p\n", ptr);
- write(fd, ptr, sizeof(struct ucbdma *));
+ if (write(fd, ptr, sizeof(struct ucbdma)) < 0)
+ error_exit("write ucbdma");
close(fd);
}
@@ -98,13 +78,9 @@
{
printf("[user] ucbdma: %p, size: %d (or 0x%x)\n", ucbdma,
sizeof(struct ucbdma), sizeof(struct ucbdma));
- printf("[user] \tdesc->xref_size: %d\n", ucbdma->desc.xfer_size);
- printf("[user] \tdesc->src_addr: %p\n", ucbdma->desc.src_addr);
- printf("[user] \tdesc->dest_addr: %p\n", ucbdma->desc.dest_addr);
- printf("[user] \tdesc->next_desc_addr: %p\n",
- ucbdma->desc.next_desc_addr);
- printf("[user] \tndesc: %d\n", ucbdma->ndesc);
- printf("[user] \tstatus: 0x%llx\n", ucbdma->status);
+ printf("[user] \txref_size: %d\n", ucbdma->xfer_size);
+ printf("[user] \tsrc_addr: %p\n", ucbdma->src_addr);
+ printf("[user] \tdst_addr: %p\n", ucbdma->dst_addr);
}
static void attach_device(char *pcistr)
@@ -113,11 +89,11 @@
int fd = open("#iommu/attach", O_RDWR);
if (fd < 0)
- panic("open failed: #iommu/attach");
+ error_exit("open failed: #iommu/attach");
sprintf(buf, "%s %d\n", pcistr, getpid());
- write(fd, buf, strlen(buf));
-
+ if (write(fd, buf, strlen(buf)) < 0)
+ error_exit("attach");
close(fd);
system("cat \\#iommu/mappings");
@@ -125,19 +101,34 @@
static void detach_device(char *pcistr)
{
+ char buf[1024];
int fd = open("#iommu/detach", O_RDWR);
if (fd < 0)
- panic("open failed: #iommu/detach");
+ error_exit("open failed: #iommu/detach");
- write(fd, pcistr, strlen(pcistr));
-
+ sprintf(buf, "%s %d\n", pcistr, getpid());
+ if (write(fd, buf, strlen(buf)) < 0)
+ error_exit("dettach");
close(fd);
}
-int main(int argc, char **argv)
+static void showmapping(pid_t pid, char *dst)
{
- char *region;
+ /* One could imagine typeof-based macros that create a string of the
+ * right size and snprintf variables with %d, %p, whatever... */
+ char pid_s[20];
+ char addr_s[20];
+ char *argv[] = { "m", "showmapping", pid_s, addr_s, NULL };
+
+ snprintf(pid_s, sizeof(pid_s), "%d", pid);
+ snprintf(addr_s, sizeof(addr_s), "%p", dst);
+
+ run_and_wait(argv[0], sizeof(argv), argv);
+}
+
+int main(int argc, char *argv[])
+{
struct ucbdma *ucbdma;
char *src, *dst;
char *pcistr;
@@ -151,39 +142,40 @@
attach_device(pcistr);
- /* map page for placing ucbdma */
- region = map_page();
-
-	/* setup src and dst buffers; 100 is random padding */
+	/* setup src and dst buffers, each on its own page */
- src = region + sizeof(struct ucbdma) + 100;
- dst = region + sizeof(struct ucbdma) + 100 + BUFFERSIZE;
+ src = map_page();
+ dst = map_page();
+ printf("[user] mmaped src %p\n", src);
+ printf("[user] mmaped dst %p\n", dst);
+ /* No need to fill dst, it is all zeros (\0, not '0') from the OS */
fill_buffer(src, '1', BUFFERSIZE);
- fill_buffer(dst, '0', BUFFERSIZE);
printf("[user] src: %s\n", src);
printf("[user] dst: %s\n", dst);
/* setup ucbdma*/
- ucbdma = (struct ucbdma *) region;
- ucbdma->status = 0;
- ucbdma->desc.descriptor_control
- = CBDMA_DESC_CTRL_INTR_ON_COMPLETION
- | CBDMA_DESC_CTRL_WRITE_CHANCMP_ON_COMPLETION;
- ucbdma->desc.xfer_size = BUFFERSIZE;
- ucbdma->desc.src_addr = (uint64_t) src;
- ucbdma->desc.dest_addr = (uint64_t) dst;
- ucbdma->desc.next_desc_addr = (uint64_t) &ucbdma->desc;
- ucbdma->ndesc = 1;
+ ucbdma = malloc(sizeof(struct ucbdma));
+ ucbdma->xfer_size = BUFFERSIZE;
+ ucbdma->src_addr = (uint64_t) src;
+ ucbdma->dst_addr = (uint64_t) dst;
+ memcpy(&ucbdma->bdf_str, pcistr, sizeof(ucbdma->bdf_str));
- dump_ucbdma(ucbdma);
issue_dma(ucbdma);
- dump_ucbdma(ucbdma);
- printf("[user] channel_status: %llx\n", ucbdma->status);
printf("[user] src: %s\n", src);
printf("[user] dst: %s\n", dst);
+ /* Force an IOTLB flush by mmaping/munmapping an arbitrary page */
+ unmap_page(map_page());
+
+ /* Ideally, we'd see the dirty bit set in the PTE. But we probably
+ * won't. The user would have to dirty the page to tell the OS it was
+ * dirtied, which is really nasty. */
+ printf("[user] Asking the kernel to show the PTE for %p\n", dst);
+ showmapping(getpid(), dst);
+
/* cleanup */
- unmap_page(region);
+ unmap_page(src);
+ unmap_page(dst);
detach_device(pcistr);