| /* Copyright (c) 2019-2020 Google Inc |
| * Aditya Basu <mitthu@google.com> |
| * Barret Rhoden <brho@google.com> |
| * See LICENSE for details. |
| * |
| * Useful resources: |
| * - Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2 |
| * - Purley Programmer's Guide |
| * |
| * Acronyms: |
| * - IOAT: (Intel) I/O Acceleration Technology |
| * - CDMA: Crystal Beach DMA |
| * |
| * TODO |
| * - Consider something lighter-weight than the qlock for ensuring the device |
| *   doesn't get detached during operation. A kref, perhaps; there's a sketch |
| *   after issue_dma_ucbdma(). There's also an element of "stop new people |
| *   from coming in", like we do with closing FDs. There's also stuff that |
| *   the dmaengine does in Linux; see dma_chan_get(). |
| * - Freeze or handle faults with VA->PA page mappings until the DMA is |
| *   completed. Right now, we could get IOMMU faults if a mapping changes |
| *   mid-DMA; catching those was the point of the IOMMU in the first place. |
| * - The dmaengine has helpers for some of this. dma_set_unmap() is an |
| *   "unmap all these things when you're done" approach, called by __cleanup() |
| *   -> dma_descriptor_unmap(). The unmap struct is basically a todo list. |
| * - There's a lot of stuff we could do with the DMA engine to reduce the |
| *   amount of device touches, contention, and other inefficiencies. |
| *   issue_dma() is a minimalist one: no batching, etc.; a batched variant is |
| *   sketched after it. And with the pdev qlock, we have only a single |
| *   request per PCI device at a time, though there may be numerous channels. |
| */ |
| |
| #include <kmalloc.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <assert.h> |
| #include <error.h> |
| #include <net/ip.h> |
| #include <linux_compat.h> |
| #include <arch/pci.h> |
| #include <page_alloc.h> |
| #include <pmap.h> |
| #include <arch/pci_regs.h> |
| |
| #include <linux/dmaengine.h> |
| |
| /* QID Path */ |
| enum { |
| Qdir = 0, |
| Qcbdmaktest = 1, |
| Qcbdmaucopy = 2, |
| }; |
| |
| static struct dirtab cbdmadir[] = { |
| {".", {Qdir, 0, QTDIR}, 0, 0555}, |
| {"ktest", {Qcbdmaktest, 0, QTFILE}, 0, 0555}, |
| {"ucopy", {Qcbdmaucopy, 0, QTFILE}, 0, 0755}, |
| }; |
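| |
| /* Illustrative shell session, assuming the device attaches by name (the |
|  * dirtab above produces these files): |
|  * |
|  *	$ ls '#cbdma' |
|  *	ktest |
|  *	ucopy |
|  */ |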
| |
| /* TODO: this is a device/kernel ABI. ucbdma.c has a copy. It's probably not |
| * worth putting in its own header, since this is really cheap test code. */ |
| struct ucbdma { |
| uint64_t dst_addr; |
| uint64_t src_addr; |
| uint32_t xfer_size; |
| char bdf_str[10]; |
| } __attribute__((packed)); |
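| |
| /* Illustrative user-side sketch (not a tested program): the write() payload |
|  * for #cbdma/ucopy is the struct itself, by value, not a pointer to it. |
|  * dst_addr and src_addr are device-physical, i.e. addresses in the IOVA |
|  * space of the process that owns the device (cbdmawrite() checks |
|  * proc_owner). The BDF string and variable names below are assumptions for |
|  * illustration: |
|  * |
|  *	struct ucbdma u = { |
|  *		.dst_addr = dst_iova, |
|  *		.src_addr = src_iova, |
|  *		.xfer_size = len, |
|  *	}; |
|  *	snprintf(u.bdf_str, sizeof(u.bdf_str), "00:04.3"); |
|  *	fd = open("#cbdma/ucopy", O_WRITE); |
|  *	if (write(fd, &u, sizeof(u)) != sizeof(u)) |
|  *		perror("ucopy"); |
|  */ |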
| |
| #define KTEST_SIZE 64 |
| static struct { |
| char src[KTEST_SIZE]; |
| char dst[KTEST_SIZE]; |
| char srcfill; |
| char dstfill; |
| } ktest = {.srcfill = '0', .dstfill = 'X'}; |
| |
| static inline struct pci_device *dma_chan_to_pci_dev(struct dma_chan *dc) |
| { |
| return container_of(dc->device->dev, struct pci_device, linux_dev); |
| } |
| |
| /* Filter function for finding a particular PCI device. If |
| * __dma_request_channel() asks for a particular device, we'll only give it that |
| * chan. If you don't care, pass NULL, and you'll get any free chan. */ |
| static bool filter_pci_dev(struct dma_chan *dc, void *arg) |
| { |
| struct pci_device *pdev = dma_chan_to_pci_dev(dc); |
| |
| if (arg) |
| return arg == pdev; |
| return true; |
| } |
| |
| /* Addresses are device-physical. Caller holds the pdev qlock. */ |
| static void issue_dma(struct pci_device *pdev, physaddr_t dst, physaddr_t src, |
| size_t len, bool async) |
| { |
| ERRSTACK(1); |
| struct dma_chan *dc; |
| dma_cap_mask_t mask; |
| struct dma_async_tx_descriptor *tx; |
| int flags; |
| |
| struct completion cmp; |
| unsigned long tmo; |
| dma_cookie_t cookie; |
| |
| /* dmaengine_get() only works for non-DMA_PRIVATE devices. A lot of |
| * devices turn on DMA_PRIVATE, in which case their channels won't be in |
| * the general pool available to the dmaengine. Instead, we directly |
| * request DMA channels - particularly since we want specific devices to |
| * use with the IOMMU. */ |
| |
| dma_cap_zero(mask); |
| dma_cap_set(DMA_MEMCPY, mask); |
| dc = __dma_request_channel(&mask, filter_pci_dev, pdev); |
| if (!dc) |
| error(EFAIL, "Couldn't get a DMA channel"); |
| if (waserror()) { |
| dma_release_channel(dc); |
| nexterror(); |
| } |
| |
| flags = 0; |
| if (async) |
| flags |= DMA_PREP_INTERRUPT; |
| |
| if (!is_dma_copy_aligned(dc->device, dst, src, len)) |
| error(EINVAL, "Bad copy alignment: %p %p %lu", dst, src, len); |
| |
| tx = dmaengine_prep_dma_memcpy(dc, dst, src, len, flags); |
| if (!tx) |
| error(EFAIL, "Couldn't prep the memcpy!"); |
| |
| if (async) { |
| async_tx_ack(tx); |
| init_completion(&cmp); |
| tx->callback = (dma_async_tx_callback)complete; |
| tx->callback_param = &cmp; |
| } |
| |
| cookie = dmaengine_submit(tx); |
| if (cookie < 0) |
| error(EFAIL, "Failed to submit the DMA..."); |
| |
| /* Kicks the device to process the submitted descriptors. dma_sync_wait() |
| * also calls this. */ |
| dma_async_issue_pending(dc); |
| |
| if (async) { |
| /* Giant warning: the polling methods, like |
| * dmaengine_tx_status(), might actually trigger the |
| * tx->callback. At least the IOAT driver does this. */ |
| tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); |
| if (tmo == 0 || dmaengine_tx_status(dc, cookie, NULL) |
| != DMA_COMPLETE) { |
| error(ETIMEDOUT, "timeout or related spurious failure"); |
| } |
| } else { |
| dma_wait_for_async_tx(tx); |
| } |
| |
| dma_release_channel(dc); |
| poperror(); |
| } |
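| |
| /* Sketch of the batched variant mentioned in the header TODO: prep and |
|  * submit several descriptors, then touch the device once with |
|  * dma_async_issue_pending() and wait on the last cookie. This is an |
|  * untested sketch using the same dmaengine calls as issue_dma() above; |
|  * channel setup, error handling, and alignment checks are elided: |
|  * |
|  *	for (i = 0; i < nr_ops; i++) { |
|  *		tx = dmaengine_prep_dma_memcpy(dc, dst[i], src[i], len[i], 0); |
|  *		cookie = dmaengine_submit(tx); |
|  *	} |
|  *	dma_async_issue_pending(dc); |
|  *	dma_sync_wait(dc, cookie); |
|  * |
|  * Cookies are assigned monotonically per channel, so waiting on the last |
|  * one suffices if the channel completes in order (IOAT does). |
|  */ |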
| |
| static void issue_dma_ucbdma(struct ucbdma *u) |
| { |
| ERRSTACK(1); |
| struct pci_device *pdev; |
| |
| pdev = pci_match_string(u->bdf_str); |
| if (!pdev) |
| error(ENODEV, "No device %s", u->bdf_str); |
| /* The qlock prevents unassignment from happening during an operation. |
| * If that happened, the driver's reset method would be called while the |
| * op is ongoing. The driver might be able to handle that. Though when |
| * the iommu mappings are destroyed, the driver is likely to get wedged. |
| * |
| * A kref or something else might work better here, to allow multiple |
| * DMAs at a time; see the sketch after this function. */ |
| qlock(&pdev->qlock); |
| if (waserror()) { |
| qunlock(&pdev->qlock); |
| nexterror(); |
| } |
| if (pdev->proc_owner != current) |
| error(EINVAL, "wrong proc_owner"); |
| issue_dma(pdev, u->dst_addr, u->src_addr, u->xfer_size, true); |
| qunlock(&pdev->qlock); |
| poperror(); |
| } |
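| |
| /* Hypothetical kref-style scheme, per the TODOs above. The field named |
|  * here (dma_refs) does not exist on struct pci_device today; this is a |
|  * sketch of the idea, not an implementation. Issuers take a reference |
|  * instead of the qlock, so multiple DMAs can be in flight at once, and |
|  * detach gates out new issuers like we do with closing FDs: |
|  * |
|  *	if (!kref_get_not_zero(&pdev->dma_refs, 1)) |
|  *		error(ENODEV, "device is detaching"); |
|  *	issue_dma(pdev, ...); |
|  *	kref_put(&pdev->dma_refs); |
|  * |
|  * The detacher would drop the base ref, wait for the release method to |
|  * fire, and only then reset the device and tear down the IOMMU mappings. |
|  */ |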
| |
| /* Runs a basic test from within the kernel on 00:04.3. |
| * |
| * One option would be to have write() set the sza buffer. It won't be static |
| * through the chan's lifetime (so you'd need to deal with syncing), but it'd |
| * let you set things. Another would be to have another chan/file for the BDF |
| * (and you'd sync on that). */ |
| static struct sized_alloc *open_ktest(void) |
| { |
| ERRSTACK(2); |
| struct pci_device *pdev = pci_match_tbdf(MKBUS(0, 0, 4, 3)); |
| struct sized_alloc *sza; |
| physaddr_t dst, src; /* device addrs */ |
| char *dst_d, *src_d; /* driver addrs */ |
| uintptr_t prev; |
| |
| if (!pdev) |
| error(EINVAL, "no 00:04.3"); |
| |
| qlock(&pdev->qlock); |
| /* We need to run in the address space of the device's owner, which |
| * might be NULL if the device is the kernel's or unassigned. */ |
| prev = switch_to(pdev->proc_owner); |
| if (waserror()) { |
| switch_back(pdev->proc_owner, prev); |
| qunlock(&pdev->qlock); |
| nexterror(); |
| } |
| |
| if (pdev->state != DEV_STATE_ASSIGNED_KERNEL && |
| pdev->state != DEV_STATE_ASSIGNED_USER) |
| error(EINVAL, "00:04.3 is unassigned (%d)", pdev->state); |
| |
| dst_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &dst, |
| MEM_WAIT); |
| src_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &src, |
| MEM_WAIT); |
| |
| if (waserror()) { |
| dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst); |
| dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src); |
| nexterror(); |
| } |
| |
| ktest.srcfill += 1; |
| /* Fill the src and dst buffers with known patterns */ |
| memset(src_d, ktest.srcfill, KTEST_SIZE); |
| memset(dst_d, ktest.dstfill, KTEST_SIZE); |
| src_d[KTEST_SIZE-1] = '\0'; |
| dst_d[KTEST_SIZE-1] = '\0'; |
| |
| issue_dma(pdev, dst, src, KTEST_SIZE, true); |
| |
| sza = sized_kzmalloc(1024, MEM_WAIT); |
| sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE); |
| sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill); |
| sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill); |
| |
| /* %s on a user pointer causes a printfmt warning, so print char by char. |
| * Stop at 20 chars for sanity. */ |
| sza_printf(sza, "\tsrc_str (after copy): "); |
| for (int i = 0; i < 20; i++) |
| sza_printf(sza, "%c", src_d[i]); |
| sza_printf(sza, "\n"); |
| |
| sza_printf(sza, "\tdst_str (after copy): "); |
| for (int i = 0; i < 20; i++) |
| sza_printf(sza, "%c", dst_d[i]); |
| sza_printf(sza, "\n"); |
| |
| dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst); |
| dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src); |
| poperror(); |
| |
| switch_back(pdev->proc_owner, prev); |
| qunlock(&pdev->qlock); |
| poperror(); |
| |
| return sza; |
| } |
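| |
| /* Illustrative session (the exact srcfill value depends on how many times |
|  * the test has run, since it increments on each open): |
|  * |
|  *	$ cat '#cbdma/ktest' |
|  *		Copy Size: 64 (0x40) |
|  *		srcfill: 1 (0x31) |
|  *		dstfill: X (0x58) |
|  *		src_str (after copy): 11111111111111111111 |
|  *		dst_str (after copy): 11111111111111111111 |
|  */ |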
| |
| struct dev cbdmadevtab; |
| |
| static char *devname(void) |
| { |
| return cbdmadevtab.name; |
| } |
| |
| static struct chan *cbdmaattach(char *spec) |
| { |
| return devattach(devname(), spec); |
| } |
| |
| static struct walkqid *cbdmawalk(struct chan *c, struct chan *nc, char **name, |
| unsigned int nname) |
| { |
| return devwalk(c, nc, name, nname, cbdmadir, |
| ARRAY_SIZE(cbdmadir), devgen); |
| } |
| |
| static size_t cbdmastat(struct chan *c, uint8_t *dp, size_t n) |
| { |
| return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen); |
| } |
| |
| static struct chan *cbdmaopen(struct chan *c, int omode) |
| { |
| switch (c->qid.path) { |
| case Qcbdmaktest: |
| c->synth_buf = open_ktest(); |
| break; |
| case Qdir: |
| case Qcbdmaucopy: |
| break; |
| default: |
| error(EIO, "cbdma: qid 0x%lx is impossible", c->qid.path); |
| } |
| |
| return devopen(c, omode, cbdmadir, ARRAY_SIZE(cbdmadir), devgen); |
| } |
| |
| static void cbdmaclose(struct chan *c) |
| { |
| switch (c->qid.path) { |
| case Qcbdmaktest: |
| kfree(c->synth_buf); |
| c->synth_buf = NULL; |
| break; |
| case Qdir: |
| case Qcbdmaucopy: |
| break; |
| default: |
| error(EIO, "cbdma: qid 0x%lx is impossible", c->qid.path); |
| } |
| } |
| |
| static size_t cbdmaread(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| struct sized_alloc *sza = c->synth_buf; |
| |
| switch (c->qid.path) { |
| case Qcbdmaktest: |
| return readstr(offset, va, n, sza->buf); |
| case Qcbdmaucopy: |
| return readstr(offset, va, n, |
| "Write a struct ucbdma to issue a DMA\n"); |
| case Qdir: |
| return devdirread(c, va, n, cbdmadir, ARRAY_SIZE(cbdmadir), |
| devgen); |
| default: |
| error(EIO, "cbdma: qid 0x%lx is impossible", c->qid.path); |
| } |
| |
| return -1; /* not reached */ |
| } |
| |
| static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| struct ucbdma ucbdma[1]; |
| |
| switch (c->qid.path) { |
| case Qdir: |
| error(EPERM, "writing not permitted"); |
| case Qcbdmaktest: |
| error(EPERM, ERROR_FIXME); |
| case Qcbdmaucopy: |
| if (n != sizeof(struct ucbdma)) |
| error(EINVAL, "Bad ucbdma size %lu (expected %lu)", n, |
| sizeof(struct ucbdma)); |
| if (copy_from_user(ucbdma, va, sizeof(struct ucbdma))) |
| error(EINVAL, "Bad ucbdma pointer"); |
| issue_dma_ucbdma(ucbdma); |
| return n; |
| default: |
| error(EIO, "cbdma: qid 0x%lx is impossible", c->qid.path); |
| } |
| |
| return -1; /* not reached */ |
| } |
| |
| struct dev cbdmadevtab __devtab = { |
| .name = "cbdma", |
| .reset = devreset, |
| .init = devinit, |
| .shutdown = devshutdown, |
| .attach = cbdmaattach, |
| .walk = cbdmawalk, |
| .stat = cbdmastat, |
| .open = cbdmaopen, |
| .create = devcreate, |
| .close = cbdmaclose, |
| .read = cbdmaread, |
| .bread = devbread, |
| .write = cbdmawrite, |
| .bwrite = devbwrite, |
| .remove = devremove, |
| .wstat = devwstat, |
| }; |