/* Copyright (c) 2019-2020 Google Inc
 * Aditya Basu <mitthu@google.com>
 * Barret Rhoden <brho@google.com>
 * See LICENSE for details.
 *
 * Useful resources:
 *   - Intel Xeon E7 2800/4800/8800 Datasheet Vol. 2
 *   - Purley Programmer's Guide
 *
 * Acronyms:
 *   - IOAT: (Intel) I/O Acceleration Technology
 *   - CBDMA: Crystal Beach DMA
 *
 * TODO
 * - Consider something lighter-weight than the qlock for ensuring the device
 * doesn't get detached during operation.  A kref, perhaps (see the sketch
 * after issue_dma_ucbdma()).  There's also an element of "stop new people
 * from coming in", like we do with closing FDs.  There's also stuff that the
 * dmaengine does in Linux; see dma_chan_get().
 * - Freeze or handle faults with VA->PA page mappings until the DMA is
 * completed.  Right now, we could get IOMMU faults, which was the purpose of
 * this whole thing.
 *	- The dmaengine has helpers for some of this.  dma_set_unmap() is an
 *	"unmap all these things when you're done" approach, called by __cleanup
 *	-> dma_descriptor_unmap().  The unmap struct is basically a todo list.
 * - There's a lot of stuff we could do with the DMA engine to reduce the
 * amount of device touches, contention, and other inefficiencies.
 * issue_dma() is a minimalist implementation: no batching, etc. (see the
 * batching sketch after issue_dma()).  And with the pdev qlock, we have only
 * a single request per PCI device, though there may be numerous channels.
 */

#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <net/ip.h>
#include <linux_compat.h>
#include <arch/pci.h>
#include <page_alloc.h>
#include <pmap.h>
#include <arch/pci_regs.h>

#include <linux/dmaengine.h>

/* QID Path */
enum {
	Qdir           = 0,
	Qcbdmaktest    = 1,
	Qcbdmaucopy    = 2,
};

static struct dirtab cbdmadir[] = {
	{".",         {Qdir, 0, QTDIR}, 0, 0555},
	{"ktest",     {Qcbdmaktest, 0, QTFILE}, 0, 0555},
	{"ucopy",     {Qcbdmaucopy, 0, QTFILE}, 0, 0755},
};

/* TODO: this is a device/kernel ABI.  ucbdma.c has a copy.  It's probably not
 * worth putting in its own header, since this is really cheap test code. */
struct ucbdma {
	uint64_t		dst_addr;
	uint64_t		src_addr;
	uint32_t		xfer_size;
	char			bdf_str[10];
} __attribute__((packed));
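
/* A minimal user-space sketch of driving the ucopy ABI above (compiled out;
 * this is illustration, not part of the driver).  It assumes the calling
 * process owns the device, that dst/src are addresses the device can reach
 * (e.g. user VAs mapped through the IOMMU), and a caller-supplied BDF.  See
 * ucbdma.c for the real test code. */
#if 0
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int ucopy_memcpy(uint64_t dst, uint64_t src, uint32_t len,
			const char *bdf)
{
	struct ucbdma u = {0};
	ssize_t ret;
	int fd;

	u.dst_addr = dst;
	u.src_addr = src;
	u.xfer_size = len;
	snprintf(u.bdf_str, sizeof(u.bdf_str), "%s", bdf);

	fd = open("#cbdma/ucopy", O_WRONLY);
	if (fd < 0)
		return -1;
	/* The kernel expects the struct itself, not a pointer to it. */
	ret = write(fd, &u, sizeof(u));
	close(fd);
	return ret == sizeof(u) ? 0 : -1;
}
#endif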

#define KTEST_SIZE 64
static struct {
	char    src[KTEST_SIZE];
	char    dst[KTEST_SIZE];
	char    srcfill;
	char    dstfill;
} ktest = {.srcfill = '0', .dstfill = 'X'};

static inline struct pci_device *dma_chan_to_pci_dev(struct dma_chan *dc)
{
	return container_of(dc->device->dev, struct pci_device, linux_dev);
}

/* Filter function for finding a particular PCI device.  If
 * __dma_request_channel() asks for a particular device, we'll only give it
 * that chan.  If you don't care, pass NULL, and you'll get any free chan. */
static bool filter_pci_dev(struct dma_chan *dc, void *arg)
{
	struct pci_device *pdev = dma_chan_to_pci_dev(dc);

	if (arg)
		return arg == pdev;
	return true;
}

/* Addresses are device-physical.  Caller holds the pdev qlock. */
static void issue_dma(struct pci_device *pdev, physaddr_t dst, physaddr_t src,
		      size_t len, bool async)
{
	ERRSTACK(1);
	struct dma_chan *dc;
	dma_cap_mask_t mask;
	struct dma_async_tx_descriptor *tx;
	int flags;

	struct completion cmp;
	unsigned long tmo;
	dma_cookie_t cookie;

	/* dmaengine_get() works for non-DMA_PRIVATE devices.  A lot of
	 * devices turn on DMA_PRIVATE, in which case they won't be in the
	 * general pool available to the dmaengine.  Instead, we directly
	 * request DMA channels - particularly since we want specific devices
	 * to use with the IOMMU. */

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	dc = __dma_request_channel(&mask, filter_pci_dev, pdev);
	if (!dc)
		error(EFAIL, "Couldn't get a DMA channel");
	if (waserror()) {
		dma_release_channel(dc);
		nexterror();
	}

	flags = 0;
	if (async)
		flags |= DMA_PREP_INTERRUPT;

	if (!is_dma_copy_aligned(dc->device, dst, src, len))
		error(EINVAL, "Bad copy alignment: %p %p %lu", dst, src, len);

	tx = dmaengine_prep_dma_memcpy(dc, dst, src, len, flags);
	if (!tx)
		error(EFAIL, "Couldn't prep the memcpy!");

	if (async) {
		async_tx_ack(tx);
		init_completion(&cmp);
		tx->callback = (dma_async_tx_callback)complete;
		tx->callback_param = &cmp;
	}

	cookie = dmaengine_submit(tx);
	if (cookie < 0)
		error(EFAIL, "Failed to submit the DMA...");

	/* Kick the engine.  Safe to call repeatedly; dma_sync_wait() also
	 * calls this. */
	dma_async_issue_pending(dc);

	if (async) {
		/* Giant warning: the polling methods, like
		 * dmaengine_tx_status(), might actually trigger the
		 * tx->callback.  At least the IOAT driver does this. */
		tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
		if (tmo == 0 || dmaengine_tx_status(dc, cookie, NULL)
					!= DMA_COMPLETE) {
			error(ETIMEDOUT, "timeout or related spurious failure");
		}
	} else {
		dma_wait_for_async_tx(tx);
	}

	dma_release_channel(dc);
	poperror();
}
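
/* A sketch of the batching mentioned in the TODO at the top (compiled out).
 * The idea: prep and submit several descriptors, ask for an interrupt only on
 * the last one, and touch the device once with a single
 * dma_async_issue_pending().  This leans on the channel completing
 * descriptors in order, which holds for IOAT.  Same caller conventions as
 * issue_dma(); 'struct dma_seg' is invented for illustration. */
#if 0
struct dma_seg {
	physaddr_t	dst;
	physaddr_t	src;
	size_t		len;
};

static void issue_dma_batch(struct dma_chan *dc, struct dma_seg *segs,
			    int nr_segs)
{
	struct dma_async_tx_descriptor *tx;
	struct completion cmp;
	dma_cookie_t cookie;
	int flags;

	init_completion(&cmp);
	for (int i = 0; i < nr_segs; i++) {
		/* Only the last descriptor needs to interrupt us. */
		flags = (i == nr_segs - 1) ? DMA_PREP_INTERRUPT : 0;
		tx = dmaengine_prep_dma_memcpy(dc, segs[i].dst, segs[i].src,
					       segs[i].len, flags);
		if (!tx)
			error(EFAIL, "Couldn't prep memcpy %d", i);
		async_tx_ack(tx);
		if (i == nr_segs - 1) {
			tx->callback = (dma_async_tx_callback)complete;
			tx->callback_param = &cmp;
		}
		cookie = dmaengine_submit(tx);
		if (cookie < 0)
			error(EFAIL, "Failed to submit memcpy %d", i);
	}
	/* One device touch for the whole batch. */
	dma_async_issue_pending(dc);
	if (!wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)))
		error(ETIMEDOUT, "DMA batch timed out");
}
#endif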

static void issue_dma_ucbdma(struct ucbdma *u)
{
	ERRSTACK(1);
	struct pci_device *pdev;

	pdev = pci_match_string(u->bdf_str);
	if (!pdev)
		error(ENODEV, "No device %s", u->bdf_str);
	/* The qlock prevents unassignment from happening during an operation.
	 * If that happened, the driver's reset method would be called while
	 * the op is ongoing.  The driver might be able to handle that, though
	 * when the IOMMU mappings are destroyed, the driver is likely to get
	 * wedged.
	 *
	 * A kref or something else might work better here, to allow multiple
	 * DMAs at a time (see the sketch below). */
	qlock(&pdev->qlock);
	if (waserror()) {
		qunlock(&pdev->qlock);
		nexterror();
	}
	if (pdev->proc_owner != current)
		error(EINVAL, "wrong proc_owner");
	issue_dma(pdev, u->dst_addr, u->src_addr, u->xfer_size, true);
	qunlock(&pdev->qlock);
	poperror();
}
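
/* A sketch of the kref-ish approach from the comment above (compiled out):
 * allow concurrent DMAs while blocking unassignment.  'dma_users' and
 * 'detaching' are hypothetical fields on struct pci_device; the detach path
 * would call dma_users_drain() with the qlock held before resetting the
 * device. */
#if 0
static bool dma_user_get(struct pci_device *pdev)
{
	bool ok;

	qlock(&pdev->qlock);
	ok = !pdev->detaching && pdev->proc_owner == current;
	if (ok)
		atomic_inc(&pdev->dma_users);
	qunlock(&pdev->qlock);
	return ok;
}

static void dma_user_put(struct pci_device *pdev)
{
	atomic_dec(&pdev->dma_users);
}

/* Close the gate, then wait out in-flight DMAs.  Caller holds the qlock. */
static void dma_users_drain(struct pci_device *pdev)
{
	pdev->detaching = true;
	while (atomic_read(&pdev->dma_users) > 0) {
		qunlock(&pdev->qlock);
		kthread_usleep(1000);
		qlock(&pdev->qlock);
	}
}
#endif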

/* Runs a basic test from within the kernel on 00:04.3.
 *
 * One option would be to have write() set the sza buffer.  It won't be static
 * through the chan's lifetime (so you'd need to deal with syncing), but it'd
 * let you set things.  Another would be to have another chan/file for the BDF
 * (and you'd sync on that; see the sketch after this function). */
static struct sized_alloc *open_ktest(void)
{
	ERRSTACK(2);
	struct pci_device *pdev = pci_match_tbdf(MKBUS(0, 0, 4, 3));
	struct sized_alloc *sza;
	physaddr_t dst, src;	/* device addrs */
	char *dst_d, *src_d;	/* driver addrs */
	uintptr_t prev;

	if (!pdev)
		error(EINVAL, "no 00:04.3");

	qlock(&pdev->qlock);
	/* We need to get into the address space of the device's owner, which
	 * might be NULL if the device is kernel-owned or unassigned. */
	prev = switch_to(pdev->proc_owner);
	if (waserror()) {
		switch_back(pdev->proc_owner, prev);
		qunlock(&pdev->qlock);
		nexterror();
	}

	if (pdev->state != DEV_STATE_ASSIGNED_KERNEL &&
	    pdev->state != DEV_STATE_ASSIGNED_USER)
		error(EINVAL, "00:04.3 is unassigned (%d)", pdev->state);

	dst_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &dst,
				   MEM_WAIT);
	src_d = dma_alloc_coherent(&pdev->linux_dev, KTEST_SIZE, &src,
				   MEM_WAIT);

	if (waserror()) {
		dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
		dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
		nexterror();
	}

	ktest.srcfill += 1;
	/* Initialize the src and dst buffers. */
	memset(src_d, ktest.srcfill, KTEST_SIZE);
	memset(dst_d, ktest.dstfill, KTEST_SIZE);
	src_d[KTEST_SIZE - 1] = '\0';
	dst_d[KTEST_SIZE - 1] = '\0';

	issue_dma(pdev, dst, src, KTEST_SIZE, true);

	sza = sized_kzmalloc(1024, MEM_WAIT);
	sza_printf(sza, "\tCopy Size: %d (0x%x)\n", KTEST_SIZE, KTEST_SIZE);
	sza_printf(sza, "\tsrcfill: %c (0x%x)\n", ktest.srcfill, ktest.srcfill);
	sza_printf(sza, "\tdstfill: %c (0x%x)\n", ktest.dstfill, ktest.dstfill);

	/* %s on a user pointer causes a printfmt warning, so print
	 * char-by-char.  Stop at 20 chars, for sanity. */
	sza_printf(sza, "\tsrc_str (after copy): ");
	for (int i = 0; i < 20; i++)
		sza_printf(sza, "%c", src_d[i]);
	sza_printf(sza, "\n");

	sza_printf(sza, "\tdst_str (after copy): ");
	for (int i = 0; i < 20; i++)
		sza_printf(sza, "%c", dst_d[i]);
	sza_printf(sza, "\n");

	dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, dst_d, dst);
	dma_free_coherent(&pdev->linux_dev, KTEST_SIZE, src_d, src);
	poperror();

	switch_back(pdev->proc_owner, prev);
	qunlock(&pdev->qlock);
	poperror();

	return sza;
}
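
/* A sketch of the second option above (compiled out): a hypothetical
 * Qcbdmabdf file whose write() sets the BDF that open_ktest() would use
 * instead of hardcoding 00:04.3.  'ktest_bdf' and its lock are invented for
 * illustration; you'd also add the file to cbdmadir and route writes to it
 * in cbdmawrite(). */
#if 0
static spinlock_t ktest_bdf_lock = SPINLOCK_INITIALIZER;
static char ktest_bdf[10] = "00:04.3";

static size_t write_ktest_bdf(void *va, size_t n)
{
	char buf[sizeof(ktest_bdf)];

	if (n >= sizeof(buf))
		error(EINVAL, "BDF string too long");
	if (copy_from_user(buf, va, n))
		error(EINVAL, "Bad user pointer");
	buf[n] = '\0';
	/* open_ktest() would snapshot this under the same lock and use
	 * pci_match_string() instead of pci_match_tbdf(). */
	spin_lock(&ktest_bdf_lock);
	memcpy(ktest_bdf, buf, n + 1);
	spin_unlock(&ktest_bdf_lock);
	return n;
}
#endif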

struct dev cbdmadevtab;

static char *devname(void)
{
	return cbdmadevtab.name;
}

static struct chan *cbdmaattach(char *spec)
{
	return devattach(devname(), spec);
}

static struct walkqid *cbdmawalk(struct chan *c, struct chan *nc, char **name,
				 unsigned int nname)
{
	return devwalk(c, nc, name, nname, cbdmadir,
		       ARRAY_SIZE(cbdmadir), devgen);
}

static size_t cbdmastat(struct chan *c, uint8_t *dp, size_t n)
{
	return devstat(c, dp, n, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
}

static struct chan *cbdmaopen(struct chan *c, int omode)
{
	switch (c->qid.path) {
	case Qcbdmaktest:
		c->synth_buf = open_ktest();
		break;
	case Qdir:
	case Qcbdmaucopy:
		break;
	default:
		error(EIO, "cbdma: qid 0x%llx is impossible", c->qid.path);
	}

	return devopen(c, omode, cbdmadir, ARRAY_SIZE(cbdmadir), devgen);
}

static void cbdmaclose(struct chan *c)
{
	switch (c->qid.path) {
	case Qcbdmaktest:
		kfree(c->synth_buf);
		c->synth_buf = NULL;
		break;
	case Qdir:
	case Qcbdmaucopy:
		break;
	default:
		error(EIO, "cbdma: qid 0x%llx is impossible", c->qid.path);
	}
}

static size_t cbdmaread(struct chan *c, void *va, size_t n, off64_t offset)
{
	struct sized_alloc *sza = c->synth_buf;

	switch (c->qid.path) {
	case Qcbdmaktest:
		return readstr(offset, va, n, sza->buf);
	case Qcbdmaucopy:
		return readstr(offset, va, n,
			"Write a struct ucbdma to issue a DMA\n");
	case Qdir:
		return devdirread(c, va, n, cbdmadir, ARRAY_SIZE(cbdmadir),
					devgen);
	default:
		error(EIO, "cbdma: qid 0x%llx is impossible", c->qid.path);
	}

	return -1;      /* not reached */
}

static size_t cbdmawrite(struct chan *c, void *va, size_t n, off64_t offset)
{
	struct ucbdma ucbdma[1];

	switch (c->qid.path) {
	case Qdir:
		error(EPERM, "writing not permitted");
	case Qcbdmaktest:
		error(EPERM, ERROR_FIXME);
	case Qcbdmaucopy:
		if (n != sizeof(struct ucbdma))
			error(EINVAL, "Bad ucbdma size %lu (want %lu)", n,
			      sizeof(struct ucbdma));
		if (copy_from_user(ucbdma, va, sizeof(struct ucbdma)))
			error(EINVAL, "Bad ucbdma pointer");
		issue_dma_ucbdma(ucbdma);
		return n;
	default:
		error(EIO, "cbdma: qid 0x%llx is impossible", c->qid.path);
	}

	return -1;      /* not reached */
}

struct dev cbdmadevtab __devtab = {
	.name       = "cbdma",
	.reset      = devreset,
	.init       = devinit,
	.shutdown   = devshutdown,
	.attach     = cbdmaattach,
	.walk       = cbdmawalk,
	.stat       = cbdmastat,
	.open       = cbdmaopen,
	.create     = devcreate,
	.close      = cbdmaclose,
	.read       = cbdmaread,
	.bread      = devbread,
	.write      = cbdmawrite,
	.bwrite     = devbwrite,
	.remove     = devremove,
	.wstat      = devwstat,
};