ioat: port the IOAT driver Passes the self-test. Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dma/Kbuild b/kern/drivers/dma/Kbuild index 43bf5c7..d13ffb5 100644 --- a/kern/drivers/dma/Kbuild +++ b/kern/drivers/dma/Kbuild
@@ -1 +1,2 @@ obj-y += dmaengine.o +obj-y += ioat/
diff --git a/kern/drivers/dma/ioat/Kbuild b/kern/drivers/dma/ioat/Kbuild new file mode 100644 index 0000000..c9a9755 --- /dev/null +++ b/kern/drivers/dma/ioat/Kbuild
@@ -0,0 +1,3 @@ +obj-y += dma.o +obj-y += init.o +obj-y += prep.o
diff --git a/kern/drivers/dma/ioat/dma.c b/kern/drivers/dma/ioat/dma.c index 2554f37..94fc5eb 100644 --- a/kern/drivers/dma/ioat/dma.c +++ b/kern/drivers/dma/ioat/dma.c
@@ -21,17 +21,9 @@ * copy operations. */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> +#include <linux_compat.h> #include <linux/sizes.h> + #include "dma.h" #include "registers.h" #include "hw.h" @@ -701,9 +693,9 @@ uint64_t phys_complete; /* set the completion address register again */ - write32(lower_32_bits(ioat_chan->completion_dma), + write32(low32(ioat_chan->completion_dma), ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - write32(upper_32_bits(ioat_chan->completion_dma), + write32(high32(ioat_chan->completion_dma), ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); ioat_quiesce(ioat_chan, 0);
diff --git a/kern/drivers/dma/ioat/dma.h b/kern/drivers/dma/ioat/dma.h index 737f5c5..14d2e2f 100644 --- a/kern/drivers/dma/ioat/dma.h +++ b/kern/drivers/dma/ioat/dma.h
@@ -17,13 +17,9 @@ #ifndef IOATDMA_H #define IOATDMA_H +#include <linux_compat.h> #include <linux/dmaengine.h> -#include <linux/init.h> -#include <linux/dmapool.h> -#include <linux/cache.h> -#include <linux/pci_ids.h> #include <linux/circ_buf.h> -#include <linux/interrupt.h> #include "registers.h" #include "hw.h" @@ -32,7 +28,7 @@ #define IOAT_DMA_DCA_ANY_CPU ~0 #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) -#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->linux_dev) #define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) #define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) @@ -118,7 +114,9 @@ dma_addr_t completion_dma; uint64_t *completion; struct tasklet_struct cleanup_task; +#if 0 // AKAROS struct kobject kobj; +#endif /* ioat v2 / v3 channel attributes * @xfercap_log; log2 of channel max transfer length (for fast division) @@ -146,11 +144,13 @@ int prev_intr_coalesce; }; +#if 0 // AKAROS struct ioat_sysfs_entry { struct attribute attr; ssize_t (*show)(struct dma_chan *, char *); ssize_t (*store)(struct dma_chan *, const char *, size_t); }; +#endif /** * struct ioat_sed_ent - wrapper around super extended hardware descriptor @@ -202,8 +202,10 @@ }; extern const struct sysfs_ops ioat_sysfs_ops; +#if 0 // AKAROS extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; +#endif extern int ioat_pending_level; extern int ioat_ring_alloc_order; extern struct kobj_type ioat_ktype;
diff --git a/kern/drivers/dma/ioat/hw.h b/kern/drivers/dma/ioat/hw.h index 96a99da..adc6ccb 100644 --- a/kern/drivers/dma/ioat/hw.h +++ b/kern/drivers/dma/ioat/hw.h
@@ -20,6 +20,40 @@ /* PCI Configuration Space Values */ #define IOAT_MMIO_BAR 0 +/* From Linux pci_ids.h */ +#define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 +#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e +#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f +#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f +#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff + /* CB device ID's */ #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21
diff --git a/kern/drivers/dma/ioat/init.c b/kern/drivers/dma/ioat/init.c index d3a7382..badca85 100644 --- a/kern/drivers/dma/ioat/init.c +++ b/kern/drivers/dma/ioat/init.c
@@ -16,19 +16,9 @@ * */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> -#include <linux/dca.h> -#include <linux/aer.h> +#include <linux_compat.h> #include <linux/sizes.h> + #include "dma.h" #include "registers.h" #include "hw.h" @@ -423,6 +413,47 @@ int err = -EINVAL; uint8_t intrctrl = 0; +#if 1 // AKAROS + /* Our IRQ setup needs a lot of work. Let's just assume MSI-X, since + * any platform that has an IOAT should have MSI-X. */ + if (pci_msix_init(pdev)) { + dev_err(dev, "Failed to setup IOAT MSI-X\n"); + goto err_no_irq; + } + + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->dma_dev.chancnt; + if (pdev->msix_nr_vec < msixcnt) { + dev_err(dev, "Too few msix vec (%d < %d)\n", pdev->msix_nr_vec, + msixcnt); + goto err_no_irq; + } + for (i = 0; i < msixcnt; i++) { + msix = &ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + /* register_irq is a mess... the IRQ is the line, but isn't used + * for msi/msix. Passing 0 for now, since -1 doesn't seem like + * a good idea. This tries to do too much, and you have no + * control / insight into what it's doing. */ + err = register_irq(0 /* ignored for msi(x)! */, + ioat_dma_do_interrupt_msix, ioat_chan, + pci_to_tbdf(pdev)); + /* TODO: this is a mess - we also don't know if we're actually + * MSIX or not! We don't even know our vector... 
*/ + if (err) { + warn("MSIX failed (cnt %d), leaking vectors etc!", i); + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + //devm_free_irq(dev, msix->vector, ioat_chan); + } + goto err_no_irq; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; +#else if (!strcmp(ioat_interrupt_style, "msix")) goto msix; if (!strcmp(ioat_interrupt_style, "msi")) @@ -482,6 +513,7 @@ goto err_no_irq; ioat_dma->irq_mode = IOAT_INTX; +#endif done: if (is_bwd_ioat(pdev)) ioat_intr_quirk(ioat_dma); @@ -566,7 +598,9 @@ ioat_disable_interrupts(ioat_dma); +#if 0 // AKAROS ioat_kobject_del(ioat_dma); +#endif dma_async_device_unregister(dma); @@ -608,7 +642,7 @@ ioat_init_channel(ioat_dma, ioat_chan, i); ioat_chan->xfercap_log = xfercap_log; - spinlock_init_irqsave(&ioat_chan->prep_lock); + spinlock_init(&ioat_chan->prep_lock); if (ioat_reset_hw(ioat_chan)) { i = 0; break; @@ -785,7 +819,7 @@ ioat_chan->ioat_dma = ioat_dma; ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); - spinlock_init_irqsave(&ioat_chan->cleanup_lock); + spinlock_init(&ioat_chan->cleanup_lock); ioat_chan->dma_chan.device = dma; dma_cookie_init(&ioat_chan->dma_chan); list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); @@ -1100,6 +1134,11 @@ } } +int system_has_dca_enabled(struct pci_device *p) +{ + return 0; +} + static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) { struct pci_device *pdev = ioat_dma->pdev; @@ -1201,10 +1240,12 @@ if (err) return err; +#if 0 // AKAROS ioat_kobject_add(ioat_dma, &ioat_ktype); if (dca) ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); +#endif /* disable relaxed ordering */ err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16); @@ -1321,6 +1362,7 @@ ioat_resume(ioat_dma); } +#if 0 // AKAROS static const struct pci_error_handlers ioat_err_handler = { .error_detected = ioat_pcie_error_detected, .slot_reset = 
ioat_pcie_error_slot_reset, @@ -1335,6 +1377,7 @@ .shutdown = ioat_shutdown, .err_handler = &ioat_err_handler, }; +#endif static struct ioatdma_device * alloc_ioatdma(struct pci_device *pdev, void __iomem *iobase) @@ -1357,6 +1400,7 @@ struct ioatdma_device *device; int err; +#if 0 // AKAROS err = pcim_enable_device(pdev); if (err) return err; @@ -1379,6 +1423,17 @@ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) return err; +#else + /* TODO: Make a bar-mapping helper, similar to Linux. Given BAR id, + * vmap it, and put its info in a table. */ + void *bar; + + bar = (void*)vmap_pmem_nocache(pdev->bar[0].mmio_base32 + ? pdev->bar[0].mmio_base32 : + pdev->bar[0].mmio_base64, + pdev->bar[0].mmio_sz); + iomap = &bar; +#endif device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); if (!device) @@ -1408,6 +1463,26 @@ return 0; } +/* In lieu of a decent PCI processing system... */ +static void __init ioat_init(void) +{ + struct pci_device *p; + + STAILQ_FOREACH(p, &pci_devices, all_dev) { + if (p->ven_id != PCI_VENDOR_ID_INTEL) + continue; + for (int i = 0; ioat_pci_tbl[i].device; i++) { + if (p->dev_id == ioat_pci_tbl[i].device) { + ioat_pci_probe(p, &ioat_pci_tbl[i]); + break; + } + } + } +} +/* The 'arch_initcall' setup functions run at level 2. 
*/ +init_func_3(ioat_init); + +#if 0 // AKAROS static void ioat_remove(struct pci_device *pdev) { struct ioatdma_device *device = pci_get_drvdata(pdev); @@ -1425,6 +1500,7 @@ pci_disable_pcie_error_reporting(pdev); ioat_dma_remove(device); } +#endif static int __init ioat_init_module(void) { @@ -1434,7 +1510,8 @@ DRV_NAME, IOAT_DMA_VERSION); ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), - 0, SLAB_HWCACHE_ALIGN, NULL); + ARCH_CL_SIZE, 0, NULL, NULL, NULL, + NULL); if (!ioat_cache) return -ENOMEM; @@ -1442,9 +1519,11 @@ if (!ioat_sed_cache) goto err_ioat_cache; +#if 0 // AKAROS err = pci_register_driver(&ioat_pci_driver); if (err) goto err_ioat3_cache; +#endif return 0; @@ -1457,10 +1536,13 @@ return err; } module_init(ioat_init_module); +init_func_2(ioat_init_module); +#if 0 // AKAROS static void __exit ioat_exit_module(void) { pci_unregister_driver(&ioat_pci_driver); kmem_cache_destroy(ioat_cache); } module_exit(ioat_exit_module); +#endif
diff --git a/kern/drivers/dma/ioat/prep.c b/kern/drivers/dma/ioat/prep.c index 2322c74..d41f1f8 100644 --- a/kern/drivers/dma/ioat/prep.c +++ b/kern/drivers/dma/ioat/prep.c
@@ -15,12 +15,9 @@ * the file called "COPYING". * */ -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/prefetch.h> + +#include <linux_compat.h> + #include "../dmaengine.h" #include "registers.h" #include "hw.h" @@ -94,15 +91,15 @@ ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool) { struct ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | 0; + gfp_t flags = /* __GFP_ZERO | */ 0; - sed = kmem_cache_alloc(ioat_sed_cache, flags); + sed = kmem_cache_zalloc(ioat_sed_cache, flags); if (!sed) return NULL; sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], - flags, &sed->dma); + sed->hw = dma_pool_zalloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); if (!sed->hw) { kmem_cache_free(ioat_sed_cache, sed); return NULL;