ioat: port the IOAT driver
Passes the self-test.
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dma/Kbuild b/kern/drivers/dma/Kbuild
index 43bf5c7..d13ffb5 100644
--- a/kern/drivers/dma/Kbuild
+++ b/kern/drivers/dma/Kbuild
@@ -1 +1,2 @@
obj-y += dmaengine.o
+obj-y += ioat/
diff --git a/kern/drivers/dma/ioat/Kbuild b/kern/drivers/dma/ioat/Kbuild
new file mode 100644
index 0000000..c9a9755
--- /dev/null
+++ b/kern/drivers/dma/ioat/Kbuild
@@ -0,0 +1,3 @@
+obj-y += dma.o
+obj-y += init.o
+obj-y += prep.o
diff --git a/kern/drivers/dma/ioat/dma.c b/kern/drivers/dma/ioat/dma.c
index 2554f37..94fc5eb 100644
--- a/kern/drivers/dma/ioat/dma.c
+++ b/kern/drivers/dma/ioat/dma.c
@@ -21,17 +21,9 @@
* copy operations.
*/
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/prefetch.h>
+#include <linux_compat.h>
#include <linux/sizes.h>
+
#include "dma.h"
#include "registers.h"
#include "hw.h"
@@ -701,9 +693,9 @@
uint64_t phys_complete;
/* set the completion address register again */
- write32(lower_32_bits(ioat_chan->completion_dma),
+ write32(low32(ioat_chan->completion_dma),
ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
- write32(upper_32_bits(ioat_chan->completion_dma),
+ write32(high32(ioat_chan->completion_dma),
ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
ioat_quiesce(ioat_chan, 0);
diff --git a/kern/drivers/dma/ioat/dma.h b/kern/drivers/dma/ioat/dma.h
index 737f5c5..14d2e2f 100644
--- a/kern/drivers/dma/ioat/dma.h
+++ b/kern/drivers/dma/ioat/dma.h
@@ -17,13 +17,9 @@
#ifndef IOATDMA_H
#define IOATDMA_H
+#include <linux_compat.h>
#include <linux/dmaengine.h>
-#include <linux/init.h>
-#include <linux/dmapool.h>
-#include <linux/cache.h>
-#include <linux/pci_ids.h>
#include <linux/circ_buf.h>
-#include <linux/interrupt.h>
#include "registers.h"
#include "hw.h"
@@ -32,7 +28,7 @@
#define IOAT_DMA_DCA_ANY_CPU ~0
#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev)
-#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev)
+#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->linux_dev)
#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev)
#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80)
@@ -118,7 +114,9 @@
dma_addr_t completion_dma;
uint64_t *completion;
struct tasklet_struct cleanup_task;
+#if 0 // AKAROS
struct kobject kobj;
+#endif
/* ioat v2 / v3 channel attributes
* @xfercap_log; log2 of channel max transfer length (for fast division)
@@ -146,11 +144,13 @@
int prev_intr_coalesce;
};
+#if 0 // AKAROS
struct ioat_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct dma_chan *, char *);
ssize_t (*store)(struct dma_chan *, const char *, size_t);
};
+#endif
/**
* struct ioat_sed_ent - wrapper around super extended hardware descriptor
@@ -202,8 +202,10 @@
};
extern const struct sysfs_ops ioat_sysfs_ops;
+#if 0 // AKAROS
extern struct ioat_sysfs_entry ioat_version_attr;
extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif
extern int ioat_pending_level;
extern int ioat_ring_alloc_order;
extern struct kobj_type ioat_ktype;
diff --git a/kern/drivers/dma/ioat/hw.h b/kern/drivers/dma/ioat/hw.h
index 96a99da..adc6ccb 100644
--- a/kern/drivers/dma/ioat/hw.h
+++ b/kern/drivers/dma/ioat/hw.h
@@ -20,6 +20,40 @@
/* PCI Configuration Space Values */
#define IOAT_MMIO_BAR 0
+/* From Linux pci_ids.h */
+#define PCI_DEVICE_ID_INTEL_IOAT 0x1a38
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432
+#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433
+#define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f
+#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
+#define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff
+
/* CB device ID's */
#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20
#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21
diff --git a/kern/drivers/dma/ioat/init.c b/kern/drivers/dma/ioat/init.c
index d3a7382..badca85 100644
--- a/kern/drivers/dma/ioat/init.c
+++ b/kern/drivers/dma/ioat/init.c
@@ -16,19 +16,9 @@
*
*/
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/prefetch.h>
-#include <linux/dca.h>
-#include <linux/aer.h>
+#include <linux_compat.h>
#include <linux/sizes.h>
+
#include "dma.h"
#include "registers.h"
#include "hw.h"
@@ -423,6 +413,47 @@
int err = -EINVAL;
uint8_t intrctrl = 0;
+#if 1 // AKAROS
+ /* Our IRQ setup needs a lot of work. Let's just assume MSI-X, since
+ * any platform that has an IOAT should have MSI-X. */
+ if (pci_msix_init(pdev)) {
+ dev_err(dev, "Failed to setup IOAT MSI-X\n");
+ goto err_no_irq;
+ }
+
+ /* The number of MSI-X vectors should equal the number of channels */
+ msixcnt = ioat_dma->dma_dev.chancnt;
+ if (pdev->msix_nr_vec < msixcnt) {
+ dev_err(dev, "Too few msix vec (%d < %d)\n", pdev->msix_nr_vec,
+ msixcnt);
+ goto err_no_irq;
+ }
+ for (i = 0; i < msixcnt; i++) {
+ msix = &ioat_dma->msix_entries[i];
+ ioat_chan = ioat_chan_by_index(ioat_dma, i);
+ /* register_irq is a mess... the IRQ is the line, but isn't used
+ * for msi/msix. Passing 0 for now, since -1 doesn't seem like
+ * a good idea. This tries to do too much, and you have no
+ * control / insight into what it's doing. */
+ err = register_irq(0 /* ignored for msi(x)! */,
+ ioat_dma_do_interrupt_msix, ioat_chan,
+ pci_to_tbdf(pdev));
+ /* TODO: this is a mess - we also don't know if we're actually
+ * MSIX or not! We don't even know our vector... */
+ if (err) {
+ warn("MSIX failed (cnt %d), leaking vectors etc!", i);
+ for (j = 0; j < i; j++) {
+ msix = &ioat_dma->msix_entries[j];
+ ioat_chan = ioat_chan_by_index(ioat_dma, j);
+ //devm_free_irq(dev, msix->vector, ioat_chan);
+ }
+ goto err_no_irq;
+ }
+ }
+ intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ ioat_dma->irq_mode = IOAT_MSIX;
+ goto done;
+#else
if (!strcmp(ioat_interrupt_style, "msix"))
goto msix;
if (!strcmp(ioat_interrupt_style, "msi"))
@@ -482,6 +513,7 @@
goto err_no_irq;
ioat_dma->irq_mode = IOAT_INTX;
+#endif
done:
if (is_bwd_ioat(pdev))
ioat_intr_quirk(ioat_dma);
@@ -566,7 +598,9 @@
ioat_disable_interrupts(ioat_dma);
+#if 0 // AKAROS
ioat_kobject_del(ioat_dma);
+#endif
dma_async_device_unregister(dma);
@@ -608,7 +642,7 @@
ioat_init_channel(ioat_dma, ioat_chan, i);
ioat_chan->xfercap_log = xfercap_log;
- spinlock_init_irqsave(&ioat_chan->prep_lock);
+ spinlock_init(&ioat_chan->prep_lock);
if (ioat_reset_hw(ioat_chan)) {
i = 0;
break;
@@ -785,7 +819,7 @@
ioat_chan->ioat_dma = ioat_dma;
ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1));
- spinlock_init_irqsave(&ioat_chan->cleanup_lock);
+ spinlock_init(&ioat_chan->cleanup_lock);
ioat_chan->dma_chan.device = dma;
dma_cookie_init(&ioat_chan->dma_chan);
list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
@@ -1100,6 +1134,11 @@
}
}
+int system_has_dca_enabled(struct pci_device *p)
+{
+ return 0;
+}
+
static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
{
struct pci_device *pdev = ioat_dma->pdev;
@@ -1201,10 +1240,12 @@
if (err)
return err;
+#if 0 // AKAROS
ioat_kobject_add(ioat_dma, &ioat_ktype);
if (dca)
ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base);
+#endif
/* disable relaxed ordering */
err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16);
@@ -1321,6 +1362,7 @@
ioat_resume(ioat_dma);
}
+#if 0 // AKAROS
static const struct pci_error_handlers ioat_err_handler = {
.error_detected = ioat_pcie_error_detected,
.slot_reset = ioat_pcie_error_slot_reset,
@@ -1335,6 +1377,7 @@
.shutdown = ioat_shutdown,
.err_handler = &ioat_err_handler,
};
+#endif
static struct ioatdma_device *
alloc_ioatdma(struct pci_device *pdev, void __iomem *iobase)
@@ -1357,6 +1400,7 @@
struct ioatdma_device *device;
int err;
+#if 0 // AKAROS
err = pcim_enable_device(pdev);
if (err)
return err;
@@ -1379,6 +1423,17 @@
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err)
return err;
+#else
+ /* TODO: Make a bar-mapping helper, similar to Linux. Given BAR id,
+ * vmap it, and put its info in a table. */
+ void *bar;
+
+ bar = (void*)vmap_pmem_nocache(pdev->bar[0].mmio_base32
+ ? pdev->bar[0].mmio_base32 :
+ pdev->bar[0].mmio_base64,
+ pdev->bar[0].mmio_sz);
+ iomap = &bar;
+#endif
device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
if (!device)
@@ -1408,6 +1463,26 @@
return 0;
}
+/* In lieu of a decent PCI processing system... */
+static void __init ioat_init(void)
+{
+ struct pci_device *p;
+
+ STAILQ_FOREACH(p, &pci_devices, all_dev) {
+ if (p->ven_id != PCI_VENDOR_ID_INTEL)
+ continue;
+ for (int i = 0; ioat_pci_tbl[i].device; i++) {
+ if (p->dev_id == ioat_pci_tbl[i].device) {
+ ioat_pci_probe(p, &ioat_pci_tbl[i]);
+ break;
+ }
+ }
+ }
+}
+/* The 'arch_initcall' setup functions run at level 2. */
+init_func_3(ioat_init);
+
+#if 0 // AKAROS
static void ioat_remove(struct pci_device *pdev)
{
struct ioatdma_device *device = pci_get_drvdata(pdev);
@@ -1425,6 +1500,7 @@
pci_disable_pcie_error_reporting(pdev);
ioat_dma_remove(device);
}
+#endif
static int __init ioat_init_module(void)
{
@@ -1434,7 +1510,8 @@
DRV_NAME, IOAT_DMA_VERSION);
ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ ARCH_CL_SIZE, 0, NULL, NULL, NULL,
+ NULL);
if (!ioat_cache)
return -ENOMEM;
@@ -1442,9 +1519,11 @@
if (!ioat_sed_cache)
goto err_ioat_cache;
+#if 0 // AKAROS
err = pci_register_driver(&ioat_pci_driver);
if (err)
goto err_ioat3_cache;
+#endif
return 0;
@@ -1457,10 +1536,13 @@
return err;
}
module_init(ioat_init_module);
+init_func_2(ioat_init_module);
+#if 0 // AKAROS
static void __exit ioat_exit_module(void)
{
pci_unregister_driver(&ioat_pci_driver);
kmem_cache_destroy(ioat_cache);
}
module_exit(ioat_exit_module);
+#endif
diff --git a/kern/drivers/dma/ioat/prep.c b/kern/drivers/dma/ioat/prep.c
index 2322c74..d41f1f8 100644
--- a/kern/drivers/dma/ioat/prep.c
+++ b/kern/drivers/dma/ioat/prep.c
@@ -15,12 +15,9 @@
* the file called "COPYING".
*
*/
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-#include <linux/prefetch.h>
+
+#include <linux_compat.h>
+
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
@@ -94,15 +91,15 @@
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
struct ioat_sed_ent *sed;
- gfp_t flags = __GFP_ZERO | 0;
+ gfp_t flags = /* __GFP_ZERO | */ 0;
- sed = kmem_cache_alloc(ioat_sed_cache, flags);
+ sed = kmem_cache_zalloc(ioat_sed_cache, flags);
if (!sed)
return NULL;
sed->hw_pool = hw_pool;
- sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
- flags, &sed->dma);
+ sed->hw = dma_pool_zalloc(ioat_dma->sed_hw_pool[hw_pool],
+ flags, &sed->dma);
if (!sed->hw) {
kmem_cache_free(ioat_sed_cache, sed);
return NULL;