ioat: support device reset
This isn't a full PCI reset. It's more of a driver reset. PCI things,
like BARs and MSIX tables are still maintained. The driver undid its
things, back to where it was when the system booted. (Ideally).
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/kern/drivers/dma/ioat/init.c b/kern/drivers/dma/ioat/init.c
index ef5fb28..1ab1a5f 100644
--- a/kern/drivers/dma/ioat/init.c
+++ b/kern/drivers/dma/ioat/init.c
@@ -439,17 +439,24 @@
irq_h = register_irq(0 /* ignored for msi(x)! */,
ioat_dma_do_interrupt_msix, ioat_chan,
pci_to_tbdf(pdev));
- /* TODO: this is a mess - we also don't know if we're actually
- * MSIX or not! We don't even know our vector... */
if (!irq_h) {
- warn("MSIX failed (cnt %d), leaking vectors etc!", i);
+ warn("MSIX setup failed (cnt %d)!", i);
for (j = 0; j < i; j++) {
msix = &ioat_dma->msix_entries[j];
ioat_chan = ioat_chan_by_index(ioat_dma, j);
- //devm_free_irq(dev, msix->vector, ioat_chan);
+ deregister_irq(msix->vector, pci_to_tbdf(pdev));
}
goto err_no_irq;
}
+ /* TODO: this is ugly. Though really we need register_irq() to
+ * not fallback on its own here. This case here is when we did
+ * get an irq_h, but it wasn't the type we wanted, and this
+ * driver has different IRQ handlers for different types. */
+ if (strcmp("msi-x", irq_h->type)) {
+ warn("IRQ setup didn't get an MSIX!");
+ goto err_no_irq;
+ }
+ msix->vector = irq_h->apic_vector;
}
intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
ioat_dma->irq_mode = IOAT_MSIX;
@@ -1459,26 +1466,6 @@
return 0;
}
-/* In lieu of a decent PCI processing system... */
-static void __init ioat_init(void)
-{
- struct pci_device *p;
-
- STAILQ_FOREACH(p, &pci_devices, all_dev) {
- if (p->ven_id != PCI_VENDOR_ID_INTEL)
- continue;
- for (int i = 0; ioat_pci_tbl[i].device; i++) {
- if (p->dev_id == ioat_pci_tbl[i].device) {
- ioat_pci_probe(p, &ioat_pci_tbl[i]);
- break;
- }
- }
- }
-}
-/* The 'arch_initcall' setup functions run at level 2. */
-init_func_3(ioat_init);
-
-#if 0 // AKAROS
static void ioat_remove(struct pci_device *pdev)
{
struct ioatdma_device *device = pci_get_drvdata(pdev);
@@ -1488,15 +1475,133 @@
dev_err(&pdev->linux_dev, "Removing dma and dca services\n");
if (device->dca) {
+#if 0 // AKAROS
unregister_dca_provider(device->dca, &pdev->linux_dev);
free_dca_provider(device->dca);
+#else
+ warn("Unexpected dca on PCI %x:%x.%x", pdev->bus, pdev->dev,
+ pdev->func);
+#endif
device->dca = NULL;
}
pci_disable_pcie_error_reporting(pdev);
ioat_dma_remove(device);
}
-#endif
+
+/* TODO (DEVM): Akaros doesn't do the 'managed' part of devm_kzalloc and
+ * friends. This helper will cleanup the things I noticed that were alloced
+ * in this manner. This was made manually, so YMMV.
+ *
+ * Note that dmaengine.c has a dmam_device_release set up that calls
+ * dma_async_device_unregister, but this driver doesn't use the 'managed'
+ * dmaenginem_async_device_register(). */
+static void devm_cleanup(struct pci_device *pdev)
+{
+ struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+ struct ioatdma_chan *ioat_chan;
+
+ if (!ioat_dma)
+ return;
+ pci_set_drvdata(pdev, NULL);
+ for (int i = 0; i < IOAT_MAX_CHANS; i++) {
+ ioat_chan = ioat_dma->idx[i];
+ if (!ioat_chan)
+ continue;
+ kfree(ioat_chan);
+ }
+ kfree(ioat_dma);
+}
+
+/* TODO (DEVM): Akaros doesn't do any of the 'managed' pci/dev stuff, so we'll
+ * have to free things if probe fails. */
+static int ioat_pci_probe_wrapper(struct pci_device *pdev,
+ const struct pci_device_id *id)
+{
+ int ret;
+
+ ret = ioat_pci_probe(pdev, id);
+ if (ret < 0) {
+ devm_cleanup(pdev);
+ /* Might be a bug in the linux driver, but there are error paths
+ * that happen after BME is set. */
+ pci_clr_bus_master(pdev);
+ }
+ return ret;
+}
+
+/* In lieu of a decent PCI processing system... */
+static bool ioat_pci_init(struct pci_device *pdev)
+{
+ const struct pci_device_id *pci_id;
+
+ pci_id = srch_linux_pci_tbl(ioat_pci_tbl, pdev);
+ if (!pci_id)
+ return false;
+ if (ioat_pci_probe_wrapper(pdev, pci_id) < 0)
+ return false;
+ return true;
+}
+
+/* We have support to stop individual IRQs, but the device is still somewhat
+ * initialized from a PCI perspective. It's not torn down completely:
+ *
+ * We do:
+ * - Turn off and free specific MSI-X vectors.
+ * - Deregister and free the IRQ handler
+ * - Clear bus master enabled
+ * We do not:
+ * - Tear down pci_msi stuff, which is managed by the PCI layer. Like the msix
+ * table, or the msix_ready flag
+ * - Tear down the BAR mmio mappings. Those are managed by the PCI layer.
+ */
+static bool ioat_pci_reset(struct pci_device *pdev)
+{
+ struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+ int msixcnt = ioat_dma->dma_dev.chancnt;
+ struct msix_entry *msix;
+
+ ioat_shutdown(pdev);
+ ioat_remove(pdev);
+
+ /* Assuming MSIX, which is enforced elsewhere.
+ *
+ * In Linux, devm resources are freed in reverse order, so the IRQs are
+ * freed before the channels are freed.
+ *
+ * I'm a little reluctant to do this in devm_cleanup, since probe
+ * failures clean up their own IRQs already. (Or at least warn if they
+ * need to. */
+ for (int i = 0; i < msixcnt; i++) {
+ msix = &ioat_dma->msix_entries[i];
+ deregister_irq(msix->vector, pci_to_tbdf(pdev));
+ }
+ devm_cleanup(pdev);
+ pci_clr_bus_master(pdev);
+ return true;
+}
+
+static struct pci_ops ioat_pci_ops = {
+ .driver_name = "ioat",
+ .init = ioat_pci_init,
+ .reset = ioat_pci_reset,
+};
+
+static void __init ioat_init(void)
+{
+ struct pci_device *p;
+ const struct pci_device_id *pci_id;
+
+ STAILQ_FOREACH(p, &pci_devices, all_dev) {
+ if (p->ven_id != PCI_VENDOR_ID_INTEL)
+ continue;
+ if (ioat_pci_init(p))
+ pci_set_ops(p, &ioat_pci_ops,
+ DEV_STATE_ASSIGNED_KERNEL);
+ }
+}
+/* The 'arch_initcall' setup functions run at level 2. */
+init_func_3(ioat_init);
static int __init ioat_init_module(void)
{