| /* |
| * File: iommu.c - Driver for accessing Intel iommu |
| * Author: Aditya Basu <mitthu@google.com> |
| * |
 * Lock ordering: (1) proc->proc_lock => (2) iommu->iommu_lock
| * |
| * TODO |
| * ==== |
| * - iommu_process_cleanup() is untested. |
| * - In iommu_map_pci_devices() assign the correct iommu for scoped DRHD. Right |
| * now the default iommu is assigned to all devices. |
 * - In assign_device() make sure the process is not in DYING or DYING_ABORT
| * state. |
| * - Assigning processes across multiple IOMMUs / DRHDs will result in |
| * corruption of iommu->procs. This is because the tailq relies on |
| * proc->iommu_link. |
| * - IOMMU_DID_DEFAULT = 1; this means pid = 1 cannot have a device passthru |
| * because we use the pid as "did" or domain ID. |
| */ |
| |
| #include <stdio.h> |
| #include <error.h> |
| #include <common.h> |
| #include <net/ip.h> |
| #include <atomic.h> |
| |
| #include <acpi.h> |
| #include <arch/intel-iommu.h> |
| #include <env.h> |
| #include <arch/pci.h> |
| #include <linux_compat.h> |
| |
| #define IOMMU "iommu: " |
| #define BUFFERSZ 8192 |
| |
| struct dev iommudevtab; |
| struct iommu_list_tq iommu_list = TAILQ_HEAD_INITIALIZER(iommu_list); |
| static bool is_initialized; /* to detect absence of IOMMU */ |
| |
| /* QID Path */ |
| enum { |
| Qdir = 0, |
| Qmappings = 1, |
| Qadddev = 2, |
| Qremovedev = 3, |
| Qinfo = 4, |
| Qpower = 5, |
| }; |
| |
| static struct dirtab iommudir[] = { |
| {".", {Qdir, 0, QTDIR}, 0, 0555}, |
| {"mappings", {Qmappings, 0, QTFILE}, 0, 0755}, |
| {"attach", {Qadddev, 0, QTFILE}, 0, 0755}, |
| {"detach", {Qremovedev, 0, QTFILE}, 0, 0755}, |
| {"info", {Qinfo, 0, QTFILE}, 0, 0755}, |
| {"power", {Qpower, 0, QTFILE}, 0, 0755}, |
| }; |
| |
/* This might be necessary when updating mapping structures: context-cache,
 * IOTLB or IEC. */
| static inline void write_buffer_flush(struct iommu *iommu) |
| { |
| uint32_t cmd, status; |
| |
| if (!iommu->rwbf) |
| return; |
| |
| cmd = read32(iommu->regio + DMAR_GCMD_REG) | DMA_GCMD_WBF; |
| write32(cmd, iommu->regio + DMAR_GCMD_REG); |
| |
| /* read status */ |
| do { |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| } while (status & DMA_GSTS_WBFS); |
| } |
| |
/* This is necessary when caching mode is supported.
 * ASSUMES: no pending flush requests. This is a problem only if another
 * function is used to perform the flush. */
| static inline void iotlb_flush(struct iommu *iommu, uint16_t did) |
| { |
| uint64_t cmd, status; |
| |
| cmd = 0x0 |
| | DMA_TLB_IVT /* issue the flush command */ |
| | DMA_TLB_DSI_FLUSH /* DID specific shootdown */ |
| | DMA_TLB_READ_DRAIN |
| | DMA_TLB_WRITE_DRAIN |
| | DMA_TLB_DID(did); |
| write64(cmd, iommu->regio + iommu->iotlb_cmd_offset); |
| |
| /* read status */ |
| do { |
| status = read64(iommu->regio + iommu->iotlb_cmd_offset); |
		status >>= 63; /* bit 63 (IVT): gets cleared on completion */
| } while (status); |
| } |
| |
| static inline struct root_entry *get_root_entry(physaddr_t paddr) |
| { |
| return (struct root_entry *) KADDR(paddr); |
| } |
| |
| static inline struct context_entry *get_context_entry(physaddr_t paddr) |
| { |
| return (struct context_entry *) KADDR(paddr); |
| } |
| |
| /* iommu is not modified by this function or its callees. */ |
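/* Allocate one context table covering a whole bus: 32 devices x 8 functions =
 * 256 context entries, each initially present, with faults disabled and
 * pass-through (0x2) translation under the given domain ID. */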
| static physaddr_t ct_init(struct iommu *iommu, uint16_t did) |
| { |
| struct context_entry *cte; |
| physaddr_t ct; |
| uint8_t ctx_aw = CTX_AW_L4; |
| |
| cte = (struct context_entry *) kpage_zalloc_addr(); |
| ct = PADDR(cte); |
| |
| for (int i = 0; i < 32 * 8; i++, cte++) { // device * func |
| /* initializations such as the domain */ |
| cte->hi = 0 |
| | (did << CTX_HI_DID_SHIFT) // DID bit: 72 to 87 |
| | (ctx_aw << CTX_HI_AW_SHIFT); // AW |
| cte->lo = 0 |
| | (0x2 << CTX_LO_TRANS_SHIFT) // 0x2: pass through |
| | (0x1 << CTX_LO_FPD_SHIFT) // disable faults |
| | (0x1 << CTX_LO_PRESENT_SHIFT); // is present |
| } |
| |
| return ct; |
| } |
| |
| /* Get a new root_entry table. Allocates all context entries. |
| * iommu is not modified by this function or its callees. */ |
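/* The root table has one entry per PCI bus (256 total); each entry is marked
 * present and points to a context table freshly built by ct_init(). */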
| static physaddr_t rt_init(struct iommu *iommu, uint16_t did) |
| { |
| struct root_entry *rte; |
| physaddr_t rt; |
| physaddr_t ct; |
| |
	/* kpage_zalloc_addr() returns a zeroed, page-aligned (0x1000) page */
| rte = (struct root_entry *) kpage_zalloc_addr(); |
| rt = PADDR(rte); |
| |
| /* create context table */ |
| for (int i = 0; i < 256; i++, rte++) { |
| ct = ct_init(iommu, did); |
| rte->hi = 0; |
| rte->lo = 0 |
| | ct |
| | (0x1 << RT_LO_PRESENT_SHIFT); |
| } |
| |
| return rt; |
| } |
| |
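/* Walk the two-level structure for a BDF: the bus indexes the root table, the
 * root entry's low word holds the context table's physical address, and the
 * context entry sits at offset (dev * 8 + func). */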
| static struct context_entry *get_ctx_for(int bus, int dev, int func, |
| physaddr_t roottable) |
| { |
| struct root_entry *rte; |
| physaddr_t cte_phy; |
| struct context_entry *cte; |
| uint32_t offset = 0; |
| |
| rte = get_root_entry(roottable) + bus; |
| |
| cte_phy = rte->lo & 0xFFFFFFFFFFFFF000; |
| cte = get_context_entry(cte_phy); |
| |
| offset = (dev * 8) + func; |
| cte += offset; |
| |
| return cte; |
| } |
| |
| /* The process pid is used as the Domain ID (DID) */ |
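/* Redirect the device's DMA through the process's page tables: clear the
 * present bit, flush the write buffer and IOTLB, then write the new DID and
 * the second-level page-table root (the process's EPT). */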
| static void setup_page_tables(struct proc *p, struct pci_device *d) |
| { |
	uint16_t did = p->pid; /* truncates to 16 bits */
| struct iommu *iommu = d->iommu; |
| struct context_entry *cte = |
| get_ctx_for(d->bus, d->dev, d->func, iommu->roottable); |
| |
| /* Mark the entry as not present */ |
| cte->lo &= ~0x1; |
| write_buffer_flush(iommu); |
| iotlb_flush(iommu, IOMMU_DID_DEFAULT); |
| |
| cte->hi = 0 |
| | (did << CTX_HI_DID_SHIFT) // DID bit: 72 to 87 |
| | (CTX_AW_L4 << CTX_HI_AW_SHIFT); // AW |
| |
	cte->lo = PTE_ADDR(p->env_pgdir.eptp)
		| (0x0 << CTX_LO_TRANS_SHIFT) // 0x0: use second-level page tables
		| (0x1 << CTX_LO_FPD_SHIFT) // disable faults
		| (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */
| } |
| |
/* TODO: We should mark the entry as not present to block any stray DMAs from
 * reaching the kernel. To force re-attaching the device to the kernel, we can
 * use pid 0. */
| static void teardown_page_tables(struct proc *p, struct pci_device *d) |
| { |
| uint16_t did = IOMMU_DID_DEFAULT; |
| struct iommu *iommu = d->iommu; |
| struct context_entry *cte = |
| get_ctx_for(d->bus, d->dev, d->func, iommu->roottable); |
| |
| /* Mark the entry as not present */ |
| cte->lo &= ~0x1; |
| write_buffer_flush(iommu); |
| iotlb_flush(iommu, p->pid); |
| |
| cte->hi = 0 |
| | (did << CTX_HI_DID_SHIFT) // DID bit: 72 to 87 |
| | (CTX_AW_L4 << CTX_HI_AW_SHIFT); // AW |
| |
| cte->lo = 0 /* assumes page alignment */ |
| | (0x2 << CTX_LO_TRANS_SHIFT) |
| | (0x1 << CTX_LO_FPD_SHIFT) // disable faults |
| | (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */ |
| } |
| |
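/* Enable one IOMMU: program the root table address, latch it with SRTP, set
 * TE in the global command register, and report whether the status register
 * shows translation enabled. */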
| static bool _iommu_enable(struct iommu *iommu) |
| { |
| uint32_t cmd, status; |
| |
| spin_lock_irqsave(&iommu->iommu_lock); |
| |
| /* write the root table address */ |
| write64(iommu->roottable, iommu->regio + DMAR_RTADDR_REG); |
| |
| // TODO: flush IOTLB if reported as necessary by cap register |
| // TODO: issue TE only once |
| |
| /* set root table - needs to be done first */ |
| cmd = DMA_GCMD_SRTP; |
| write32(cmd, iommu->regio + DMAR_GCMD_REG); |
| |
| /* enable translation */ |
| cmd = DMA_GCMD_TE; |
| write32(cmd, iommu->regio + DMAR_GCMD_REG); |
| |
| /* read status */ |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| |
| return status & DMA_GSTS_TES; |
| } |
| |
| void iommu_enable(void) |
| { |
| struct iommu *iommu; |
| |
| /* races are possible; add a global lock? */ |
| if (iommu_status()) |
| return; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| _iommu_enable(iommu); |
| } |
| |
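/* Disable one IOMMU by clearing the global command register; the return value
 * reflects the TE bit in the status register (true if translation is still
 * on). */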
| static bool _iommu_disable(struct iommu *iommu) |
| { |
| uint32_t cmd, status; |
| |
| spin_lock_irqsave(&iommu->iommu_lock); |
| |
| /* write the root table address */ |
| write64(iommu->roottable, iommu->regio + DMAR_RTADDR_REG); |
| |
| // TODO: flush IOTLB if reported as necessary by cap register |
| /* disable translation */ |
| cmd = 0; |
| write32(cmd, iommu->regio + DMAR_GCMD_REG); |
| |
| /* read status */ |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| |
| return status & DMA_GSTS_TES; |
| } |
| |
| void iommu_disable(void) |
| { |
| struct iommu *iommu; |
| |
| /* races are possible; add a global lock? */ |
| if (!iommu_status()) |
| return; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| _iommu_disable(iommu); |
| } |
| |
| static bool _iommu_status(struct iommu *iommu) |
| { |
| uint32_t status = 0; |
| |
| spin_lock_irqsave(&iommu->iommu_lock); |
| |
| /* read status */ |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| |
| return status & DMA_GSTS_TES; |
| } |
| |
| bool iommu_status(void) |
| { |
| struct iommu *iommu; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| if (_iommu_status(iommu)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Helpers for set/get/init PCI device (BDF) <=> Process map */ |
| static bool proc_already_in_iommu_list(struct iommu *iommu, struct proc *p) |
| { |
| struct proc *proc_iter; |
| |
| TAILQ_FOREACH(proc_iter, &iommu->procs, iommu_link) |
| if (proc_iter == p) |
| return true; |
| |
| return false; |
| } |
| |
| /* this function retains a KREF to struct proc for each assigned PCI device */ |
| static bool assign_device(int bus, int dev, int func, pid_t pid) |
| { |
| int tbdf = MKBUS(BusPCI, bus, dev, func); |
| struct pci_device *d = pci_match_tbdf(tbdf); |
| struct proc *p = pid2proc(pid); |
| |
| if (!p) |
| error(EIO, "cannot find pid %d\n", pid); |
| |
| if (pid == 1) { |
| proc_decref(p); |
| error(EIO, "device passthru not supported for pid = 1"); |
| } |
| |
| if (!d) { |
| proc_decref(p); |
| error(EIO, "cannot find dev %x:%x.%x\n", bus, dev, func); |
| } |
| |
| /* grab locks */ |
| spin_lock_irqsave(&p->proc_lock); |
| spin_lock_irqsave(&d->iommu->iommu_lock); |
| |
	if (d->proc_owner) {
		pid_t owner_pid = d->proc_owner->pid;

		spin_unlock_irqsave(&d->iommu->iommu_lock);
		spin_unlock_irqsave(&p->proc_lock);
		proc_decref(p);
		error(EIO, "dev already assigned to pid = %d\n", owner_pid);
	}
| |
| d->proc_owner = p; /* protected by iommu_lock */ |
| d->iommu->num_assigned_devs += 1; /* protected by iommu_lock */ |
| |
| /* add device to list in struct proc */ |
| TAILQ_INSERT_TAIL(&p->pci_devices, d, proc_link); |
| |
| /* add proc to list in struct iommu */ |
| if (!proc_already_in_iommu_list(d->iommu, p)) |
| TAILQ_INSERT_TAIL(&d->iommu->procs, p, iommu_link); |
| |
| /* setup the actual page tables */ |
| setup_page_tables(p, d); |
| |
| /* release locks */ |
| spin_unlock_irqsave(&d->iommu->iommu_lock); |
| spin_unlock_irqsave(&p->proc_lock); |
| |
| return true; |
| } |
| |
| static bool unassign_device(int bus, int dev, int func) |
| { |
| int tbdf = MKBUS(BusPCI, bus, dev, func); |
| struct pci_device *d = pci_match_tbdf(tbdf); |
| struct proc *p; |
| |
| if (!d) |
| error(EIO, "cannot find dev %x:%x.%x", bus, dev, func); |
| |
| /* TODO: this will break if there are multiple threads calling unassign. |
| * Might require rethinking the lock ordering and synchronization */ |
| p = d->proc_owner; |
| if (!p) |
| error(EIO, "%x:%x.%x is not assigned to any process", |
| bus, dev, func); |
| |
| /* grab locks */ |
| spin_lock_irqsave(&p->proc_lock); |
| spin_lock_irqsave(&d->iommu->iommu_lock); |
| |
| /* teardown page table association */ |
| teardown_page_tables(p, d); |
| |
| d->proc_owner = NULL; /* protected by iommu_lock */ |
| d->iommu->num_assigned_devs -= 1; /* protected by iommu_lock */ |
| |
| /* remove device from list in struct proc */ |
| TAILQ_REMOVE(&p->pci_devices, d, proc_link); |
| |
	/* remove proc from list in struct iommu if it has no passthru devices left */
| if (TAILQ_EMPTY(&p->pci_devices)) |
| TAILQ_REMOVE(&d->iommu->procs, p, iommu_link); |
| |
| /* release locks */ |
| spin_unlock_irqsave(&d->iommu->iommu_lock); |
| spin_unlock_irqsave(&p->proc_lock); |
| |
| /* decrement KREF for this PCI device */ |
| proc_decref(p); |
| |
| return true; |
| } |
| |
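/* Detach every PCI device still assigned to the process; intended for the
 * process-teardown path (untested, see the TODO at the top of this file). */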
| void iommu_process_cleanup(struct proc *p) |
| { |
	struct pci_device *pcidev, *tmp;

	// TODO: grab proc_lock
	/* unassign_device() removes pcidev from p->pci_devices, so use the
	 * _SAFE iterator to keep the traversal valid. */
	TAILQ_FOREACH_SAFE(pcidev, &p->pci_devices, proc_link, tmp)
		unassign_device(pcidev->bus, pcidev->dev, pcidev->func);
| } |
| |
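/* Parse "bus:dev.func pid" (e.g. "00:1f.2 13") written to #iommu/attach and
 * assign the device to that process. */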
| static int write_add_dev(char *va, size_t n) |
| { |
| int bus, dev, func, err; |
| pid_t pid; |
| |
| err = sscanf(va, "%x:%x.%x %d\n", &bus, &dev, &func, &pid); |
| |
| if (err != 4) |
| error(EIO, |
| IOMMU "error parsing #iommu/attach; items parsed: %d", err); |
| |
| if (pid == 1) |
| error(EIO, IOMMU "device passthru not supported for pid = 1"); |
| |
| if (!assign_device(bus, dev, func, pid)) |
| error(EIO, "passthru failed"); |
| |
| return n; |
| } |
| |
| static int write_remove_dev(char *va, size_t n) |
| { |
| int bus, dev, func, err; |
| |
| err = sscanf(va, "%x:%x.%x\n", &bus, &dev, &func); |
| |
| if (err != 3) |
| error(EIO, |
| IOMMU "error parsing #iommu/detach; items parsed: %d", err); |
| |
| unassign_device(bus, dev, func); |
| |
| return n; |
| } |
| |
| static int write_power(char *va, size_t n) |
| { |
| if (!strcmp(va, "enable") || !strcmp(va, "on")) { |
| iommu_enable(); |
| return n; |
| } else if (!strcmp(va, "disable") || !strcmp(va, "off")) { |
| iommu_disable(); |
| return n; |
| } else |
| return n; |
| } |
| |
| static void _open_mappings(struct sized_alloc *sza, struct proc *proc) |
| { |
| struct pci_device *pcidev; |
| |
| sza_printf(sza, "\tpid = %d\n", proc->pid); |
| TAILQ_FOREACH(pcidev, &proc->pci_devices, proc_link) { |
| sza_printf(sza, "\t\tdevice = %x:%x.%x\n", pcidev->bus, |
| pcidev->dev, pcidev->func); |
| } |
| } |
| |
| static struct sized_alloc *open_mappings(void) |
| { |
| struct iommu *iommu; |
| struct proc *proc; |
| struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT); |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| spin_lock_irqsave(&iommu->iommu_lock); |
| |
| sza_printf(sza, "Mappings for iommu@%p\n", iommu); |
| if (TAILQ_EMPTY(&iommu->procs)) |
| sza_printf(sza, "\t<empty>\n"); |
| else |
| TAILQ_FOREACH(proc, &iommu->procs, iommu_link) |
| _open_mappings(sza, proc); |
| |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| } |
| |
| return sza; |
| } |
| |
| static void _open_info(struct iommu *iommu, struct sized_alloc *sza) |
| { |
| uint64_t value; |
| |
| sza_printf(sza, "\niommu@%p\n", iommu); |
| sza_printf(sza, "\trba = %p\n", iommu->rba); |
| sza_printf(sza, "\tsupported = %s\n", iommu->supported ? "yes" : "no"); |
| sza_printf(sza, "\tnum_assigned_devs = %d\n", iommu->num_assigned_devs); |
| sza_printf(sza, "\tregspace = %p\n", iommu->regio); |
| sza_printf(sza, "\thost addr width (dmar) = %d\n", iommu->haw_dmar); |
| sza_printf(sza, "\thost addr width (cap[mgaw]) = %d\n", |
| iommu->haw_cap); |
| value = read32(iommu->regio + DMAR_VER_REG); |
| sza_printf(sza, "\tversion = 0x%x\n", value); |
| |
| value = read64(iommu->regio + DMAR_CAP_REG); |
| sza_printf(sza, "\tcapabilities = %p\n", value); |
| sza_printf(sza, "\t\tmgaw: %d\n", cap_mgaw(value)); |
| sza_printf(sza, "\t\tsagaw (paging level): 0x%x\n", cap_sagaw(value)); |
| sza_printf(sza, "\t\tcaching mode: %s (%d)\n", cap_caching_mode(value) ? |
| "yes" : "no", cap_caching_mode(value)); |
| sza_printf(sza, "\t\tzlr: 0x%x\n", cap_zlr(value)); |
| sza_printf(sza, "\t\trwbf: %s\n", cap_rwbf(value) ? "required" |
| : "not required"); |
| sza_printf(sza, "\t\tnum domains: %d\n", cap_ndoms(value)); |
| sza_printf(sza, "\t\tsupports protected high-memory region: %s\n", |
| cap_phmr(value) ? "yes" : "no"); |
	sza_printf(sza, "\t\tsupports protected low-memory region: %s\n",
| cap_plmr(value) ? "yes" : "no"); |
| |
| value = read64(iommu->regio + DMAR_ECAP_REG); |
| sza_printf(sza, "\text. capabilities = %p\n", value); |
| sza_printf(sza, "\t\tpass through: %s\n", |
| ecap_pass_through(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tdevice iotlb: %s\n", |
| ecap_dev_iotlb_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tiotlb register offset: 0x%x\n", |
| ecap_iotlb_offset(value)); |
| sza_printf(sza, "\t\tsnoop control: %s\n", |
| ecap_sc_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tcoherency: %s\n", |
| ecap_coherent(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tqueue invalidation support: %s\n", |
| ecap_qis(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tinterrupt remapping support: %s\n", |
| ecap_ir_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\textended interrupt mode: 0x%x\n", |
| ecap_eim_support(value)); |
| |
| value = read32(iommu->regio + DMAR_GSTS_REG); |
| sza_printf(sza, "\tglobal status = 0x%x\n", value); |
| sza_printf(sza, "\t\ttranslation: %s\n", |
| value & DMA_GSTS_TES ? "enabled" : "disabled"); |
| sza_printf(sza, "\t\troot table: %s\n", |
| value & DMA_GSTS_RTPS ? "set" : "not set"); |
| |
| value = read64(iommu->regio + DMAR_RTADDR_REG); |
| sza_printf(sza, "\troot entry table = %p (phy) or %p (vir)\n", |
| value, KADDR(value)); |
| } |
| |
| static struct sized_alloc *open_info(void) |
| { |
| struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT); |
| uint64_t value; |
| struct iommu *iommu; |
| |
| sza_printf(sza, "driver info:\n"); |
| |
| value = IOMMU_DID_DEFAULT; |
| sza_printf(sza, "\tdefault did = %d\n", value); |
| sza_printf(sza, "\tstatus = %s\n", |
| iommu_status() ? "enabled" : "disabled"); |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| _open_info(iommu, sza); |
| } |
| |
| return sza; |
| } |
| |
| static struct sized_alloc *open_power(void) |
| { |
| struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT); |
| |
| sza_printf(sza, "IOMMU status: %s\n\n", |
| iommu_status() ? "enabled" : "disabled"); |
| |
| sza_printf(sza, |
| "Write 'enable' or 'disable' OR 'on' or 'off' to change status\n"); |
| |
| return sza; |
| } |
| |
| static char *devname(void) |
| { |
| return iommudevtab.name; |
| } |
| |
| static struct chan *iommuattach(char *spec) |
| { |
| return devattach(devname(), spec); |
| } |
| |
| static struct walkqid *iommuwalk(struct chan *c, struct chan *nc, char **name, |
| unsigned int nname) |
| { |
| return devwalk(c, nc, name, nname, iommudir, |
| ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| static size_t iommustat(struct chan *c, uint8_t *dp, size_t n) |
| { |
| return devstat(c, dp, n, iommudir, ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| static struct chan *iommuopen(struct chan *c, int omode) |
| { |
| switch (c->qid.path) { |
| case Qmappings: |
| c->synth_buf = open_mappings(); |
| break; |
| case Qinfo: |
| c->synth_buf = open_info(); |
| break; |
| case Qpower: |
| c->synth_buf = open_power(); |
| break; |
| case Qadddev: |
| case Qremovedev: |
| case Qdir: |
| default: |
| break; |
| } |
| |
| return devopen(c, omode, iommudir, ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| /* |
| * All files are synthetic. Hence we do not need to implement any close |
| * function. |
| */ |
| static void iommuclose(struct chan *c) |
| { |
| switch (c->qid.path) { |
| case Qmappings: |
| case Qinfo: |
| case Qpower: |
| kfree(c->synth_buf); |
| c->synth_buf = NULL; |
| break; |
| case Qadddev: |
| case Qremovedev: |
| case Qdir: |
| default: |
| break; |
| } |
| } |
| |
| static size_t iommuread(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| struct sized_alloc *sza = c->synth_buf; |
| |
| switch (c->qid.path) { |
| case Qdir: |
| return devdirread(c, va, n, iommudir, |
| ARRAY_SIZE(iommudir), devgen); |
| case Qadddev: |
| return readstr(offset, va, n, |
| "write format: xx:yy.z pid\n" |
| " xx = bus (in hex)\n" |
| " yy = device (in hex)\n" |
| " z = function (in hex)\n" |
| " pid = process pid\n" |
| "\nexample:\n" |
| "$ echo 00:1f.2 13 >\\#iommu/attach\n"); |
| case Qremovedev: |
| return readstr(offset, va, n, |
| "write format: xx:yy.z\n" |
| " xx = bus (in hex)\n" |
| " yy = device (in hex)\n" |
| " z = function (in hex)\n" |
| "\nexample:\n" |
| "$ echo 00:1f.2 >\\#iommu/detach\n"); |
| case Qmappings: |
| case Qinfo: |
| case Qpower: |
| return readstr(offset, va, n, sza->buf); |
| default: |
| error(EIO, "read: qid %d is impossible", c->qid.path); |
| } |
| |
| return -1; /* not reached */ |
| } |
| |
| static size_t iommuwrite(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| int err = -1; |
| |
| switch (c->qid.path) { |
| case Qadddev: |
| if (!iommu_supported()) |
| error(EROFS, IOMMU "not supported"); |
| err = write_add_dev(va, n); |
| break; |
| case Qremovedev: |
| if (!iommu_supported()) |
| error(EROFS, IOMMU "not supported"); |
| err = write_remove_dev(va, n); |
| break; |
| case Qpower: |
| err = write_power(va, n); |
| break; |
| case Qmappings: |
| case Qinfo: |
| case Qdir: |
| error(EROFS, IOMMU "cannot modify"); |
| default: |
| error(EIO, "write: qid %d is impossible", c->qid.path); |
| } |
| |
| return err; |
| } |
| |
/* Iterate over all IOMMUs and make sure the "rba" values reported by the DRHDs
 * are unique */
static bool iommu_assert_unique_regio(void)
| { |
| struct iommu *outer, *inner; |
| uint64_t rba; |
| bool result = true; |
| |
| TAILQ_FOREACH(outer, &iommu_list, iommu_link) { |
| rba = outer->rba; |
| |
| TAILQ_FOREACH(inner, &iommu_list, iommu_link) { |
| if (outer != inner && rba == inner->rba) { |
| outer->supported = false; |
| result = false; |
| } |
| } |
| } |
| |
| return result; |
| } |
| |
| static bool iommu_assert_required_capabilities(struct iommu *iommu) |
| { |
| uint64_t cap, ecap; |
| bool support, result; |
| |
| if (!iommu || !iommu->regio) |
| return false; |
| |
| cap = read64(iommu->regio + DMAR_CAP_REG); |
| ecap = read64(iommu->regio + DMAR_ECAP_REG); |
| result = true; /* default */ |
| |
| support = (cap_sagaw(cap) & 0x4) >> 2; |
| if (!support) { |
| printk(IOMMU "%p: unsupported paging level: 0x%x\n", |
| iommu, cap_sagaw(cap)); |
| result = false; |
| } |
| |
| support = cap_super_page_val(cap) & 0x1; |
| if (!support) { |
		printk(IOMMU "%p: 2MB super pages not supported\n", iommu);
| result = false; |
| } |
| |
| support = ecap_pass_through(ecap); |
| if (!support) { |
| printk(IOMMU "%p: pass-through translation type in context entries not supported\n", iommu); |
| result = false; |
| } |
| |
| /* max haw reported by iommu */ |
| iommu->haw_cap = cap_mgaw(cap); |
| if (iommu->haw_cap != iommu->haw_dmar) { |
		printk(IOMMU "%p: HAW mismatch; DMAR reports %d, CAP reports %d\n",
| iommu, iommu->haw_dmar, iommu->haw_cap); |
| } |
| |
	/* mark the iommu as not supported if any required capability is missing */
| if (!result) |
| iommu->supported = false; |
| |
| return result; |
| } |
| |
| static void iommu_assert_all(void) |
| { |
| struct iommu *iommu; |
| |
	if (!iommu_assert_unique_regio())
| warn(IOMMU "same register base addresses detected"); |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| iommu_assert_required_capabilities(iommu); |
| } |
| |
| static void iommu_populate_fields(void) |
| { |
| struct iommu *iommu; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| iommu->roottable = rt_init(iommu, IOMMU_DID_DEFAULT); |
| } |
| |
| /* Run this function after all individual IOMMUs are initialized. */ |
| void iommu_initialize_global(void) |
| { |
| if (!is_initialized) |
| return; |
| |
| /* fill the supported field in struct iommu */ |
| run_once(iommu_assert_all()); |
| run_once(iommu_populate_fields()); |
| |
| iommu_enable(); |
| } |
| |
| /* should only be called after all iommus are initialized */ |
| bool iommu_supported(void) |
| { |
| struct iommu *iommu; |
| |
| if (!is_initialized) |
| return false; |
| |
| /* return false if any of the iommus isn't supported */ |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| if (!iommu->supported) |
| return false; |
| |
| return true; |
| } |
| |
| /* grabs the iommu of the first DRHD with INCLUDE_PCI_ALL */ |
| struct iommu *get_default_iommu(void) |
| { |
| /* dmar is a global variable; see acpi.h */ |
| if (dmar == NULL) |
| return NULL; |
| |
| for (int i = 0; i < dmar->nchildren; i++) { |
| struct Atable *at = dmar->children[i]; |
| struct Drhd *drhd = at->tbl; |
| |
| if (drhd->all & 1) |
| return &drhd->iommu; |
| } |
| |
| return NULL; |
| } |
| |
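/* Point every discovered PCI device at the default IOMMU. Per the TODO at the
 * top of this file, DRHD device scopes are not parsed yet, so all devices get
 * the same (default) IOMMU. */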
| void iommu_map_pci_devices(void) |
| { |
| struct pci_device *pci_iter; |
| struct iommu *iommu = get_default_iommu(); |
| |
| if (!iommu) |
| return; |
| |
| /* set the default iommu */ |
| STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) |
| pci_iter->iommu = iommu; |
| |
| // TODO: parse devscope and assign scoped iommus |
| } |
| |
/* This is called from acpi.c to initialize struct iommu.
 * The actual IOMMU hardware is not touched or configured in any way. */
| void iommu_initialize(struct iommu *iommu, uint8_t haw, uint64_t rba) |
| { |
| is_initialized = true; |
| |
| /* initialize the struct */ |
| TAILQ_INIT(&iommu->procs); |
| spinlock_init_irqsave(&iommu->iommu_lock); |
| iommu->rba = rba; |
| iommu->regio = (void __iomem *) vmap_pmem_nocache(rba, VTD_PAGE_SIZE); |
	iommu->supported = true; /* updated by iommu_assert_all() if checks fail */
| iommu->num_assigned_devs = 0; |
| iommu->haw_dmar = haw; |
| |
| /* add the iommu to the list of all discovered iommu */ |
| TAILQ_INSERT_TAIL(&iommu_list, iommu, iommu_link); |
| } |
| |
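/* Device init hook: only reports whether a usable IOMMU was found; the
 * hardware itself is programmed via iommu_initialize_global(). */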
| static void iommuinit(void) |
| { |
| if (iommu_supported()) |
| printk(IOMMU "initialized\n"); |
| else |
| printk(IOMMU "not supported\n"); |
| } |
| |
| struct dev iommudevtab __devtab = { |
| .name = "iommu", |
| .reset = devreset, |
| .init = iommuinit, |
| .shutdown = devshutdown, |
| .attach = iommuattach, |
| .walk = iommuwalk, |
| .stat = iommustat, |
| .open = iommuopen, |
| .create = devcreate, |
| .close = iommuclose, |
| .read = iommuread, |
| .bread = devbread, |
| .write = iommuwrite, |
| .bwrite = devbwrite, |
| .remove = devremove, |
| .wstat = devwstat, |
| }; |