| /* Copyright (c) 2019, 2020 Google, Inc. |
| * |
| * Driver for the Intel VT-d IOMMU |
| * |
| * Aditya Basu <mitthu@google.com> |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * |
| * Lock ordering, outer to inner: |
| * (1) proc->dev_qlock => (2) iommu->iommu_lock |
| * (1) proc->dev_qlock => (2) pdev->qlock |
| * |
| * TODO |
| * ==== |
| * - In iommu_map_pci_devices() assign the correct iommu for scoped DRHD. Right |
| * now the default iommu is assigned to all devices. |
| * - IOMMU_DID_DEFAULT = 1; this means pid = 1 cannot have a device passthru |
| * because we use the pid as "did" or domain ID. |
| * |
| * Lifecycle of CTEs (context table entries): |
| * - at boot, every CTE (one per (bus, dev, func) on each iommu) is set to |
| * non-translating pass-through. In essence, an identity map. |
| * - pci devices are initially assigned to the kernel. |
| * - when devices are unassigned, their cte mapping is destroyed. |
| * - when they are reassigned, their mapping is set to either an identity map |
| * (kernel) or a process's page table. |
| * |
| * - On the topic of disabling the IOMMU, we used to have an option to just |
| * unset it completely: disable TE and clear the root pointer. Though the code |
| * we had was hokey and broken. Even then, if we have a device behind an |
| * IOMMU and disable the IOMMU, DMA for that device just breaks. Maybe if |
| * we had identity-mapped pages in the IPT, the device would still work when |
| * translation was turned off. Seems like a mess. |
| * |
| * - We ought to do a domain-selective, context-cache invalidation whenever we |
| * reuse DIDs, i.e. whenever there is a new IPT for a pid, which is every 65k |
| * processes (or maybe every 16k, depending on how many pids we have). |
| */ |
| |
| #include <stdio.h> |
| #include <error.h> |
| #include <common.h> |
| #include <net/ip.h> |
| #include <atomic.h> |
| |
| #include <acpi.h> |
| #include <arch/intel-iommu.h> |
| #include <env.h> |
| #include <arch/pci.h> |
| #include <linux_compat.h> |
| |
| #define IOMMU "iommu: " |
| #define BUFFERSZ 8192 |
| |
| struct dev iommudevtab; |
| |
| static struct iommu_list_tq iommu_list = TAILQ_HEAD_INITIALIZER(iommu_list); |
| static bool iommu_is_supported; |
| |
| /* QID Path */ |
| enum { |
| Qdir = 0, |
| Qmappings = 1, |
| Qadddev = 2, |
| Qremovedev = 3, |
| Qinfo = 4, |
| }; |
| |
| static struct dirtab iommudir[] = { |
| {".", {Qdir, 0, QTDIR}, 0, 0555}, |
| {"mappings", {Qmappings, 0, QTFILE}, 0, 0755}, |
| {"attach", {Qadddev, 0, QTFILE}, 0, 0755}, |
| {"detach", {Qremovedev, 0, QTFILE}, 0, 0755}, |
| {"info", {Qinfo, 0, QTFILE}, 0, 0755}, |
| }; |
| |
| /* OK, we never actually use these, since we won't support any IOMMU that |
| * requires RWBF (Required Write Buffer Flushing). |
| * |
| * On older hardware, if we updated data structures from software, the IOMMU |
| * wouldn't necessarily see it. The software write would get held up at various |
| * write buffers. See the VT-d spec, section 6.8. |
| * |
| * Certain operations, such as ctx cache and iotlb flushes, were OK. The HW |
| * would implicitly do a write buffer flush. Other operations, like changing an |
| * IPT PTE, which do not necessarily require a command flush, would need the |
| * WBF. |
| * |
| * This is different from caching mode (CM). In CM, hardware (or more often a |
| * virtual IOMMU) caches negative PTEs, and you need to poke the IOMMU whenever |
| * changing any PTE. This RWBF isn't about caching old values; it's about not |
| * seeing new values due to buffering. |
| * |
| * Just about any time you want to do a CM operation, you'd also want to check |
| * for RWBF. Though note that we do not use the IOMMU if it requires either CM |
| * or RWBF. */ |
| static inline void write_buffer_flush(struct iommu *iommu) |
| { |
| uint32_t cmd, status; |
| |
| if (!iommu->rwbf) |
| return; |
| |
| cmd = read32(iommu->regio + DMAR_GCMD_REG) | DMA_GCMD_WBF; |
| write32(cmd, iommu->regio + DMAR_GCMD_REG); |
| |
| do { |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| } while (status & DMA_GSTS_WBFS); |
| } |
| |
| /* OK, read and write draining on flush. At first I thought this was about |
| * ops that queued up, but hadn't gone through the IOMMU yet. Instead, this is |
| * about ops that made it through the IOMMU, but have not made it to main |
| * memory. i.e., the IOMMU translated to a physical address, but the write to |
| * that paddr hasn't made it to RAM. The reason we ask for a TLB flush is |
| * typically to make sure the PTE / translation is no longer in use. Undrained |
| * operations that made it past the IOMMU are still using the old translation. |
| * Thus we should always read/write drain. */ |
| static void __iotlb_flush_global(struct iommu *iommu) |
| { |
| write64(DMA_TLB_IVT | DMA_TLB_READ_DRAIN | DMA_TLB_WRITE_DRAIN | |
| DMA_TLB_GLOBAL_FLUSH, |
| iommu->regio + iommu->iotlb_cmd_offset); |
| |
| while (read64(iommu->regio + iommu->iotlb_cmd_offset) & DMA_TLB_IVT) |
| cpu_relax(); |
| } |
| |
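| /* Domain-selective IOTLB invalidation: drop all cached translations tagged |
| * with this domain ID (did), draining outstanding reads and writes first. */ |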
| static void iotlb_flush(struct iommu *iommu, uint16_t did) |
| { |
| write64(DMA_TLB_IVT | DMA_TLB_READ_DRAIN | DMA_TLB_WRITE_DRAIN | |
| DMA_TLB_DSI_FLUSH | DMA_TLB_DID(did), |
| iommu->regio + iommu->iotlb_cmd_offset); |
| |
| while (read64(iommu->regio + iommu->iotlb_cmd_offset) & DMA_TLB_IVT) |
| cpu_relax(); |
| } |
| |
| static inline struct root_entry *get_root_entry(physaddr_t paddr) |
| { |
| return (struct root_entry *) KADDR(paddr); |
| } |
| |
| static inline struct context_entry *get_context_entry(physaddr_t paddr) |
| { |
| return (struct context_entry *) KADDR(paddr); |
| } |
| |
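| /* Program a context entry for pass-through (TT = 10b): DMA from the device is |
| * not translated (IOVA == PA) and is tagged with the default domain ID. */ |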
| static void __cte_set_identity_pgtbl(struct context_entry *cte) |
| { |
| cte->hi = 0 |
| | (IOMMU_DID_DEFAULT << CTX_HI_DID_SHIFT) // DID: bits 87:72 |
| | (CTX_AW_L4 << CTX_HI_AW_SHIFT); // address width: 4-level (48-bit) |
| |
| cte->lo = 0 /* assumes page alignment */ |
| | (0x2 << CTX_LO_TRANS_SHIFT) |
| | (0x1 << CTX_LO_FPD_SHIFT) // disable faults |
| | (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */ |
| } |
| |
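| /* Program a context entry to translate through the process's page tables |
| * (TT = 00b, second-level translation), using the pid as the domain ID so we |
| * can do domain-selective IOTLB flushes for the proc. */ |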
| static void __cte_set_proc_pgtbl(struct context_entry *cte, struct proc *p) |
| { |
| /* TODO: need to limit PID to 16 bits or come up with an alternative */ |
| warn_on(p->pid & ~0xffff); |
| |
| cte->hi = 0 |
| | ((uint16_t)p->pid << CTX_HI_DID_SHIFT) // DID: bits 87:72 |
| | (CTX_AW_L4 << CTX_HI_AW_SHIFT); // address width: 4-level (48-bit) |
| |
| /* The only differences from the identity entry are the PGDIR pointer and the |
| * translation type (TT) */ |
| cte->lo = PTE_ADDR(p->env_pgdir.eptp) |
| | (0x0 << CTX_LO_TRANS_SHIFT) |
| | (0x1 << CTX_LO_FPD_SHIFT) // disable faults |
| | (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */ |
| } |
| |
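| /* Allocate and initialize a context table for one bus: 256 entries (32 |
| * devices x 8 functions), each set to the pass-through identity mapping. */ |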
| static physaddr_t ct_init(void) |
| { |
| struct context_entry *cte; |
| physaddr_t ct; |
| |
| cte = (struct context_entry *) kpage_zalloc_addr(); |
| ct = PADDR(cte); |
| |
| for (int i = 0; i < 32 * 8; i++, cte++) // device * func |
| __cte_set_identity_pgtbl(cte); |
| |
| return ct; |
| } |
| |
| /* Get a new root entry table, allocating a context table (with all of its |
| * context entries) for each of the 256 buses. */ |
| static physaddr_t rt_init(void) |
| { |
| struct root_entry *rte; |
| physaddr_t rt; |
| physaddr_t ct; |
| |
| /* Page-aligned (0x1000), as the root table pointer requires */ |
| rte = (struct root_entry *) kpage_zalloc_addr(); |
| rt = PADDR(rte); |
| |
| /* create context table */ |
| for (int i = 0; i < 256; i++, rte++) { |
| ct = ct_init(); |
| rte->hi = 0; |
| rte->lo = 0 |
| | ct |
| | (0x1 << RT_LO_PRESENT_SHIFT); |
| } |
| |
| return rt; |
| } |
| |
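| /* Walk to the context entry for pdev: index the root table by bus, then index |
| * that bus's context table by (dev * 8 + func). */ |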
| static struct context_entry *get_ctx_for(struct iommu *iommu, |
| struct pci_device *pdev) |
| { |
| struct root_entry *rte; |
| physaddr_t cte_phy; |
| struct context_entry *cte; |
| uint32_t offset = 0; |
| |
| rte = get_root_entry(iommu->roottable) + pdev->bus; |
| |
| cte_phy = rte->lo & 0xFFFFFFFFFFFFF000; /* bits 63:12 hold the CT pointer */ |
| cte = get_context_entry(cte_phy); |
| |
| offset = (pdev->dev * 8) + pdev->func; |
| cte += offset; |
| |
| return cte; |
| } |
| |
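| /* Tear down the device's translation: clear the present bit in its context |
| * entry, then flush the IOTLB for the old domain (did) so stale translations |
| * are dropped. */ |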
| static void __iommu_clear_pgtbl(struct pci_device *pdev, uint16_t did) |
| { |
| struct iommu *iommu = pdev->iommu; |
| struct context_entry *cte = get_ctx_for(iommu, pdev); |
| |
| cte->lo &= ~0x1; |
| |
| spin_lock_irqsave(&iommu->iommu_lock); |
| iotlb_flush(iommu, did); |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| } |
| |
| /* Hold the proc's dev_qlock. This returns the linkage for p and i, inserting |
| * one if it didn't already exist. */ |
| static struct iommu_proc_link *__get_linkage(struct proc *p, struct iommu *i) |
| { |
| struct iommu_proc_link *l; |
| |
| list_for_each_entry(l, &p->iommus, link) { |
| if (l->i == i) |
| return l; |
| } |
| l = kmalloc(sizeof(struct iommu_proc_link), MEM_WAIT); |
| l->i = i; |
| l->p = p; |
| l->nr_devices = 0; |
| list_add_rcu(&l->link, &p->iommus); |
| return l; |
| } |
| |
| /* Caller holds the pdev->qlock and if proc, the proc->dev_qlock. |
| * Careful, this can throw. */ |
| void __iommu_device_assign(struct pci_device *pdev, struct proc *proc) |
| { |
| struct iommu *iommu = pdev->iommu; |
| struct iommu_proc_link *l; |
| |
| if (!proc) { |
| __cte_set_identity_pgtbl(get_ctx_for(pdev->iommu, pdev)); |
| return; |
| } |
| |
| /* Lockless peek. We hold the dev_qlock, so if we are concurrently |
| * dying, proc_destroy() will come behind us and undo this. If |
| * proc_destroy() already removed all devices, we would see DYING. */ |
| if (proc_is_dying(proc)) |
| error(EINVAL, "process is dying"); |
| l = __get_linkage(proc, iommu); |
| |
| l->nr_devices++; |
| TAILQ_INSERT_TAIL(&proc->pci_devs, pdev, proc_link); |
| |
| __cte_set_proc_pgtbl(get_ctx_for(pdev->iommu, pdev), proc); |
| } |
| |
| /* Caller holds the pdev->qlock and if proc, the proc->dev_qlock. */ |
| void __iommu_device_unassign(struct pci_device *pdev, struct proc *proc) |
| { |
| struct iommu *iommu = pdev->iommu; |
| struct iommu_proc_link *l; |
| |
| assert(iommu == pdev->iommu); |
| |
| if (!proc) { |
| __iommu_clear_pgtbl(pdev, IOMMU_DID_DEFAULT); |
| return; |
| } |
| |
| l = __get_linkage(proc, iommu); |
| |
| __iommu_clear_pgtbl(pdev, proc->pid); |
| |
| l->nr_devices--; |
| if (!l->nr_devices) { |
| list_del_rcu(&l->link); |
| kfree_rcu(l, rcu); |
| } |
| |
| TAILQ_REMOVE(&proc->pci_devs, pdev, proc_link); |
| } |
| |
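| /* Detach every PCI device from p, e.g. when the process is being destroyed. |
| * Takes p->dev_qlock and then each pdev->qlock, per the lock ordering at the |
| * top of this file. */ |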
| void iommu_unassign_all_devices(struct proc *p) |
| { |
| struct pci_device *pdev, *tp; |
| |
| qlock(&p->dev_qlock); |
| /* If you want to get clever and try to batch up the iotlb flushes, it's |
| * probably not worth it. The big concern is that the moment you unlock |
| * the pdev, it can be reassigned. If you didn't flush the iotlb yet, |
| * it might have old entries. Note that when we flush, we pass the DID |
| * (p->pid), which the next user of the pdev won't know. I don't know |
| * if you need to flush the old DID entry or not before reusing a CTE, |
| * though probably. */ |
| TAILQ_FOREACH_SAFE(pdev, &p->pci_devs, proc_link, tp) { |
| qlock(&pdev->qlock); |
| pci_device_unassign_known(pdev, p); |
| qunlock(&pdev->qlock); |
| } |
| qunlock(&p->dev_qlock); |
| } |
| |
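| /* Flush the IOTLB entries for this process's domain (did == pid) on every |
| * IOMMU that has one of its devices, e.g. after its page tables change. */ |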
| void proc_iotlb_flush(struct proc *p) |
| { |
| struct iommu_proc_link *l; |
| |
| rcu_read_lock(); |
| list_for_each_entry_rcu(l, &p->iommus, link) { |
| spin_lock_irqsave(&l->i->iommu_lock); |
| iotlb_flush(l->i, p->pid); |
| spin_unlock_irqsave(&l->i->iommu_lock); |
| } |
| rcu_read_unlock(); |
| } |
| |
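| /* Program the root table pointer: write RTADDR, issue a Set Root Table |
| * Pointer (SRTP) command, and wait for hardware to latch it (GSTS.RTPS). */ |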
| static void __set_root_table(struct iommu *iommu, physaddr_t roottable) |
| { |
| write64(roottable, iommu->regio + DMAR_RTADDR_REG); |
| write32(DMA_GCMD_SRTP, iommu->regio + DMAR_GCMD_REG); |
| /* Unlike the write-buffer-flush status and ICC completion check, |
| * hardware *sets* the bit to 1 when it is done */ |
| while (!(read32(iommu->regio + DMAR_GSTS_REG) & DMA_GSTS_RTPS)) |
| cpu_relax(); |
| } |
| |
| static void __inval_ctx_cache_global(struct iommu *iommu) |
| { |
| write64(DMA_CCMD_ICC | DMA_CCMD_GLOBAL_INVL, |
| iommu->regio + DMAR_CCMD_REG); |
| while (read64(iommu->regio + DMAR_CCMD_REG) & DMA_CCMD_ICC) |
| cpu_relax(); |
| } |
| |
| static void __enable_translation(struct iommu *iommu) |
| { |
| /* See the VT-d spec, 10.4.4, for concerns if we want to update multiple fields. |
| * (read status, mask the one-shot commands we don't want on, then set |
| * the ones we do want). */ |
| write32(DMA_GCMD_TE, iommu->regio + DMAR_GCMD_REG); |
| while (!(read32(iommu->regio + DMAR_GSTS_REG) & DMA_GSTS_TES)) |
| cpu_relax(); |
| } |
| |
| /* Given an iommu with a root table, enable translation. The default root table |
| * (from rt_init()) is set up to not translate. i.e. IOVA == PA. */ |
| static void iommu_enable_translation(struct iommu *iommu) |
| { |
| spin_lock_irqsave(&iommu->iommu_lock); |
| __set_root_table(iommu, iommu->roottable); |
| __inval_ctx_cache_global(iommu); |
| __iotlb_flush_global(iommu); |
| __enable_translation(iommu); |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| } |
| |
| /* Iterate over all IOMMUs and make sure the register base addresses ("rba") |
| * reported by the DRHDs are unique */ |
| static bool iommu_assert_unique_regio(void) |
| { |
| struct iommu *outer, *inner; |
| uint64_t rba; |
| bool result = true; |
| |
| TAILQ_FOREACH(outer, &iommu_list, iommu_link) { |
| rba = outer->rba; |
| |
| TAILQ_FOREACH(inner, &iommu_list, iommu_link) { |
| if (outer != inner && rba == inner->rba) { |
| outer->supported = false; |
| result = false; |
| } |
| } |
| } |
| |
| return result; |
| } |
| |
| static bool iommu_has_required_capabilities(struct iommu *iommu) |
| { |
| uint64_t cap, ecap; |
| bool support, result = true; |
| |
| cap = read64(iommu->regio + DMAR_CAP_REG); |
| ecap = read64(iommu->regio + DMAR_ECAP_REG); |
| |
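| /* SAGAW bit 2 means 4-level (48-bit AGAW) page tables are supported, which is |
| * what we program into context entries (CTX_AW_L4). */ |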
| support = (cap_sagaw(cap) & 0x4) >> 2; |
| if (!support) { |
| printk(IOMMU "%p: unsupported paging level: 0x%x\n", |
| iommu, cap_sagaw(cap)); |
| result = false; |
| } |
| |
| /* Bit 0 of the super-page field is 2MB page support */ |
| support = cap_super_page_val(cap) & 0x1; |
| if (!support) { |
| printk(IOMMU "%p: 2MB super pages not supported\n", iommu); |
| result = false; |
| } |
| |
| if (cap_rwbf(cap)) { |
| printk(IOMMU "%p: HW requires RWBF, will abort\n", iommu); |
| result = false; |
| } |
| |
| if (cap_caching_mode(cap)) { |
| printk(IOMMU "%p: HW requires caching_mode, will abort\n", |
| iommu); |
| result = false; |
| } |
| |
| support = ecap_pass_through(ecap); |
| if (!support) { |
| printk(IOMMU "%p: pass-through translation type in context entries not supported\n", iommu); |
| result = false; |
| } |
| |
| /* max gaw/haw reported by iommu. It's fine if these differ. Spec says |
| * MGAW must be at least the HAW. It's OK to be more. */ |
| iommu->haw_cap = cap_mgaw(cap); |
| if (iommu->haw_cap < iommu->haw_dmar) { |
| printk(IOMMU "%p: HAW mismatch; DMAR reports %d, CAP reports %d, check CPUID\n", |
| iommu, iommu->haw_dmar, iommu->haw_cap); |
| } |
| |
| return result; |
| } |
| |
| /* All or nothing: we only claim IOMMU support if every discovered IOMMU is |
| * supported. */ |
| static bool have_iommu_support(void) |
| { |
| struct iommu *iommu; |
| |
| if (TAILQ_EMPTY(&iommu_list)) |
| return false; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| if (!iommu->supported) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Run this function after all individual IOMMUs are initialized. */ |
| void iommu_enable_all(void) |
| { |
| struct iommu *iommu; |
| static bool once = false; |
| |
| if (once) |
| warn(IOMMU "Called twice, aborting!"); |
| once = true; |
| |
| if (!iommu_assert_unique_regio()) |
| warn(IOMMU "same register base addresses detected"); |
| |
| iommu_is_supported = have_iommu_support(); |
| if (!iommu_is_supported) { |
| printk("No supported IOMMUs detected\n"); |
| return; |
| } |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| printk("IOMMU: enabling translation on %p\n", iommu); |
| iommu_enable_translation(iommu); |
| } |
| } |
| |
| static bool _iommu_is_enabled(struct iommu *iommu) |
| { |
| uint32_t status = 0; |
| |
| /* Arguably we don't need the lock when reading. */ |
| spin_lock_irqsave(&iommu->iommu_lock); |
| status = read32(iommu->regio + DMAR_GSTS_REG); |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| |
| return status & DMA_GSTS_TES; |
| } |
| |
| static bool iommu_some_is_enabled(void) |
| { |
| struct iommu *iommu; |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) |
| if (_iommu_is_enabled(iommu)) |
| return true; |
| |
| return false; |
| } |
| |
| /* grabs the iommu of the first DRHD with INCLUDE_PCI_ALL */ |
| struct iommu *get_default_iommu(void) |
| { |
| /* dmar is a global variable; see acpi.h */ |
| if (dmar == NULL) |
| return NULL; |
| |
| for (int i = 0; i < dmar->nchildren; i++) { |
| struct Atable *at = dmar->children[i]; |
| struct Drhd *drhd = at->tbl; |
| |
| if (drhd->all & 1) |
| return &drhd->iommu; |
| } |
| |
| return NULL; |
| } |
| |
| void iommu_map_pci_devices(void) |
| { |
| struct pci_device *pci_iter; |
| struct iommu *iommu = get_default_iommu(); |
| |
| if (!iommu) |
| return; |
| |
| /* set the default iommu */ |
| STAILQ_FOREACH(pci_iter, &pci_devices, all_dev) { |
| pci_iter->iommu = iommu; |
| TAILQ_INSERT_TAIL(&iommu->pci_devs, pci_iter, iommu_link); |
| } |
| } |
| |
| /* This is called from acpi.c to initialize an iommu. */ |
| void iommu_acpi_init(struct iommu *iommu, uint8_t haw, uint64_t rba) |
| { |
| uint64_t cap, ecap; |
| |
| TAILQ_INIT(&iommu->pci_devs); |
| spinlock_init_irqsave(&iommu->iommu_lock); |
| iommu->rba = rba; |
| iommu->regio = (void __iomem *) vmap_pmem_nocache(rba, VTD_PAGE_SIZE); |
| if (!iommu->regio) |
| warn("Unable to map the iommu, aborting!"); |
| iommu->haw_dmar = haw; |
| |
| iommu->supported = iommu_has_required_capabilities(iommu); |
| |
| cap = read64(iommu->regio + DMAR_CAP_REG); |
| ecap = read64(iommu->regio + DMAR_ECAP_REG); |
| |
| /* Creates a root table for non-translating identity maps, but it is not |
| * enabled / turned on yet. */ |
| iommu->roottable = rt_init(); |
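| /* ECAP.IRO points at the IOTLB register set: the IVA register is at that |
| * offset and the IOTLB invalidate (command) register is 8 bytes after it. */ |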
| iommu->iotlb_cmd_offset = ecap_iotlb_offset(ecap) + 8; |
| iommu->iotlb_addr_offset = ecap_iotlb_offset(ecap); |
| |
| iommu->rwbf = cap_rwbf(cap); |
| iommu->device_iotlb = ecap_dev_iotlb_support(ecap); |
| |
| /* add the iommu to the list of all discovered iommus */ |
| TAILQ_INSERT_TAIL(&iommu_list, iommu, iommu_link); |
| } |
| |
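| /* Attach the device at bus:dev.func to the process with this pid, or back to |
| * the kernel if pid == 0. Throws on a bad device or pid. */ |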
| static void assign_device(int bus, int dev, int func, pid_t pid) |
| { |
| ERRSTACK(1); |
| int tbdf = MKBUS(BusPCI, bus, dev, func); |
| struct pci_device *pdev = pci_match_tbdf(tbdf); |
| struct proc *p; |
| |
| if (!pdev) |
| error(EIO, "cannot find dev %x:%x.%x\n", bus, dev, func); |
| if (!pid) { |
| pci_device_assign(pdev, NULL); |
| return; |
| } |
| if (pid == 1) |
| error(EIO, "device passthru not supported for pid = 1"); |
| p = pid2proc(pid); |
| if (!p) |
| error(EIO, "cannot find pid %d\n", pid); |
| if (waserror()) { |
| proc_decref(p); |
| nexterror(); |
| } |
| pci_device_assign(pdev, p); |
| proc_decref(p); |
| poperror(); |
| } |
| |
| static void unassign_device(int bus, int dev, int func, pid_t pid) |
| { |
| ERRSTACK(1); |
| int tbdf = MKBUS(BusPCI, bus, dev, func); |
| struct pci_device *pdev = pci_match_tbdf(tbdf); |
| struct proc *p; |
| |
| if (!pdev) |
| error(EIO, "cannot find dev %x:%x.%x\n", bus, dev, func); |
| if (!pid) { |
| pci_device_unassign(pdev, NULL); |
| return; |
| } |
| p = pid2proc(pid); |
| if (!p) |
| error(EIO, "cannot find pid %d\n", pid); |
| if (waserror()) { |
| proc_decref(p); |
| nexterror(); |
| } |
| pci_device_unassign(pdev, p); |
| proc_decref(p); |
| poperror(); |
| } |
| |
| static struct sized_alloc *open_mappings(void) |
| { |
| struct iommu *iommu; |
| bool has_dev = false; |
| struct pci_device *pdev; |
| struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT); |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| has_dev = false; |
| sza_printf(sza, "Mappings for iommu@%p\n", iommu); |
| spin_lock_irqsave(&iommu->iommu_lock); |
| TAILQ_FOREACH(pdev, &iommu->pci_devs, iommu_link) { |
| if (!pdev->proc_owner) |
| continue; |
| has_dev = true; |
| sza_printf(sza, "\tdevice %02x:%02x.%x, PID %u\n", |
| pdev->bus, pdev->dev, pdev->func, |
| pdev->proc_owner->pid); |
| } |
| spin_unlock_irqsave(&iommu->iommu_lock); |
| if (!has_dev) |
| sza_printf(sza, "\t<empty>\n"); |
| } |
| |
| return sza; |
| } |
| |
| static void _open_info(struct iommu *iommu, struct sized_alloc *sza) |
| { |
| uint64_t value; |
| |
| sza_printf(sza, "\niommu@%p\n", iommu); |
| sza_printf(sza, "\trba = %p\n", iommu->rba); |
| sza_printf(sza, "\tsupported = %s\n", iommu->supported ? "yes" : "no"); |
| sza_printf(sza, "\tregspace = %p\n", iommu->regio); |
| sza_printf(sza, "\thost addr width (dmar) = %d\n", iommu->haw_dmar); |
| sza_printf(sza, "\thost addr width (cap[mgaw]) = %d\n", |
| iommu->haw_cap); |
| value = read32(iommu->regio + DMAR_VER_REG); |
| sza_printf(sza, "\tversion = 0x%x\n", value); |
| |
| value = read64(iommu->regio + DMAR_CAP_REG); |
| sza_printf(sza, "\tcapabilities = %p\n", value); |
| sza_printf(sza, "\t\tmgaw: %d\n", cap_mgaw(value)); |
| sza_printf(sza, "\t\tsagaw (paging level): 0x%x\n", cap_sagaw(value)); |
| sza_printf(sza, "\t\tcaching mode: %s (%d)\n", cap_caching_mode(value) ? |
| "yes" : "no", cap_caching_mode(value)); |
| sza_printf(sza, "\t\tzlr: 0x%x\n", cap_zlr(value)); |
| sza_printf(sza, "\t\trwbf: %s\n", cap_rwbf(value) ? "required" |
| : "not required"); |
| sza_printf(sza, "\t\tnum domains: %d\n", cap_ndoms(value)); |
| sza_printf(sza, "\t\tsupports protected high-memory region: %s\n", |
| cap_phmr(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tsupports protected low-memory region: %s\n", |
| cap_plmr(value) ? "yes" : "no"); |
| |
| value = read64(iommu->regio + DMAR_ECAP_REG); |
| sza_printf(sza, "\text. capabilities = %p\n", value); |
| sza_printf(sza, "\t\tpass through: %s\n", |
| ecap_pass_through(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tdevice iotlb: %s\n", |
| ecap_dev_iotlb_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tiotlb register offset: 0x%x\n", |
| ecap_iotlb_offset(value)); |
| sza_printf(sza, "\t\tsnoop control: %s\n", |
| ecap_sc_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tcoherency: %s\n", |
| ecap_coherent(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tqueue invalidation support: %s\n", |
| ecap_qis(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\tinterrupt remapping support: %s\n", |
| ecap_ir_support(value) ? "yes" : "no"); |
| sza_printf(sza, "\t\textended interrupt mode: 0x%x\n", |
| ecap_eim_support(value)); |
| |
| value = read32(iommu->regio + DMAR_GSTS_REG); |
| sza_printf(sza, "\tglobal status = 0x%x\n", value); |
| sza_printf(sza, "\t\ttranslation: %s\n", |
| value & DMA_GSTS_TES ? "enabled" : "disabled"); |
| sza_printf(sza, "\t\troot table: %s\n", |
| value & DMA_GSTS_RTPS ? "set" : "not set"); |
| |
| value = read64(iommu->regio + DMAR_RTADDR_REG); |
| sza_printf(sza, "\troot entry table = %p (phy) or %p (vir)\n", |
| value, KADDR(value)); |
| } |
| |
| static struct sized_alloc *open_info(void) |
| { |
| struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT); |
| uint64_t value; |
| struct iommu *iommu; |
| |
| sza_printf(sza, "driver info:\n"); |
| |
| value = IOMMU_DID_DEFAULT; |
| sza_printf(sza, "\tdefault did = %d\n", value); |
| sza_printf(sza, "\tstatus = %s\n", |
| iommu_some_is_enabled() ? "enabled" : "disabled"); |
| |
| TAILQ_FOREACH(iommu, &iommu_list, iommu_link) { |
| _open_info(iommu, sza); |
| } |
| |
| return sza; |
| } |
| |
| static char *devname(void) |
| { |
| return iommudevtab.name; |
| } |
| |
| static struct chan *iommuattach(char *spec) |
| { |
| return devattach(devname(), spec); |
| } |
| |
| static struct walkqid *iommuwalk(struct chan *c, struct chan *nc, char **name, |
| unsigned int nname) |
| { |
| return devwalk(c, nc, name, nname, iommudir, |
| ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| static size_t iommustat(struct chan *c, uint8_t *dp, size_t n) |
| { |
| return devstat(c, dp, n, iommudir, ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| static struct chan *iommuopen(struct chan *c, int omode) |
| { |
| switch (c->qid.path) { |
| case Qmappings: |
| c->synth_buf = open_mappings(); |
| break; |
| case Qinfo: |
| c->synth_buf = open_info(); |
| break; |
| case Qadddev: |
| case Qremovedev: |
| case Qdir: |
| default: |
| break; |
| } |
| |
| return devopen(c, omode, iommudir, ARRAY_SIZE(iommudir), devgen); |
| } |
| |
| /* |
| * All files are synthetic. The only cleanup needed on close is freeing the |
| * synth buffer used by the read-only files (mappings, info). |
| */ |
| static void iommuclose(struct chan *c) |
| { |
| switch (c->qid.path) { |
| case Qmappings: |
| case Qinfo: |
| kfree(c->synth_buf); |
| c->synth_buf = NULL; |
| break; |
| case Qadddev: |
| case Qremovedev: |
| case Qdir: |
| default: |
| break; |
| } |
| } |
| |
| static size_t iommuread(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| struct sized_alloc *sza = c->synth_buf; |
| |
| switch (c->qid.path) { |
| case Qdir: |
| return devdirread(c, va, n, iommudir, |
| ARRAY_SIZE(iommudir), devgen); |
| case Qadddev: |
| return readstr(offset, va, n, |
| "write format: xx:yy.z pid\n" |
| " xx = bus (in hex)\n" |
| " yy = device (in hex)\n" |
| " z = function (in hex)\n" |
| " pid = process pid\n" |
| "\nexample:\n" |
| "$ echo 00:1f.2 13 >\\#iommu/attach\n"); |
| case Qremovedev: |
| return readstr(offset, va, n, |
| "write format: xx:yy.z pid\n" |
| " xx = bus (in hex)\n" |
| " yy = device (in hex)\n" |
| " z = function (in hex)\n" |
| " pid = process pid\n" |
| "\nexample:\n" |
| "$ echo 00:1f.2 13 >\\#iommu/detach\n"); |
| case Qmappings: |
| case Qinfo: |
| return readstr(offset, va, n, sza->buf); |
| default: |
| error(EIO, "read: qid %d is impossible", c->qid.path); |
| } |
| |
| return -1; /* not reached */ |
| } |
| |
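| /* Parse "xx:yy.z pid" (as documented in the attach/detach files), e.g. |
| * "00:1f.2 13": bus/dev/func in hex, pid via strtoul with base 0. */ |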
| static void get_bdf_pid(struct cmdbuf *cb, int *bus, int *dev, int *func, |
| pid_t *pid) |
| { |
| int err; |
| |
| if (cb->nf < 2) |
| error(EFAIL, "usage: xx:yy.z pid"); |
| |
| err = sscanf(cb->f[0], "%x:%x.%x", bus, dev, func); |
| if (err != 3) |
| error(EIO, |
| IOMMU "error parsing bdf %s; nr parsed: %d", cb->f[0], err); |
| |
| *pid = strtoul(cb->f[1], 0, 0); |
| } |
| |
| static void write_add_dev(struct chan *c, struct cmdbuf *cb) |
| { |
| int bus, dev, func; |
| pid_t pid; |
| |
| get_bdf_pid(cb, &bus, &dev, &func, &pid); |
| |
| if (pid == 1) |
| error(EIO, IOMMU "device passthru not supported for pid = 1"); |
| |
| assign_device(bus, dev, func, pid); |
| } |
| |
| static void write_remove_dev(struct chan *c, struct cmdbuf *cb) |
| { |
| int bus, dev, func; |
| pid_t pid; |
| |
| get_bdf_pid(cb, &bus, &dev, &func, &pid); |
| |
| unassign_device(bus, dev, func, pid); |
| } |
| |
| static size_t iommuwrite(struct chan *c, void *va, size_t n, off64_t offset) |
| { |
| ERRSTACK(1); |
| struct cmdbuf *cb = parsecmd(va, n); |
| |
| if (waserror()) { |
| kfree(cb); |
| nexterror(); |
| } |
| switch (c->qid.path) { |
| case Qadddev: |
| if (!iommu_is_supported) |
| error(EROFS, IOMMU "not supported"); |
| write_add_dev(c, cb); |
| break; |
| case Qremovedev: |
| if (!iommu_is_supported) |
| error(EROFS, IOMMU "not supported"); |
| write_remove_dev(c, cb); |
| break; |
| case Qmappings: |
| case Qinfo: |
| case Qdir: |
| error(EROFS, IOMMU "cannot modify"); |
| default: |
| error(EIO, "write: qid %d is impossible", c->qid.path); |
| } |
| kfree(cb); |
| poperror(); |
| return n; |
| } |
| |
| struct dev iommudevtab __devtab = { |
| .name = "iommu", |
| .reset = devreset, |
| .init = devinit, |
| .shutdown = devshutdown, |
| .attach = iommuattach, |
| .walk = iommuwalk, |
| .stat = iommustat, |
| .open = iommuopen, |
| .create = devcreate, |
| .close = iommuclose, |
| .read = iommuread, |
| .bread = devbread, |
| .write = iommuwrite, |
| .bwrite = devbwrite, |
| .remove = devremove, |
| .wstat = devwstat, |
| }; |