/*
 * File: iommu.c - Driver for accessing the Intel IOMMU
 * Author: Aditya Basu <mitthu@google.com>
 *
 * Lock ordering: (1) proc->proc_lock, then (2) iommu->iommu_lock
 *
 * TODO
 * ====
 *  - iommu_process_cleanup() is untested.
 *  - In iommu_map_pci_devices() assign the correct iommu for scoped DRHDs.
 *    Right now the default iommu is assigned to all devices.
 *  - In assign_device() make sure the process is not in the DYING or
 *    DYING_ABORT state.
 *  - Assigning processes across multiple IOMMUs / DRHDs will result in
 *    corruption of iommu->procs. This is because the tailq relies on
 *    proc->iommu_link.
 *  - IOMMU_DID_DEFAULT = 1; this means pid = 1 cannot have a device passthru
 *    because we use the pid as the "did" or domain ID.
 */
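
/*
 * Userspace drives this device through the files in #iommu. For example,
 * using the same syntax as the help text returned by reading attach/detach:
 *
 *	$ echo 00:1f.2 13 >\#iommu/attach   # assign PCI dev 00:1f.2 to pid 13
 *	$ echo 00:1f.2 >\#iommu/detach      # give the device back to the kernel
 *	$ cat \#iommu/mappings              # list current pid <-> device maps
 *	$ echo enable >\#iommu/power        # turn translation on ("off" too)
 */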

#include <stdio.h>
#include <error.h>
#include <common.h>
#include <net/ip.h>
#include <atomic.h>

#include <acpi.h>
#include <arch/intel-iommu.h>
#include <env.h>
#include <arch/pci.h>
#include <linux_compat.h>

#define IOMMU "iommu: "
#define BUFFERSZ 8192

struct dev iommudevtab;
struct iommu_list_tq iommu_list = TAILQ_HEAD_INITIALIZER(iommu_list);
static bool is_initialized; /* to detect absence of IOMMU */

/* QID Path */
enum {
	Qdir         = 0,
	Qmappings    = 1,
	Qadddev      = 2,
	Qremovedev   = 3,
	Qinfo        = 4,
	Qpower       = 5,
};

static struct dirtab iommudir[] = {
	{".",                   {Qdir, 0, QTDIR}, 0, 0555},
	{"mappings",            {Qmappings, 0, QTFILE}, 0, 0755},
	{"attach",              {Qadddev, 0, QTFILE}, 0, 0755},
	{"detach",              {Qremovedev, 0, QTFILE}, 0, 0755},
	{"info",                {Qinfo, 0, QTFILE}, 0, 0755},
	{"power",               {Qpower, 0, QTFILE}, 0, 0755},
};
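
/* The entries above surface as #iommu/<name>; note the file names
 * (attach/detach) differ from their Q* constants (Qadddev/Qremovedev). The
 * device operations backing them live in iommudevtab at the end of this
 * file. */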

/* This might be necessary when updating mapping structures: context-cache,
 * IOTLB or IEC. */
static inline void write_buffer_flush(struct iommu *iommu)
{
	uint32_t cmd, status;

	if (!iommu->rwbf)
		return;

	cmd = read32(iommu->regio + DMAR_GCMD_REG) | DMA_GCMD_WBF;
	write32(cmd, iommu->regio + DMAR_GCMD_REG);

	/* read status */
	do {
		status = read32(iommu->regio + DMAR_GSTS_REG);
	} while (status & DMA_GSTS_WBFS);
}

/* This is necessary when caching mode is supported.
 * ASSUMES: No pending flush requests. This is a problem only if another
 * function is used to perform the flush. */
static inline void iotlb_flush(struct iommu *iommu, uint16_t did)
{
	uint64_t cmd, status;

	cmd = 0x0
	| DMA_TLB_IVT        /* issue the flush command */
	| DMA_TLB_DSI_FLUSH  /* DID specific shootdown */
	| DMA_TLB_READ_DRAIN
	| DMA_TLB_WRITE_DRAIN
	| DMA_TLB_DID(did);
	write64(cmd, iommu->regio + iommu->iotlb_cmd_offset);

	/* read status */
	do {
		status = read64(iommu->regio + iommu->iotlb_cmd_offset);
		status >>= 63; /* bit 63 (IVT): gets cleared on completion */
	} while (status);
}
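
/* A global-shootdown variant is sketched below for reference. It assumes
 * DMA_TLB_GLOBAL_FLUSH is defined alongside DMA_TLB_DSI_FLUSH (as in the
 * Linux-derived register header); verify before using.
 *
 *	static inline void iotlb_flush_global(struct iommu *iommu)
 *	{
 *		uint64_t cmd = DMA_TLB_IVT | DMA_TLB_GLOBAL_FLUSH
 *			       | DMA_TLB_READ_DRAIN | DMA_TLB_WRITE_DRAIN;
 *
 *		write64(cmd, iommu->regio + iommu->iotlb_cmd_offset);
 *		while (read64(iommu->regio + iommu->iotlb_cmd_offset) >> 63)
 *			;	// bit 63 (IVT) clears on completion
 *	}
 */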

static inline struct root_entry *get_root_entry(physaddr_t paddr)
{
	return (struct root_entry *) KADDR(paddr);
}

static inline struct context_entry *get_context_entry(physaddr_t paddr)
{
	return (struct context_entry *) KADDR(paddr);
}

/* iommu is not modified by this function or its callees. */
static physaddr_t ct_init(struct iommu *iommu, uint16_t did)
{
	struct context_entry *cte;
	physaddr_t ct;
	uint8_t ctx_aw = CTX_AW_L4;

	cte = (struct context_entry *) kpage_zalloc_addr();
	ct = PADDR(cte);

	for (int i = 0; i < 32 * 8; i++, cte++) { // 32 devs * 8 funcs
		/* initializations such as the domain */
		cte->hi = 0
			| (did << CTX_HI_DID_SHIFT) // DID bits: 72 to 87
			| (ctx_aw << CTX_HI_AW_SHIFT); // AW
		cte->lo = 0
			| (0x2 << CTX_LO_TRANS_SHIFT) // 0x2: pass through
			| (0x1 << CTX_LO_FPD_SHIFT) // disable faults
			| (0x1 << CTX_LO_PRESENT_SHIFT); // is present
	}

	return ct;
}

/* Get a new root_entry table. Allocates all context entries.
 * iommu is not modified by this function or its callees. */
static physaddr_t rt_init(struct iommu *iommu, uint16_t did)
{
	struct root_entry *rte;
	physaddr_t rt;
	physaddr_t ct;

	/* page-aligned (0x1000) allocation */
	rte = (struct root_entry *) kpage_zalloc_addr();
	rt = PADDR(rte);

	/* create a context table for each of the 256 buses */
	for (int i = 0; i < 256; i++, rte++) {
		ct = ct_init(iommu, did);
		rte->hi = 0;
		rte->lo = 0
			| ct
			| (0x1 << RT_LO_PRESENT_SHIFT);
	}

	return rt;
}
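
/* Sizing note: one root table (256 bus entries) points at 256 context tables
 * of 32 devs * 8 funcs = 256 entries each, i.e. 64K context entries total.
 * At 16 bytes per entry, each table is exactly one 4 KiB page. */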

static struct context_entry *get_ctx_for(int bus, int dev, int func,
	physaddr_t roottable)
{
	struct root_entry *rte;
	physaddr_t cte_phy;
	struct context_entry *cte;
	uint32_t offset = 0;

	rte = get_root_entry(roottable) + bus;

	cte_phy = rte->lo & 0xFFFFFFFFFFFFF000;
	cte = get_context_entry(cte_phy);

	offset = (dev * 8) + func;
	cte += offset;

	return cte;
}
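
/* Worked example: for device 00:1f.2, bus 0x00 picks root entry 0, whose lo
 * field (masked to a page frame) yields that bus's context table; the entry
 * index is dev * 8 + func = 0x1f * 8 + 2 = 250. */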

/* The process pid is used as the Domain ID (DID) */
static void setup_page_tables(struct proc *p, struct pci_device *d)
{
	uint16_t did = p->pid; /* casts down to 16-bit */
	struct iommu *iommu = d->iommu;
	struct context_entry *cte =
		get_ctx_for(d->bus, d->dev, d->func, iommu->roottable);

	/* Mark the entry as not present */
	cte->lo &= ~0x1;
	write_buffer_flush(iommu);
	iotlb_flush(iommu, IOMMU_DID_DEFAULT);

	cte->hi = 0
		| (did << CTX_HI_DID_SHIFT) // DID bits: 72 to 87
		| (CTX_AW_L4 << CTX_HI_AW_SHIFT); // AW

	cte->lo = PTE_ADDR(p->env_pgdir.eptp)
		| (0x0 << CTX_LO_TRANS_SHIFT)
		| (0x1 << CTX_LO_FPD_SHIFT) // disable faults
		| (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */
}

/* TODO: We should mark the entry as not present to block any stray DMAs from
 * reaching the kernel. To force a re-attach of the device to the kernel, we
 * can use pid 0. */
static void teardown_page_tables(struct proc *p, struct pci_device *d)
{
	uint16_t did = IOMMU_DID_DEFAULT;
	struct iommu *iommu = d->iommu;
	struct context_entry *cte =
		get_ctx_for(d->bus, d->dev, d->func, iommu->roottable);

	/* Mark the entry as not present */
	cte->lo &= ~0x1;
	write_buffer_flush(iommu);
	iotlb_flush(iommu, p->pid);

	cte->hi = 0
		| (did << CTX_HI_DID_SHIFT) // DID bits: 72 to 87
		| (CTX_AW_L4 << CTX_HI_AW_SHIFT); // AW

	cte->lo = 0 /* assumes page alignment */
		| (0x2 << CTX_LO_TRANS_SHIFT)
		| (0x1 << CTX_LO_FPD_SHIFT) // disable faults
		| (0x1 << CTX_LO_PRESENT_SHIFT); /* mark present */
}

static bool _iommu_enable(struct iommu *iommu)
{
	uint32_t cmd, status;

	spin_lock_irqsave(&iommu->iommu_lock);

	/* write the root table address */
	write64(iommu->roottable, iommu->regio + DMAR_RTADDR_REG);

	// TODO: flush IOTLB if reported as necessary by cap register
	// TODO: issue TE only once

	/* set root table - needs to be done first */
	cmd = DMA_GCMD_SRTP;
	write32(cmd, iommu->regio + DMAR_GCMD_REG);

	/* enable translation */
	cmd = DMA_GCMD_TE;
	write32(cmd, iommu->regio + DMAR_GCMD_REG);

	/* read status */
	status = read32(iommu->regio + DMAR_GSTS_REG);

	spin_unlock_irqsave(&iommu->iommu_lock);

	return status & DMA_GSTS_TES;
}
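
/* A stricter sequence would wait on the GSTS status bits between commands,
 * per the VT-d spec's SRTP-then-TE ordering. A sketch (untested here):
 *
 *	write32(DMA_GCMD_SRTP, iommu->regio + DMAR_GCMD_REG);
 *	while (!(read32(iommu->regio + DMAR_GSTS_REG) & DMA_GSTS_RTPS))
 *		;
 *	write32(DMA_GCMD_TE, iommu->regio + DMAR_GCMD_REG);
 *	while (!(read32(iommu->regio + DMAR_GSTS_REG) & DMA_GSTS_TES))
 *		;
 */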

void iommu_enable(void)
{
	struct iommu *iommu;

	/* races are possible; add a global lock? */
	if (iommu_status())
		return;

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		_iommu_enable(iommu);
}

static bool _iommu_disable(struct iommu *iommu)
{
	uint32_t cmd, status;

	spin_lock_irqsave(&iommu->iommu_lock);

	/* write the root table address */
	write64(iommu->roottable, iommu->regio + DMAR_RTADDR_REG);

	// TODO: flush IOTLB if reported as necessary by cap register
	/* disable translation */
	cmd = 0;
	write32(cmd, iommu->regio + DMAR_GCMD_REG);

	/* read status */
	status = read32(iommu->regio + DMAR_GSTS_REG);

	spin_unlock_irqsave(&iommu->iommu_lock);

	return status & DMA_GSTS_TES;
}

void iommu_disable(void)
{
	struct iommu *iommu;

	/* races are possible; add a global lock? */
	if (!iommu_status())
		return;

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		_iommu_disable(iommu);
}

static bool _iommu_status(struct iommu *iommu)
{
	uint32_t status = 0;

	spin_lock_irqsave(&iommu->iommu_lock);

	/* read status */
	status = read32(iommu->regio + DMAR_GSTS_REG);

	spin_unlock_irqsave(&iommu->iommu_lock);

	return status & DMA_GSTS_TES;
}

bool iommu_status(void)
{
	struct iommu *iommu;

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		if (_iommu_status(iommu))
			return true;

	return false;
}

/* Helpers for set/get/init PCI device (BDF) <=> Process map */
static bool proc_already_in_iommu_list(struct iommu *iommu, struct proc *p)
{
	struct proc *proc_iter;

	TAILQ_FOREACH(proc_iter, &iommu->procs, iommu_link)
		if (proc_iter == p)
			return true;

	return false;
}

/* this function retains a KREF to struct proc for each assigned PCI device */
static bool assign_device(int bus, int dev, int func, pid_t pid)
{
	int tbdf = MKBUS(BusPCI, bus, dev, func);
	struct pci_device *d = pci_match_tbdf(tbdf);
	struct proc *p = pid2proc(pid);

	if (!p)
		error(EIO, "cannot find pid %d", pid);

	if (pid == 1) {
		proc_decref(p);
		error(EIO, "device passthru not supported for pid = 1");
	}

	if (!d) {
		proc_decref(p);
		error(EIO, "cannot find dev %x:%x.%x", bus, dev, func);
	}

	/* grab locks */
	spin_lock_irqsave(&p->proc_lock);
	spin_lock_irqsave(&d->iommu->iommu_lock);

	if (d->proc_owner) {
		spin_unlock_irqsave(&d->iommu->iommu_lock);
		spin_unlock_irqsave(&p->proc_lock);
		proc_decref(p);
		error(EIO, "dev already assigned to pid = %d",
		      d->proc_owner->pid);
	}

	d->proc_owner = p; /* protected by iommu_lock */
	d->iommu->num_assigned_devs += 1; /* protected by iommu_lock */

	/* add device to list in struct proc */
	TAILQ_INSERT_TAIL(&p->pci_devices, d, proc_link);

	/* add proc to list in struct iommu */
	if (!proc_already_in_iommu_list(d->iommu, p))
		TAILQ_INSERT_TAIL(&d->iommu->procs, p, iommu_link);

	/* setup the actual page tables */
	setup_page_tables(p, d);

	/* release locks */
	spin_unlock_irqsave(&d->iommu->iommu_lock);
	spin_unlock_irqsave(&p->proc_lock);

	return true;
}

static bool unassign_device(int bus, int dev, int func)
{
	int tbdf = MKBUS(BusPCI, bus, dev, func);
	struct pci_device *d = pci_match_tbdf(tbdf);
	struct proc *p;

	if (!d)
		error(EIO, "cannot find dev %x:%x.%x", bus, dev, func);

	/* TODO: this will break if there are multiple threads calling unassign.
	 * Might require rethinking the lock ordering and synchronization */
	p = d->proc_owner;
	if (!p)
		error(EIO, "%x:%x.%x is not assigned to any process",
			bus, dev, func);

	/* grab locks */
	spin_lock_irqsave(&p->proc_lock);
	spin_lock_irqsave(&d->iommu->iommu_lock);

	/* teardown page table association */
	teardown_page_tables(p, d);

	d->proc_owner = NULL; /* protected by iommu_lock */
	d->iommu->num_assigned_devs -= 1; /* protected by iommu_lock */

	/* remove device from list in struct proc */
	TAILQ_REMOVE(&p->pci_devices, d, proc_link);

	/* remove proc from list in struct iommu, if no active device passthru */
	if (TAILQ_EMPTY(&p->pci_devices))
		TAILQ_REMOVE(&d->iommu->procs, p, iommu_link);

	/* release locks */
	spin_unlock_irqsave(&d->iommu->iommu_lock);
	spin_unlock_irqsave(&p->proc_lock);

	/* decrement KREF for this PCI device */
	proc_decref(p);

	return true;
}

void iommu_process_cleanup(struct proc *p)
{
	struct pci_device *pcidev, *temp;

	// TODO: grab proc_lock
	/* unassign_device() removes pcidev from p->pci_devices, so iterate
	 * with the removal-safe variant */
	TAILQ_FOREACH_SAFE(pcidev, &p->pci_devices, proc_link, temp)
		unassign_device(pcidev->bus, pcidev->dev, pcidev->func);
}

static int write_add_dev(char *va, size_t n)
{
	int bus, dev, func, err;
	pid_t pid;

	err = sscanf(va, "%x:%x.%x %d\n", &bus, &dev, &func, &pid);

	if (err != 4)
		error(EIO,
		  IOMMU "error parsing #iommu/attach; items parsed: %d", err);

	if (pid == 1)
		error(EIO, IOMMU "device passthru not supported for pid = 1");

	if (!assign_device(bus, dev, func, pid))
		error(EIO, "passthru failed");

	return n;
}

static int write_remove_dev(char *va, size_t n)
{
	int bus, dev, func, err;

	err = sscanf(va, "%x:%x.%x\n", &bus, &dev, &func);

	if (err != 3)
		error(EIO,
		  IOMMU "error parsing #iommu/detach; items parsed: %d", err);

	unassign_device(bus, dev, func);

	return n;
}

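/* Accepts the same keywords that open_power advertises: "enable"/"on" and
 * "disable"/"off"; anything else is silently ignored. */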
static int write_power(char *va, size_t n)
{
	if (!strcmp(va, "enable") || !strcmp(va, "on")) {
		iommu_enable();
		return n;
	} else if (!strcmp(va, "disable") || !strcmp(va, "off")) {
		iommu_disable();
		return n;
	} else
		return n;
}

static void _open_mappings(struct sized_alloc *sza, struct proc *proc)
{
	struct pci_device *pcidev;

	sza_printf(sza, "\tpid = %d\n", proc->pid);
	TAILQ_FOREACH(pcidev, &proc->pci_devices, proc_link) {
		sza_printf(sza, "\t\tdevice = %x:%x.%x\n", pcidev->bus,
				pcidev->dev, pcidev->func);
	}
}

static struct sized_alloc *open_mappings(void)
{
	struct iommu *iommu;
	struct proc *proc;
	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link) {
		spin_lock_irqsave(&iommu->iommu_lock);

		sza_printf(sza, "Mappings for iommu@%p\n", iommu);
		if (TAILQ_EMPTY(&iommu->procs))
			sza_printf(sza, "\t<empty>\n");
		else
			TAILQ_FOREACH(proc, &iommu->procs, iommu_link)
				_open_mappings(sza, proc);

		spin_unlock_irqsave(&iommu->iommu_lock);
	}

	return sza;
}

static void _open_info(struct iommu *iommu, struct sized_alloc *sza)
{
	uint64_t value;

	sza_printf(sza, "\niommu@%p\n", iommu);
	sza_printf(sza, "\trba = %p\n", iommu->rba);
	sza_printf(sza, "\tsupported = %s\n", iommu->supported ? "yes" : "no");
	sza_printf(sza, "\tnum_assigned_devs = %d\n", iommu->num_assigned_devs);
	sza_printf(sza, "\tregspace = %p\n", iommu->regio);
	sza_printf(sza, "\thost addr width (dmar) = %d\n", iommu->haw_dmar);
	sza_printf(sza, "\thost addr width (cap[mgaw]) = %d\n",
		iommu->haw_cap);
	value = read32(iommu->regio + DMAR_VER_REG);
	sza_printf(sza, "\tversion = 0x%x\n", (uint32_t) value);

	value = read64(iommu->regio + DMAR_CAP_REG);
	sza_printf(sza, "\tcapabilities = %p\n", value);
	sza_printf(sza, "\t\tmgaw: %d\n", cap_mgaw(value));
	sza_printf(sza, "\t\tsagaw (paging level): 0x%x\n", cap_sagaw(value));
	sza_printf(sza, "\t\tcaching mode: %s (%d)\n", cap_caching_mode(value) ?
		"yes" : "no", cap_caching_mode(value));
	sza_printf(sza, "\t\tzlr: 0x%x\n", cap_zlr(value));
	sza_printf(sza, "\t\trwbf: %s\n", cap_rwbf(value) ? "required"
							  : "not required");
	sza_printf(sza, "\t\tnum domains: %d\n", cap_ndoms(value));
	sza_printf(sza, "\t\tsupports protected high-memory region: %s\n",
		cap_phmr(value) ? "yes" : "no");
	sza_printf(sza, "\t\tsupports protected low-memory region: %s\n",
		cap_plmr(value) ? "yes" : "no");

	value = read64(iommu->regio + DMAR_ECAP_REG);
	sza_printf(sza, "\text. capabilities = %p\n", value);
	sza_printf(sza, "\t\tpass through: %s\n",
		ecap_pass_through(value) ? "yes" : "no");
	sza_printf(sza, "\t\tdevice iotlb: %s\n",
		ecap_dev_iotlb_support(value) ? "yes" : "no");
	sza_printf(sza, "\t\tiotlb register offset: 0x%x\n",
		ecap_iotlb_offset(value));
	sza_printf(sza, "\t\tsnoop control: %s\n",
		ecap_sc_support(value) ? "yes" : "no");
	sza_printf(sza, "\t\tcoherency: %s\n",
		ecap_coherent(value) ? "yes" : "no");
	sza_printf(sza, "\t\tqueue invalidation support: %s\n",
		ecap_qis(value) ? "yes" : "no");
	sza_printf(sza, "\t\tinterrupt remapping support: %s\n",
		ecap_ir_support(value) ? "yes" : "no");
	sza_printf(sza, "\t\textended interrupt mode: 0x%x\n",
		ecap_eim_support(value));

	value = read32(iommu->regio + DMAR_GSTS_REG);
	sza_printf(sza, "\tglobal status = 0x%x\n", (uint32_t) value);
	sza_printf(sza, "\t\ttranslation: %s\n",
		value & DMA_GSTS_TES ? "enabled" : "disabled");
	sza_printf(sza, "\t\troot table: %s\n",
		value & DMA_GSTS_RTPS ? "set" : "not set");

	value = read64(iommu->regio + DMAR_RTADDR_REG);
	sza_printf(sza, "\troot entry table = %p (phy) or %p (vir)\n",
			value, KADDR(value));
}

static struct sized_alloc *open_info(void)
{
	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);
	struct iommu *iommu;

	sza_printf(sza, "driver info:\n");

	sza_printf(sza, "\tdefault did = %d\n", IOMMU_DID_DEFAULT);
	sza_printf(sza, "\tstatus = %s\n",
		iommu_status() ? "enabled" : "disabled");

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link) {
		_open_info(iommu, sza);
	}

	return sza;
}

static struct sized_alloc *open_power(void)
{
	struct sized_alloc *sza = sized_kzmalloc(BUFFERSZ, MEM_WAIT);

	sza_printf(sza, "IOMMU status: %s\n\n",
		iommu_status() ? "enabled" : "disabled");

	sza_printf(sza,
	     "Write 'enable' or 'disable' OR 'on' or 'off' to change status\n");

	return sza;
}

static char *devname(void)
{
	return iommudevtab.name;
}

static struct chan *iommuattach(char *spec)
{
	return devattach(devname(), spec);
}

static struct walkqid *iommuwalk(struct chan *c, struct chan *nc, char **name,
			 unsigned int nname)
{
	return devwalk(c, nc, name, nname, iommudir,
		       ARRAY_SIZE(iommudir), devgen);
}

static size_t iommustat(struct chan *c, uint8_t *dp, size_t n)
{
	return devstat(c, dp, n, iommudir, ARRAY_SIZE(iommudir), devgen);
}

static struct chan *iommuopen(struct chan *c, int omode)
{
	switch (c->qid.path) {
	case Qmappings:
		c->synth_buf = open_mappings();
		break;
	case Qinfo:
		c->synth_buf = open_info();
		break;
	case Qpower:
		c->synth_buf = open_power();
		break;
	case Qadddev:
	case Qremovedev:
	case Qdir:
	default:
		break;
	}

	return devopen(c, omode, iommudir, ARRAY_SIZE(iommudir), devgen);
}

/*
 * All files are synthetic, so the only state to release on close is the
 * synth buffer allocated for the read-only files at open time.
 */
static void iommuclose(struct chan *c)
{
	switch (c->qid.path) {
	case Qmappings:
	case Qinfo:
	case Qpower:
		kfree(c->synth_buf);
		c->synth_buf = NULL;
		break;
	case Qadddev:
	case Qremovedev:
	case Qdir:
	default:
		break;
	}
}

static size_t iommuread(struct chan *c, void *va, size_t n, off64_t offset)
{
	struct sized_alloc *sza = c->synth_buf;

	switch (c->qid.path) {
	case Qdir:
		return devdirread(c, va, n, iommudir,
				  ARRAY_SIZE(iommudir), devgen);
	case Qadddev:
		return readstr(offset, va, n,
		    "write format: xx:yy.z pid\n"
		    "   xx  = bus (in hex)\n"
		    "   yy  = device (in hex)\n"
		    "   z   = function (in hex)\n"
		    "   pid = process pid\n"
		    "\nexample:\n"
		    "$ echo 00:1f.2 13 >\\#iommu/attach\n");
	case Qremovedev:
		return readstr(offset, va, n,
		    "write format: xx:yy.z\n"
		    "   xx  = bus (in hex)\n"
		    "   yy  = device (in hex)\n"
		    "   z   = function (in hex)\n"
		    "\nexample:\n"
		    "$ echo 00:1f.2 >\\#iommu/detach\n");
	case Qmappings:
	case Qinfo:
	case Qpower:
		return readstr(offset, va, n, sza->buf);
	default:
		error(EIO, "read: qid %d is impossible", (int) c->qid.path);
	}

	return -1; /* not reached */
}

static size_t iommuwrite(struct chan *c, void *va, size_t n, off64_t offset)
{
	int err = -1;

	switch (c->qid.path) {
	case Qadddev:
		if (!iommu_supported())
			error(EROFS, IOMMU "not supported");
		err = write_add_dev(va, n);
		break;
	case Qremovedev:
		if (!iommu_supported())
			error(EROFS, IOMMU "not supported");
		err = write_remove_dev(va, n);
		break;
	case Qpower:
		err = write_power(va, n);
		break;
	case Qmappings:
	case Qinfo:
	case Qdir:
		error(EROFS, IOMMU "cannot modify");
	default:
		error(EIO, "write: qid %d is impossible", (int) c->qid.path);
	}

	return err;
}

/* Iterate over all IOMMUs and make sure the register base addresses ("rba")
 * reported by the DRHDs are unique */
static bool iommu_assert_unique_regio(void)
{
	struct iommu *outer, *inner;
	uint64_t rba;
	bool result = true;

	TAILQ_FOREACH(outer, &iommu_list, iommu_link) {
		rba = outer->rba;

		TAILQ_FOREACH(inner, &iommu_list, iommu_link) {
			if (outer != inner && rba == inner->rba) {
				outer->supported = false;
				result = false;
			}
		}
	}

	return result;
}

static bool iommu_assert_required_capabilities(struct iommu *iommu)
{
	uint64_t cap, ecap;
	bool support, result;

	if (!iommu || !iommu->regio)
		return false;

	cap = read64(iommu->regio + DMAR_CAP_REG);
	ecap = read64(iommu->regio + DMAR_ECAP_REG);
	result = true; /* default */

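	/* SAGAW bit 2 => 4-level (48-bit) page tables; this matches the
	 * CTX_AW_L4 address width programmed into context entries above. */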
	support = (cap_sagaw(cap) & 0x4) >> 2;
	if (!support) {
		printk(IOMMU "%p: unsupported paging level: 0x%x\n",
			iommu, cap_sagaw(cap));
		result = false;
	}

	support = cap_super_page_val(cap) & 0x1;
	if (!support) {
		printk(IOMMU "%p: 1GB super pages not supported\n", iommu);
		result = false;
	}

	support = ecap_pass_through(ecap);
	if (!support) {
		printk(IOMMU "%p: pass-through translation type in context entries not supported\n", iommu);
		result = false;
	}

	/* max haw reported by iommu */
	iommu->haw_cap = cap_mgaw(cap);
	if (iommu->haw_cap != iommu->haw_dmar) {
		printk(IOMMU "%p: HAW mismatch; DMAR reports %d, CAP reports %d\n",
			iommu, iommu->haw_dmar, iommu->haw_cap);
	}

	/* mark the iommu as not supported if any required cap is missing */
	if (!result)
		iommu->supported = false;

	return result;
}

static void iommu_assert_all(void)
{
	struct iommu *iommu;

	if (!iommu_assert_unique_regio())
		warn(IOMMU "same register base addresses detected");

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		iommu_assert_required_capabilities(iommu);
}

static void iommu_populate_fields(void)
{
	struct iommu *iommu;

	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		iommu->roottable = rt_init(iommu, IOMMU_DID_DEFAULT);
}

/* Run this function after all individual IOMMUs are initialized. */
void iommu_initialize_global(void)
{
	if (!is_initialized)
		return;

	/* fill the supported field in struct iommu */
	run_once(iommu_assert_all());
	run_once(iommu_populate_fields());

	iommu_enable();
}

/* should only be called after all iommus are initialized */
bool iommu_supported(void)
{
	struct iommu *iommu;

	if (!is_initialized)
		return false;

	/* return false if any of the iommus isn't supported */
	TAILQ_FOREACH(iommu, &iommu_list, iommu_link)
		if (!iommu->supported)
			return false;

	return true;
}

/* grabs the iommu of the first DRHD with INCLUDE_PCI_ALL */
struct iommu *get_default_iommu(void)
{
	/* dmar is a global variable; see acpi.h */
	if (dmar == NULL)
		return NULL;

	for (int i = 0; i < dmar->nchildren; i++) {
		struct Atable *at = dmar->children[i];
		struct Drhd *drhd = at->tbl;

		if (drhd->all & 1)
			return &drhd->iommu;
	}

	return NULL;
}

void iommu_map_pci_devices(void)
{
	struct pci_device *pci_iter;
	struct iommu *iommu = get_default_iommu();

	if (!iommu)
		return;

	/* set the default iommu */
	STAILQ_FOREACH(pci_iter, &pci_devices, all_dev)
		pci_iter->iommu = iommu;

	// TODO: parse devscope and assign scoped iommus
}

/* This is called from acpi.c to initialize struct iommu.
 * The actual IOMMU hardware is not touched or configured in any way. */
void iommu_initialize(struct iommu *iommu, uint8_t haw, uint64_t rba)
{
	is_initialized = true;

	/* initialize the struct */
	TAILQ_INIT(&iommu->procs);
	spinlock_init_irqsave(&iommu->iommu_lock);
	iommu->rba = rba;
	iommu->regio = (void __iomem *) vmap_pmem_nocache(rba, VTD_PAGE_SIZE);
	iommu->supported = true; /* cleared by iommu_assert_all() on failure */
	iommu->num_assigned_devs = 0;
	iommu->haw_dmar = haw;

	/* add the iommu to the list of all discovered iommus */
	TAILQ_INSERT_TAIL(&iommu_list, iommu, iommu_link);
}

static void iommuinit(void)
{
	if (iommu_supported())
		printk(IOMMU "initialized\n");
	else
		printk(IOMMU "not supported\n");
}

struct dev iommudevtab __devtab = {
	.name       = "iommu",
	.reset      = devreset,
	.init       = iommuinit,
	.shutdown   = devshutdown,
	.attach     = iommuattach,
	.walk       = iommuwalk,
	.stat       = iommustat,
	.open       = iommuopen,
	.create     = devcreate,
	.close      = iommuclose,
	.read       = iommuread,
	.bread      = devbread,
	.write      = iommuwrite,
	.bwrite     = devbwrite,
	.remove     = devremove,
	.wstat      = devwstat,
};