/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>

enum {
	Dpcicap		= 1<<0,
	Dmsicap		= 1<<1,
	Dvec		= 1<<2,
	Debug		= 0,
};

enum {
	/* MSI address format
	 *
	 * +31----------------------20+19----------12+11--------4+--3--+--2--+1---0+
	 * |       0xfee              | Dest APIC ID |  Reserved | RH  | DM  |  XX |
	 * +--------------------------+--------------+-----------+-----+-----+-----+
	 *
	 * RH: Redirection Hint
	 * DM: Destination Mode
	 * XX: Probably reserved, set to 0
	 */
	Msiabase	= 0xfee00000u,
	Msiadest	= 1<<12,	/* same as 63:56 of apic vector */
	Msiaedest	= 1<<4,		/* same as 55:48 of apic vector */
	Msialowpri	= 1<<3,		/* redirection hint */
	Msialogical	= 1<<2,

	/* MSI data format
	 * +63-------------------------------------------------------------------32+
	 * |                          Reserved                                     |
	 * +-------------------------------+-15-+-14-+--------+10----8+7----------0+
	 * |          Reserved             | TM | Lv | Reserv | Dmode |   Vector   |
	 * +-------------------------------+----+----+--------+-------+------------+
	 *
	 * Dmode: delivery mode (like APIC/LVT messages).  Usually 000 (Fixed).
	 * TM: Trigger mode (0 Edge, 1 Level)
	 * Lv: Level assert (0 Deassert, 1 Assert)
	 *
	 * for more info, check intel's SDMv3 (grep message signal) */
	Msidlevel	= 1<<15,
	Msidassert	= 1<<14,
	Msidmode	= 1<<8,		/* 3 bits; delivery mode */
	Msidvector	= 0xff<<0,
};

enum {
	/* msi capabilities */
	Vmask		= 1<<8,	/* Vectors can be masked. Optional. */
	Cap64		= 1<<7, /* 64-bit addresses. Optional. */
	Mmesgmsk	= 7<<4, /* Mask for # of messages allowed. See 6.8.1.3 */
	Mmcap		= 7<<1, /* # of messages the function can support. */
	Msienable	= 1<<0, /* Enable. */
	/* msix capabilities */
	Msixenable	= 1<<15,
	Msixmask	= 1<<14,
	Msixtblsize	= 0x7ff,
};

/* Find the offset, within this function's config space, of the MSI capability.
 * It is defined in section 6.8.1 of the PCI spec and is variable-sized.
 * Returns 0 on failure. */
static int msicap(struct pci_device *p)
{
	return p->caps[PCI_CAP_ID_MSI];
}

/* Find the offset, within this function's config space, of the MSI-X
 * capability.  It is defined in section 6.8.2 of the PCI spec and is
 * variable-sized.  Returns 0 on failure. */
static int msixcap(struct pci_device *p)
{
	return p->caps[PCI_CAP_ID_MSIX];
}

static int msi_blacklist(struct pci_device *p)
{
	switch (p->ven_id << 16 | p->dev_id) {
	case 0x11ab << 16 | 0x6485:
	case 0x8086 << 16 | 0x100f:
		return -1;
	}
	return 0;
}

static int msix_blacklist(struct pci_device *p)
{
	switch (p->ven_id << 16 | p->dev_id) {
	//case 0x11ab << 16 | 0x6485:	/* placeholder */
		return -1;
	}
	return 0;
}

static uint32_t msi_make_addr_lo(uint64_t vec)
{
	unsigned int dest, lopri, logical;

	/* The destination is the traditional 8-bit APIC ID, which sits in bits
	 * 63:56 of the vector.  Later we may need to deal with the extra
	 * destination bits (Msiaedest in this code).  I haven't seen anything
	 * in the Intel SDM about using Msiaedest (the bits are reserved). */
	dest = vec >> 56;
	/* lopri is rarely set, and Intel doesn't recommend using it.  With
	 * MSI, the lopri field is actually a redirection hint, and it also
	 * must be set when sending logical messages. */
	lopri = (vec & 0x700) == MTlp;
	logical = (vec & Lm) != 0;
	if (logical)
		lopri = 1;
	return Msiabase | Msiadest * dest | Msialowpri * lopri |
	       Msialogical * logical;
}

static uint32_t msi_make_data(uint64_t vec)
{
	unsigned int deliv_mode;

	deliv_mode = (vec >> 8) & 7;
	/* We can only specify the lower 16 bits of the MSI message; the rest
	 * gets forced to 0 by the device.  MSI-X can use the full 32 bits.
	 * We're assuming edge-triggered here. */
	return Msidmode * deliv_mode | ((unsigned int)vec & 0xff);
}
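
/* Worked example (a sketch, assuming the ioapic-style vec encoding used above:
 * dest APIC ID in bits 63:56, delivery mode in bits 10:8, IDT vector in bits
 * 7:0, with MTlp/Lm clear for fixed/physical delivery).  APIC ID 3 and IDT
 * vector 0x41 are made up for illustration:
 *
 *	vec = (0x3ULL << 56) | 0x41;
 *	msi_make_addr_lo(vec) == 0xfee03000;	// Msiabase | (3 << 12)
 *	msi_make_data(vec)    == 0x00000041;	// fixed mode (000), vector 0x41
 */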

/* See section 6.8.1 of the PCI spec.  Set up a single function on a single
 * device: take the vec, bust it up into bits, and put parts of it in the MSI
 * address and parts in the MSI data. */
int pci_msi_enable(struct pci_device *p, uint64_t vec)
{
	unsigned int c, f, datao;

	spin_lock_irqsave(&p->lock);
	if (p->msix_ready) {
		printk("MSI: MSI-X is already enabled, aborting\n");
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	if (p->msi_ready) {
		/* Only allowing one enable of MSI per device (not supporting
		 * multiple vectors). */
		printk("MSI: MSI is already enabled, aborting\n");
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	p->msi_ready = TRUE;

	/* Get the offset of the MSI capability in the function's config
	 * space. */
	c = msicap(p);
	if (!c) {
		spin_unlock_irqsave(&p->lock);
		return -1;
	}

	/* Read the control bits and clear out the Mmesgmsk bits, so that
	 * multiple messages will not be enabled. */
	f = pcidev_read16(p, c + 2) & ~Mmesgmsk;

	if (msi_blacklist(p) != 0) {
		spin_unlock_irqsave(&p->lock);
		return -1;
	}

	/* Data begins at 8 bytes in. */
	datao = 8;
	p->msi_msg_addr_lo = msi_make_addr_lo(vec);
	printd("Write to %d %08x\n", c + 4, p->msi_msg_addr_lo);
	pcidev_write32(p, c + 4, p->msi_msg_addr_lo);

	/* Even if it's 64-bit capable, we do nothing with the high-order bits.
	 * If it is 64-bit, we need to offset datao (the data offset) by 4
	 * (i.e. another 32 bits). */
	if (f & Cap64) {
		datao += 4;
		pcidev_write32(p, c + 8, 0);
	}
	p->msi_msg_addr_hi = 0;

	p->msi_msg_data = msi_make_data(vec);
	printd("Write data %d %04x\n", c + datao, p->msi_msg_data);
	pcidev_write16(p, c + datao, p->msi_msg_data);

	/* If we have the option of masking the vectors, blow all the masks to
	 * 0.  It's a 32-bit mask. */
	if (f & Vmask)
		pcidev_write32(p, c + datao + 4, 0);

	/* Now write the control bits back, with the Mmesg mask (which is a
	 * power of 2) set to 0 (meaning one vector only).  Note we still
	 * haven't enabled MSI; we will do that when we unmask.  According to
	 * the spec, we're not supposed to use the Msienable bit to mask the
	 * IRQ, though I don't see how we can mask on non-Vmask-supporting
	 * HW. */
	printd("write @ %d %04x\n", c + 2, f);
	pcidev_write16(p, c + 2, f);
	spin_unlock_irqsave(&p->lock);
	return 0;
}
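
/* Usage sketch (an assumption about a typical caller, not code from this
 * file): a driver that has already built an ioapic-style vec would enable,
 * optionally re-route, and then unmask:
 *
 *	if (pci_msi_enable(pdev, vec) == 0) {
 *		pci_msi_route(pdev, dest_apic_id);	// dest_apic_id: hypothetical
 *		pci_msi_unmask(pdev);
 *	}
 */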

static void __msix_mask_entry(struct msix_entry *entry)
{
	uintptr_t reg = (uintptr_t)&entry->vector;

	write_mmreg32(reg, read_mmreg32(reg) | 0x1);
}

static void __msix_unmask_entry(struct msix_entry *entry)
{
	uintptr_t reg = (uintptr_t)&entry->vector;

	write_mmreg32(reg, read_mmreg32(reg) & ~0x1);
}

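/* Helper: decode an MSI-X Table Offset / PBA Offset register (at 'offset' in
 * the capability) into a physical address.  Per the MSI-X capability layout,
 * bits 2:0 of the register select the BAR (the BIR), and the value with the
 * low three bits masked off is the byte offset into that BAR.  Returns 0 if
 * that BAR has no membar. */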
static uintptr_t msix_get_capbar_paddr(struct pci_device *p, int offset)
{
	uint32_t bir, capbar_off;
	uintptr_t membar;

	bir = pcidev_read32(p, offset);
	capbar_off = bir & ~0x7;
	bir &= 0x7;
	membar = pci_get_membar(p, bir);

	if (!membar) {
		printk("MSI-X: no cap membar, bir %d\n", bir);
		return 0;
	}
	membar += capbar_off;
	return membar;
}

/* One-time initialization of MSI-X for a PCI device.  Returns -1 on error.
 * Otherwise, the device will be ready to assign/route MSI-X entries/vectors.
 * All vectors are masked, but the overall MSI-X function is unmasked.
 *
 * Called with the pci_device lock held. */
static int __pci_msix_init(struct pci_device *p)
{
	unsigned int c;
	uint16_t f;
	struct msix_entry *entry;

	if (p->msix_ready)
		return 0;
	if (p->msi_ready) {
		printk("MSI-X: MSI is already on, aborting\n");
		return -1;
	}
	if (msix_blacklist(p) != 0)
		return -1;
	c = msixcap(p);
	if (c == 0)
		return -1;
	f = pcidev_read16(p, c + 2);
	/* Enable and mask the entire function / all vectors */
	f |= Msixenable | Msixmask;
	pcidev_write16(p, c + 2, f);

	p->msix_tbl_paddr = msix_get_capbar_paddr(p, c + 4);
	p->msix_pba_paddr = msix_get_capbar_paddr(p, c + 8);
	if (!p->msix_tbl_paddr || !p->msix_pba_paddr) {
		/* Disable MSI-X, so we can possibly use MSI */
		pcidev_write16(p, c + 2, f & ~Msixenable);
		printk("MSI-X: Missing a tbl (%p) or PBA (%p) paddr!\n",
		       p->msix_tbl_paddr, p->msix_pba_paddr);
		return -1;
	}
	p->msix_nr_vec = (f & Msixtblsize) + 1;
	p->msix_tbl_vaddr = vmap_pmem_nocache(p->msix_tbl_paddr,
					      p->msix_nr_vec *
					      sizeof(struct msix_entry));
	if (!p->msix_tbl_vaddr) {
		pcidev_write16(p, c + 2, f & ~Msixenable);
		printk("MSI-X: unable to vmap the Table!\n");
		return -1;
	}
	p->msix_pba_vaddr = vmap_pmem_nocache(p->msix_pba_paddr,
					      ROUNDUP(p->msix_nr_vec, 8) / 8);
	if (!p->msix_pba_vaddr) {
		pcidev_write16(p, c + 2, f & ~Msixenable);
		printk("MSI-X: unable to vmap the PBA!\n");
		vunmap_vmem(p->msix_tbl_vaddr,
			    p->msix_nr_vec * sizeof(struct msix_entry));
		return -1;
	}
	/* They should all be masked already, but remasking just in case.
	 * Likewise, we need to zero out the data, since we'll use the lower
	 * byte later when determining if an MSI-X vector is free or not. */
	entry = (struct msix_entry*)p->msix_tbl_vaddr;
	for (int i = 0; i < p->msix_nr_vec; i++, entry++) {
		__msix_mask_entry(entry);
		write_mmreg32((uintptr_t)&entry->data, 0);
	}
	/* Unmask the device, now that all the vectors are masked */
	f &= ~Msixmask;
	pcidev_write16(p, c + 2, f);
	p->msix_ready = TRUE;
	return 0;
}

/* Some parts of MSI-X init need to happen during boot.  Devices can call this
 * during their reset methods, and then later register their IRQs during
 * attach.  Other OSes also alloc the vector around this time, though we'll
 * hold off on that for now. */
int pci_msix_init(struct pci_device *p)
{
	int ret;

	spin_lock_irqsave(&p->lock);
	ret = __pci_msix_init(p);
	spin_unlock_irqsave(&p->lock);
	return ret;
}

/* Enables an MSI-X vector for a PCI device.  vec is formatted like an ioapic
 * route.  This should be able to handle multiple vectors for a device.
 * Returns an msix_irq_vector linkage struct on success (the connection between
 * an irq_h and the specific {pcidev, entry}), and 0 on failure. */
struct msix_irq_vector *pci_msix_enable(struct pci_device *p, uint64_t vec)
{
	int i;
	struct msix_entry *entry;
	struct msix_irq_vector *linkage;

	spin_lock_irqsave(&p->lock);
	/* Ensure we're init'd.  We could remove this in the future, though not
	 * everyone calls the extern pci_msix_init. */
	if (__pci_msix_init(p) < 0) {
		spin_unlock_irqsave(&p->lock);
		return 0;
	}
	/* Find an unused slot (no apic_vector assigned).  Later, we might want
	 * to point back to the irq_hs for each entry.  Not a big deal now. */
	entry = (struct msix_entry*)p->msix_tbl_vaddr;
	for (i = 0; i < p->msix_nr_vec; i++, entry++)
		if (!(read_mmreg32((uintptr_t)&entry->data) & 0xff))
			break;
	if (i == p->msix_nr_vec) {
		printk("[kernel] unable to alloc an MSI-X vector (bug?)\n");
		spin_unlock_irqsave(&p->lock);
		return 0;
	}
	linkage = kmalloc(sizeof(struct msix_irq_vector), MEM_WAIT);
	linkage->pcidev = p;
	linkage->entry = entry;
	linkage->addr_lo = msi_make_addr_lo(vec);
	linkage->addr_hi = 0;
	linkage->data = msi_make_data(vec);
	write_mmreg32((uintptr_t)&entry->data, linkage->data);
	write_mmreg32((uintptr_t)&entry->addr_lo, linkage->addr_lo);
	write_mmreg32((uintptr_t)&entry->addr_hi, linkage->addr_hi);
	spin_unlock_irqsave(&p->lock);
	return linkage;
}
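
/* Usage sketch (an assumption about a typical caller, not code from this
 * file): the entry returned above is still masked from __pci_msix_init, so a
 * driver would keep the linkage, optionally re-route it, and unmask it once it
 * is ready to take interrupts:
 *
 *	struct msix_irq_vector *lnk = pci_msix_enable(pdev, vec);
 *
 *	if (lnk) {
 *		pci_msix_route_vector(lnk, dest_apic_id); // dest_apic_id: hypothetical
 *		pci_msix_unmask_vector(lnk);
 *	}
 */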

void pci_dump_msix_table(struct pci_device *p)
{
	struct msix_entry *entry;
	void *tbl = (void*)p->msix_tbl_vaddr;

	hexdump(tbl, p->msix_nr_vec * sizeof(struct msix_entry));
	entry = (struct msix_entry*)p->msix_tbl_vaddr;
	for (int i = 0; i < p->msix_nr_vec; i++, entry++)
		printk("Entry %d, addr hi:lo 0x%08x:%08x data 0x%08x\n", i,
		       entry->addr_hi, entry->addr_lo, entry->data);
}

void pci_msi_mask(struct pci_device *p)
{
	unsigned int c, f;

	c = msicap(p);
	assert(c);

	spin_lock_irqsave(&p->lock);
	f = pcidev_read16(p, c + 2);
	pcidev_write16(p, c + 2, f & ~Msienable);
	spin_unlock_irqsave(&p->lock);
}

void pci_msi_unmask(struct pci_device *p)
{
	unsigned int c, f;

	c = msicap(p);
	assert(c);

	spin_lock_irqsave(&p->lock);
	f = pcidev_read16(p, c + 2);
	pcidev_write16(p, c + 2, f | Msienable);
	spin_unlock_irqsave(&p->lock);
}

void pci_msi_route(struct pci_device *p, int dest)
{
	unsigned int c;

	c = msicap(p);
	assert(c);

	spin_lock_irqsave(&p->lock);
	/* Mask out the old destination, replace with the new one */
	p->msi_msg_addr_lo &= ~(((1 << 8) - 1) << 12);
	p->msi_msg_addr_lo |= (dest & 0xff) << 12;
	pcidev_write32(p, c + 4, p->msi_msg_addr_lo);
	spin_unlock_irqsave(&p->lock);
}

void pci_msix_mask_vector(struct msix_irq_vector *linkage)
{
	spin_lock_irqsave(&linkage->pcidev->lock);
	__msix_mask_entry(linkage->entry);
	spin_unlock_irqsave(&linkage->pcidev->lock);
}

void pci_msix_unmask_vector(struct msix_irq_vector *linkage)
{
	spin_lock_irqsave(&linkage->pcidev->lock);
	__msix_unmask_entry(linkage->entry);
	spin_unlock_irqsave(&linkage->pcidev->lock);
}

void pci_msix_route_vector(struct msix_irq_vector *linkage, int dest)
{
	spin_lock_irqsave(&linkage->pcidev->lock);
	/* Mask out the old destination, replace with the new one */
	linkage->addr_lo &= ~(((1 << 8) - 1) << 12);
	linkage->addr_lo |= (dest & 0xff) << 12;
	write_mmreg32((uintptr_t)&linkage->entry->addr_lo, linkage->addr_lo);
	spin_unlock_irqsave(&linkage->pcidev->lock);
}