blob: 06e8319e75df838f4f9b327b65d5600e209e7958 [file] [log] [blame]
/*
* This file is part of the UCB release of Plan 9. It is subject to the license
* terms in the LICENSE file found in the top-level directory of this
* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
* part of the UCB release of Plan 9, including this file, may be copied,
* modified, propagated, or distributed except according to the terms contained
* in the LICENSE file.
*/
#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>
/* Per-category debug flags for this file.  Presumably tested against Debug by
 * printd-style macros — none of the visible code reads them, so verify against
 * the rest of the tree before relying on this. */
enum {
	Dpcicap = 1<<0,		/* PCI capability walking */
	Dmsicap = 1<<1,		/* MSI capability handling */
	Dvec = 1<<2,		/* vector programming */
	Debug = 0,		/* all categories disabled */
};
enum {
	/* MSI address format
	 *
	 * +31----------------------20+19----------12+11--------4+--3--+--2--+1---0+
	 * |           0xfee          | Dest APIC ID |  Reserved |  RH |  DM |  XX |
	 * +--------------------------+--------------+-----------+-----+-----+-----+
	 *
	 * RH: Redirection Hint
	 * DM: Destination Mode
	 * XX: Probably reserved, set to 0
	 */
	Msiabase = 0xfee00000u,		/* fixed upper bits of every MSI address */
	Msiadest = 1<<12,		/* multiplier: dest APIC ID, bits 19:12
					 * (same as 63:56 of apic vector) */
	Msiaedest = 1<<4,		/* extended dest bits (same as 55:48 of
					 * apic vector); unused in this file */
	Msialowpri = 1<<3,		/* redirection hint (RH) */
	Msialogical = 1<<2,		/* destination mode (DM): 1 = logical */

	/* MSI data format
	 * +63-------------------------------------------------------------------32+
	 * |                          Reserved                                     |
	 * +-------------------------------+-15-+-14-+--------+10----8+7----------0+
	 * |          Reserved             | TM | Lv | Reserv | Dmode |   Vector   |
	 * +-------------------------------+----+----+--------+-------+------------+
	 *
	 * Dmode: delivery mode (like APIC/LVT messages).  Usually 000 (Fixed).
	 * TM: Trigger mode (0 Edge, 1 Level)
	 * Lv: Level assert (0 Deassert, 1 Assert)
	 *
	 * for more info, check intel's SDMv3 (grep message signal) */
	Msidlevel = 1<<15,		/* TM: level triggered */
	Msidassert = 1<<14,		/* Lv: assert */
	Msidmode = 1<<8,		/* multiplier: 3-bit delivery mode field */
	Msidvector = 0xff<<0,		/* 8-bit vector number */
};
/* Bits in the MSI / MSI-X message control registers (capability offset + 2).
 * See PCI spec 6.8.1. */
enum{
	/* msi capabilities */
	Vmask = 1<<8,		/* Vectors can be masked. Optional. */
	Cap64 = 1<<7,		/* 64-bit addresses. Optional. */
	Mmesgmsk = 7<<4,	/* Mask for # of messages allowed. See 6.8.1.3*/
	Mmcap = 7<<1,		/* # of messages the function can support. */
	Msienable = 1<<0,	/* Enable. */
	/* msix capabilities */
	Msixenable = 1<<15,	/* global MSI-X enable */
	Msixmask = 1<<14,	/* global function mask (all vectors) */
	Msixtblsize = 0x7ff,	/* table size field: # of entries minus 1 */
};
/* Returns the offset of this function's MSI capability in config space, as
 * cached in the device's capability table.  The capability is variable-sized
 * (PCI spec 6.8.1).  0 means the device has no MSI capability. */
static int msicap(struct pci_device *p)
{
	int cap_off = p->caps[PCI_CAP_ID_MSI];

	return cap_off;
}
/* Returns the offset of this function's MSI-X capability in config space, as
 * cached in the device's capability table.  The capability is variable-sized
 * (PCI spec 6.8.1).  0 means the device has no MSI-X capability. */
static int msixcap(struct pci_device *p)
{
	int cap_off = p->caps[PCI_CAP_ID_MSIX];

	return cap_off;
}
static int msi_blacklist(struct pci_device *p)
{
switch (p->ven_id << 16 | p->dev_id) {
case 0x11ab << 16 | 0x6485:
case 0x8086 << 16 | 0x100f:
return -1;
}
return 0;
}
/* Returns -1 if MSI-X is known-broken on this device, 0 otherwise.  Currently
 * empty; the commented case shows the shape of a future entry. */
static int msix_blacklist(struct pci_device *p)
{
	switch (p->ven_id << 16 | p->dev_id) {
	// case 0x11ab << 16 | 0x6485: /* placeholder */
	//	return -1;
	default:
		break;
	}
	return 0;
}
/* Builds the low 32 bits of the MSI message address from an ioapic-style
 * vector.  Layout is described in the MSI address format enum above. */
static uint32_t msi_make_addr_lo(uint64_t vec)
{
	/* The destination is the traditional 8-bit APIC id, carried in 63:56
	 * of the vector.  Later we may need to deal with extra destination
	 * bits (Msiaedest, in this code).  I haven't seen anything in the
	 * Intel SDM about using Msiaedest (the bits are reserved). */
	unsigned int apic_id = vec >> 56;
	/* Logical destination mode, per the Lm flag in the vector. */
	unsigned int logical = (vec & Lm) ? 1 : 0;
	/* The redirection hint is rarely set and intel doesn't recommend
	 * using it, but it must also be set when sending logical messages. */
	unsigned int rh = logical || ((vec & 0x700) == MTlp);

	return Msiabase | Msiadest * apic_id | Msialowpri * rh |
	       Msialogical * logical;
}
/* Builds the MSI message data word from an ioapic-style vector: delivery mode
 * in bits 10:8 and the vector number in bits 7:0.
 *
 * We can only specify the lower 16 bits of the MSI message, the rest gets
 * forced to 0 by the device.  MSI-X can use the full 32 bits.  We're assuming
 * edge triggered here. */
static uint32_t msi_make_data(uint64_t vec)
{
	uint32_t vno = (uint32_t)vec & 0xff;
	uint32_t dmode = (vec >> 8) & 7;

	return dmode * Msidmode | vno;
}
/* see section 6.8.1 of the pci spec. */
/* Set up a single function on a single device.  We take the vec, bust it up
 * into bits, and put parts of it in the MSI address and parts in the MSI data.
 *
 * Only one vector per device is supported.  Returns 0 on success, -1 if MSI
 * or MSI-X is already enabled, if the device has no MSI capability, or if the
 * device is blacklisted.  Note the IRQ is still masked on return; it is
 * enabled later via pci_msi_unmask(). */
int pci_msi_enable(struct pci_device *p, uint64_t vec)
{
	unsigned int c, f, datao;

	spin_lock_irqsave(&p->lock);
	if (p->msix_ready) {
		printk("MSI: MSI-X is already enabled, aborting\n");
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	if (p->msi_ready) {
		/* only allowing one enable of MSI per device (not supporting
		 * multiple vectors) */
		printk("MSI: MSI is already enabled, aborting\n");
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	/* Get the offset of the MSI capability in the function's config
	 * space. */
	c = msicap(p);
	if (!c) {
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	/* read it, clear out the Mmesgmsk bits.  This means that there will
	 * be no multiple messages enabled. */
	f = pcidev_read16(p, c + 2) & ~Mmesgmsk;
	if (msi_blacklist(p) != 0) {
		spin_unlock_irqsave(&p->lock);
		return -1;
	}
	/* Data begins at 8 bytes in. */
	datao = 8;
	p->msi_msg_addr_lo = msi_make_addr_lo(vec);
	printd("Write to %d %08lx \n",c + 4, p->msi_msg_addr_lo);
	pcidev_write32(p, c + 4, p->msi_msg_addr_lo);
	/* And even if it's 64-bit capable, we do nothing with the high order
	 * bits.  If it is 64-bit we need to offset datao (data offset) by 4
	 * (i.e. another 32 bits). */
	if(f & Cap64){
		datao += 4;
		pcidev_write32(p, c + 8, 0);
	}
	p->msi_msg_addr_hi = 0;
	p->msi_msg_data = msi_make_data(vec);
	printd("Write data %d %04x\n", c + datao, p->msi_msg_data);
	pcidev_write16(p, c + datao, p->msi_msg_data);
	/* If we have the option of masking the vectors, blow all the masks to
	 * 0.  It's a 32-bit mask. */
	if(f & Vmask)
		pcidev_write32(p, c + datao + 4, 0);
	/* Now write the control bits back, with the Mmesg mask (which is a
	 * power of 2) set to 0 (meaning one vector only).  Note we still
	 * haven't enabled MSI.  Will do that when we unmask.  According to
	 * the spec, we're not supposed to use the Msienable bit to mask the
	 * IRQ, though I don't see how we can mask on non-Vmask-supported HW.
	 */
	printd("write @ %d %04lx\n",c + 2, f);
	pcidev_write16(p, c + 2, f);
	/* Bug fix: only mark the device msi_ready once setup succeeded.  The
	 * old code set it before the msicap/blacklist checks, so a failed
	 * enable left msi_ready set, which blocked both a retry and any later
	 * __pci_msix_init() (which aborts when msi_ready is set). */
	p->msi_ready = TRUE;
	spin_unlock_irqsave(&p->lock);
	return 0;
}
static void __msix_mask_entry(struct msix_entry *entry)
{
uintptr_t reg = (uintptr_t)&entry->vector;
write_mmreg32(reg, read_mmreg32(reg) | 0x1);
}
static void __msix_unmask_entry(struct msix_entry *entry)
{
uintptr_t reg = (uintptr_t)&entry->vector;
write_mmreg32(reg, read_mmreg32(reg) & ~0x1);
}
/* Decodes an MSI-X table/PBA offset register at config-space 'offset': the
 * low 3 bits are the BAR index (BIR) and the rest is a byte offset into that
 * BAR.  Returns the resulting physical address, or 0 if the BAR isn't a
 * memory BAR. */
static uintptr_t msix_get_capbar_paddr(struct pci_device *p, int offset)
{
	uint32_t reg = pcidev_read32(p, offset);
	uint32_t bar_idx = reg & 0x7;
	uint32_t bar_off = reg & ~0x7;
	uintptr_t membar = pci_get_membar(p, bar_idx);

	if (!membar) {
		printk("MSI-X: no cap membar, bir %d\n", bar_idx);
		return 0;
	}
	return membar + bar_off;
}
/* One time initialization of MSI-X for a PCI device. -1 on error. Otherwise,
* the device will be ready to assign/route MSI-X entries/vectors. All vectors
* are masked, but the overall MSI-X function is unmasked.
*
* Hold the pci_device lock. */
static int __pci_msix_init(struct pci_device *p)
{
unsigned int c;
uint16_t f;
int tbl_bir, tbl_off, pba_bir, pba_off;
struct msix_entry *entry;
if (p->msix_ready)
return 0;
if (p->msi_ready) {
printk("MSI-X: MSI is already on, aborting\n");
return -1;
}
if (msix_blacklist(p) != 0)
return -1;
c = msixcap(p);
if (c == 0)
return -1;
f = pcidev_read16(p, c + 2);
/* enable and mask the entire function/all vectors */
f |= Msixenable | Msixmask;
pcidev_write16(p, c + 2, f);
p->msix_tbl_paddr = msix_get_capbar_paddr(p, c + 4);
p->msix_pba_paddr = msix_get_capbar_paddr(p, c + 8);
if (!p->msix_tbl_paddr || !p->msix_pba_paddr) {
/* disable msix, so we can possibly use msi */
pcidev_write16(p, c + 2, f & ~Msixenable);
printk("MSI-X: Missing a tbl (%p) or PBA (%p) paddr!\n",
p->msix_tbl_paddr, p->msix_pba_paddr);
return -1;
}
p->msix_nr_vec = (f & Msixtblsize) + 1;
p->msix_tbl_vaddr = vmap_pmem_nocache(p->msix_tbl_paddr,
p->msix_nr_vec *
sizeof(struct msix_entry));
if (!p->msix_tbl_vaddr) {
pcidev_write16(p, c + 2, f & ~Msixenable);
printk("MSI-X: unable to vmap the Table!\n");
return -1;
}
p->msix_pba_vaddr = vmap_pmem_nocache(p->msix_pba_paddr,
ROUNDUP(p->msix_nr_vec, 8) / 8);
if (!p->msix_pba_vaddr) {
pcidev_write16(p, c + 2, f & ~Msixenable);
printk("MSI-X: unable to vmap the PBA!\n");
vunmap_vmem(p->msix_tbl_paddr,
p->msix_nr_vec * sizeof(struct msix_entry));
return -1;
}
/* they should all be masked already, but remasking just in case.
* likewise, we need to 0 out the data, since we'll use the lower byte
* later when determining if an msix vector is free or not. */
entry = (struct msix_entry*)p->msix_tbl_vaddr;
for (int i = 0; i < p->msix_nr_vec; i++, entry++) {
__msix_mask_entry(entry);
write_mmreg32((uintptr_t)&entry->data, 0);
}
/* unmask the device, now that all the vectors are masked */
f &= ~Msixmask;
pcidev_write16(p, c + 2, f);
p->msix_ready = TRUE;
return 0;
}
/* Some parts of msix init need to happen during boot. Devices can call this
 * during their reset methods, and then later register their IRQs during attach.
 * Other OS's also alloc the vector around this time, though we'll hold off on
 * that for now. */
int pci_msix_init(struct pci_device *p)
{
	int rc;

	spin_lock_irqsave(&p->lock);
	rc = __pci_msix_init(p);
	spin_unlock_irqsave(&p->lock);
	return rc;
}
/* Enables an MSI-X vector for a PCI device. vec is formatted like an ioapic
 * route. This should be able to handle multiple vectors for a device. Returns
 * a msix_irq_vector linkage struct on success (the connection btw an irq_h and
 * the specific {pcidev, entry}), and 0 on failure. */
struct msix_irq_vector *pci_msix_enable(struct pci_device *p, uint64_t vec)
{
	int i;
	struct msix_entry *entry;
	struct msix_irq_vector *linkage;

	spin_lock_irqsave(&p->lock);
	/* Ensure we're init'd.  We could remove this in the future, though
	 * not everyone calls the extern pci_msix_init. */
	if (__pci_msix_init(p) < 0) {
		spin_unlock_irqsave(&p->lock);
		return 0;
	}
	/* Find an unused slot: a free entry has 0 in the low byte (vector
	 * field) of its data register, which __pci_msix_init zeroed.  Later,
	 * we might want to point back to the irq_hs for each entry; not a big
	 * deal now. */
	entry = (struct msix_entry*)p->msix_tbl_vaddr;
	for (i = 0; i < p->msix_nr_vec; i++, entry++)
		if (!(read_mmreg32((uintptr_t)&entry->data) & 0xff))
			break;
	if (i == p->msix_nr_vec) {
		printk("[kernel] unable to alloc an MSI-X vector (bug?)\n");
		spin_unlock_irqsave(&p->lock);
		return 0;
	}
	/* NOTE(review): kmalloc(MEM_WAIT) while holding an irqsave spinlock —
	 * confirm this cannot block in this kernel. */
	linkage = kmalloc(sizeof(struct msix_irq_vector), MEM_WAIT);
	linkage->pcidev = p;
	linkage->entry = entry;
	linkage->addr_lo = msi_make_addr_lo(vec);
	linkage->addr_hi = 0;
	linkage->data = msi_make_data(vec);
	/* The entry is still masked here (masked during init); presumably it
	 * is unmasked later via pci_msix_unmask_vector(). */
	write_mmreg32((uintptr_t)&entry->data, linkage->data);
	write_mmreg32((uintptr_t)&entry->addr_lo, linkage->addr_lo);
	write_mmreg32((uintptr_t)&entry->addr_hi, linkage->addr_hi);
	spin_unlock_irqsave(&p->lock);
	return linkage;
}
/* Debug helper: hexdumps the whole MSI-X table, then prints each entry's
 * address and data fields. */
void pci_dump_msix_table(struct pci_device *p)
{
	struct msix_entry *e = (struct msix_entry*)p->msix_tbl_vaddr;

	hexdump((void*)p->msix_tbl_vaddr,
		p->msix_nr_vec * sizeof(struct msix_entry));
	for (int i = 0; i < p->msix_nr_vec; i++, e++)
		printk("Entry %d, addr hi:lo 0x%08x:%08x data 0x%08x\n", i,
		       e->addr_hi, e->addr_lo, e->data);
}
/* Masks the device's MSI IRQ by clearing the MSI enable bit in the message
 * control register.  Asserts the device actually has an MSI capability. */
void pci_msi_mask(struct pci_device *p)
{
	unsigned int cap = msicap(p);
	unsigned int ctl;

	assert(cap);
	spin_lock_irqsave(&p->lock);
	ctl = pcidev_read16(p, cap + 2);
	ctl &= ~Msienable;
	pcidev_write16(p, cap + 2, ctl);
	spin_unlock_irqsave(&p->lock);
}
/* Unmasks the device's MSI IRQ by setting the MSI enable bit in the message
 * control register.  Asserts the device actually has an MSI capability. */
void pci_msi_unmask(struct pci_device *p)
{
	unsigned int cap = msicap(p);
	unsigned int ctl;

	assert(cap);
	spin_lock_irqsave(&p->lock);
	ctl = pcidev_read16(p, cap + 2);
	ctl |= Msienable;
	pcidev_write16(p, cap + 2, ctl);
	spin_unlock_irqsave(&p->lock);
}
/* Reroutes the device's MSI to a new destination APIC ID by rewriting bits
 * 19:12 of the cached message address and writing it back to the device. */
void pci_msi_route(struct pci_device *p, int dest)
{
	unsigned int cap = msicap(p);

	assert(cap);
	spin_lock_irqsave(&p->lock);
	/* mask out the old destination, replace with new */
	p->msi_msg_addr_lo = (p->msi_msg_addr_lo & ~0xff000) |
			     ((dest & 0xff) << 12);
	pcidev_write32(p, cap + 4, p->msi_msg_addr_lo);
	spin_unlock_irqsave(&p->lock);
}
void pci_msix_mask_vector(struct msix_irq_vector *linkage)
{
spin_lock_irqsave(&linkage->pcidev->lock);
__msix_mask_entry(linkage->entry);
spin_unlock_irqsave(&linkage->pcidev->lock);
}
void pci_msix_unmask_vector(struct msix_irq_vector *linkage)
{
spin_lock_irqsave(&linkage->pcidev->lock);
__msix_unmask_entry(linkage->entry);
spin_unlock_irqsave(&linkage->pcidev->lock);
}
void pci_msix_route_vector(struct msix_irq_vector *linkage, int dest)
{
spin_lock_irqsave(&linkage->pcidev->lock);
/* mask out the old destination, replace with new */
linkage->addr_lo &= ~(((1 << 8) - 1) << 12);
linkage->addr_lo |= (dest & 0xff) << 12;
write_mmreg32((uintptr_t)&linkage->entry->addr_lo, linkage->addr_lo);
spin_unlock_irqsave(&linkage->pcidev->lock);
}