/* This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file. */

#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>
#include <arch/mptables.h>
#include <arch/ioapic.h>

/*
 * MultiProcessor Specification Version 1.[14].
 */
/* MP Floating Pointer structure.  mpsinit() locates it by its "_MP_" signature
 * and follows 'addr' to the MP configuration table (PCMP).  All fields are
 * bytes or byte arrays; multi-byte values are decoded with l32get(). */
typedef struct {				/* MP Floating Pointer */
	uint8_t signature[4];			/* "_MP_" */
	uint8_t addr[4];			/* physical address of the PCMP */
	uint8_t length;				/* in 16 byte units; normally 1 */
	uint8_t revision;			/* spec revision: 1 or 4 */
	uint8_t checksum;			/* sigchecksum() of the structure must be 0 */
	uint8_t feature[5];			/* feature bytes; only printed here */
} _MP_;

/* MP Configuration Table header, followed in memory by the base entries and
 * then by the extended entries.  'length' covers the header plus the base
 * entries, so the extended table starts 'length' bytes from the start of the
 * header and runs for 'xlength' bytes.  Multi-byte fields are byte arrays
 * decoded with l16get()/l32get(). */
typedef struct {				/* MP Configuration Table */
	uint8_t signature[4];			/* "PCMP" */
	uint8_t length[2];			/* header + base entries, in bytes */
	uint8_t revision;			/* spec revision: 1 or 4 */
	uint8_t checksum;			/* sigchecksum() over 'length' bytes must be 0 */
	uint8_t string[20];			/* OEM + Product ID */
	uint8_t oaddr[4];			/* OEM table pointer */
	uint8_t olength[2];			/* OEM table length */
	uint8_t entry[2];			/* entry count */
	uint8_t apicpa[4];			/* local APIC address */
	uint8_t xlength[2];			/* extended table length */
	uint8_t xchecksum;			/* extended table checksum */
	uint8_t reserved;

	uint8_t entries[];			/* first base entry; parsed by mpparse() */
} PCMP;

/* Description of a bus type that interrupt entries can name as their source.
 * 'type' holds exactly six characters and is NOT NUL-terminated; it is only
 * ever compared with memcmp(..., 6) against the type field of a bus entry. */
typedef struct {
	char type[6];				/* space padded bus type string */
	int polarity;				/* default for this bus */
	int trigger;				/* default for this bus */
} Mpbus;

/* The bus types mpparse() recognises.  An INT entry whose polarity or trigger
 * flag field is 0 inherits the corresponding default from its source bus (see
 * mpmkintr()).  Bus entries with any other type string are reported as
 * unknown and get no mpbus[] slot. */
static Mpbus mpbusdef[] = {
	{"PCI   ", IPlow, TMlevel,},
	{"ISA   ", IPhigh, TMedge,},
};

/* Editable version of the MP tables so we can fix botched entries.  Kmalloced,
 * never freed.  Might be NULL if pcmp checks failed.*/
static PCMP *pcmp;

/* Indexed by the bus number from an MP bus entry.  Each populated slot points
 * at the matching mpbusdef[] element; a NULL slot means the bus number was
 * never declared or its type was not recognised. */
static Mpbus *mpbus[Nbus];
/* Bus number of the first ISA bus found by mpparse(), or -1 if there is none.
 * Not static, so it is visible to other files. */
int mpisabusno = -1;
/* Set to non-zero to have mpsinit()/mpparse() dump the tables and every
 * interrupt entry they look at. */
#define MP_VERBOSE_DEBUG 0

/* Prints one MP table interrupt entry (IOINTR or LINTR) with printk.
 *
 * @s: optional tag saying why the entry is being printed; may be NULL.
 * @p: the 8 byte interrupt entry: p[1] is the interrupt type, p[2-3] the
 *     polarity/trigger flags, p[4] the source bus, p[5] the IRQ on that bus,
 *     p[6] the destination APIC ID and p[7] the destination INTIN.
 *
 * The line is built from separate printk calls rather than formatted into a
 * buffer first, because seprintf is not usable this early. */
static void mpintrprint(char *s, uint8_t * p)
{
	static const char format[] =
		" type %d flags %p bus %d IRQ %d APIC %d INTIN %d\n";

	printk("mpparse: intr:");
	if (s != NULL)
		printk(" %s:", s);
	/* %p consumes a pointer-sized argument, so widen the 16 bit flags
	 * explicitly instead of pushing a plain int through the varargs. */
	printk(format, p[1], (void *)(uintptr_t)l16get(p + 2), p[4], p[5], p[6],
	       p[7]);
}

/* I've seen busted MP tables routes with invalid IOAPIC ids and INTINs that are
 * out of range.  We can look at the INTINs to try to figure out which IOAPIC
 * they meant, and then adjust the INTINs too.
 *
 * Specifically, the machine I saw had two IOAPICs, neither of which had good
 * iointr APIC IDs.  ACPI and the MP tables said I had IOAPICS 8 and 9.  The
 * IOINTRs APIC IDs were 0 and 2.  Additionally, 2's INTINs were all beyond the
 * range of the 24 nrtds for that IOAPIC.  However, that IOAPIC's ibase was 24
 * too.
 *
 * Combined, these two clues mean the INTINs are in the global ibase/route
 * space, and we can tell which IOAPIC to use based on the INTIN.  This works at
 * least for the IOAPIC 0 (8) on my hardware (IRQ routing works).  I haven't
 * been able to test on devices on the upper APIC (9). */
static int repair_iointr(uint8_t *iointr)
{
	struct apic *ioapic;
	int ioapic_id;
	int intin = iointr[7];

	for (int i = 0; i < Napic; i++) {
		ioapic = &xioapic[i];
		if (!ioapic->useable)
			continue;
		if (ioapic->ibase <= intin &&
		    intin < ioapic->ibase + ioapic->nrdt) {
			iointr[6] = i;
			iointr[7] = intin - ioapic->ibase;
			return 0;
		}
	}
	return -1;
}

static uint32_t mpmkintr(uint8_t * p)
{
	uint32_t v;
	struct apic *apic;
	int n, polarity, trigger;

	/*
	 * Check valid bus, interrupt input pin polarity
	 * and trigger mode. If the APIC ID is 0xff it means
	 * all APICs of this type so those checks for useable
	 * APIC and valid INTIN must also be done later in
	 * the appropriate init routine in that case. It's hard
	 * to imagine routing a signal to all IOAPICs, the
	 * usual case is routing NMI and ExtINT to all LAPICs.
	 */
	if (mpbus[p[4]] == NULL) {
		mpintrprint("no source bus", p);
		return 0;
	}
	if (p[6] != 0xff) {
		if (Napic < 256 && p[6] >= Napic) {
			mpintrprint("APIC ID out of range", p);
			return 0;
		}
		switch (p[0]) {
		default:
			mpintrprint("INTIN botch", p);
			return 0;
		case 3:	/* IOINTR */
			apic = &xioapic[p[6]];
			if (!apic->useable) {
				mpintrprint("unuseable ioapic", p);
				if (repair_iointr(p)) {
					mpintrprint("unrepairable iointr", p);
					return 0;
				}
				mpintrprint("repaired iointr", p);
				/* Repair found a usable apic */
				apic = &xioapic[p[6]];
			}
			if (p[7] >= apic->nrdt) {
				mpintrprint("IO INTIN out of range", p);
				return 0;
			}
			break;
		case 4:	/* LINTR */
			apic = &xlapic[p[6]];
			if (!apic->useable) {
				mpintrprint("unuseable lapic", p);
				return 0;
			}
			if (p[7] >= ARRAY_SIZE(apic->lvt)) {
				mpintrprint("LOCAL INTIN out of range", p);
				return 0;
			}
			break;
		}
	}
	n = l16get(p + 2);
	if ((polarity = (n & 0x03)) == 2 || (trigger = ((n >> 2) & 0x03)) == 2)
	{
		mpintrprint("invalid polarity/trigger", p);
		return 0;
	}

	/*
	 * Create the low half of the vector table entry (LVT or RDT).
	 * For the NMI, SMI and ExtINT cases, the polarity and trigger
	 * are fixed (but are not always consistent over IA-32 generations).
	 * For the INT case, either the polarity/trigger are given or
	 * it defaults to that of the source bus;
	 * whether INT is Fixed or Lowest Priority is left until later.
	 */
	v = Im;
	switch (p[1]) {
	default:
		mpintrprint("invalid type", p);
		return 0;
	case 0:	/* INT */
		switch (polarity) {
		case 0:
			v |= mpbus[p[4]]->polarity;
			break;
		case 1:
			v |= IPhigh;
			break;
		case 3:
			v |= IPlow;
			break;
		}
		switch (trigger) {
		case 0:
			v |= mpbus[p[4]]->trigger;
			break;
		case 1:
			v |= TMedge;
			break;
		case 3:
			v |= TMlevel;
			break;
		}
		break;
	case 1:	/* NMI */
		v |= TMedge | IPhigh | MTnmi;
		break;
	case 2:	/* SMI */
		v |= TMedge | IPhigh | MTsmi;
		break;
	case 3:	/* ExtINT */
		v |= TMedge | IPhigh | MTei;
		break;
	}

	return v;
}

/* Walks the MP configuration table and sets up the APIC, IOAPIC, bus and
 * interrupt routing state described by its base entries, then walks the
 * extended table for consistency only.
 *
 * @pcmp: a validated, writable table (header, base entries and extended
 *        entries contiguous in memory).  Botched IOINTR entries may be
 *        rewritten in place via mpmkintr().
 * @maxcores: upper bound on the number of enabled processors to initialise.
 *
 * Returns maxcores minus the number of processors handed to apicinit().
 *
 * Malformed input never makes the parser read or write outside the table: a
 * base entry cut short by the end of the table, or an extended entry with an
 * impossible length, ends that table's walk with a message. */
static int mpparse(PCMP * pcmp, int maxcores)
{
	uint32_t lo;
	uint8_t *e, *p;
	int devno, i, n;

	p = pcmp->entries;
	e = ((uint8_t *) pcmp) + l16get(pcmp->length);
	while (p < e) {
		/* The known base entries have a fixed size: 20 bytes for a
		 * processor, 8 for everything else.  Stop rather than read
		 * past the end of the table if the last entry is cut short. */
		n = (*p == 0) ? 20 : 8;
		if (*p <= 4 && e - p < n) {
			printk("mpparse: truncated PCMP entry type %d (e-p %d)\n",
			       *p, (int)(e - p));
			break;
		}
		switch (*p) {
		default:
			/* Unknown entry size: dump the rest of the table, which
			 * also ends the walk since p reaches e. */
			printd("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p,
			       e - p);
			for (i = 0; p < e; i++) {
				if (i && ((i & 0x0f) == 0))
					printd("\n");
				printd(" 0x%#2.2x", *p);
				p++;
			}
			printd("\n");
			break;
		case 0:	/* processor */
			/*
			 * Initialise the APIC if it is enabled (p[3] & 0x01).
			 * p[1] is the APIC ID, the memory mapped address comes
			 * from the PCMP structure as the address is local to
			 * the CPU and identical for all. Indicate whether this
			 * is the bootstrap processor (p[3] & 0x02).
			 */
			printd("mpparse: cpu %d pa %p bp %d\n",
				   p[1], l32get(pcmp->apicpa), p[3] & 0x02);
			if ((p[3] & 0x01) != 0 && maxcores > 0) {
				maxcores--;
				apicinit(p[1], l32get(pcmp->apicpa), p[3] &
					 0x02);
			}
			p += 20;
			break;
		case 1:	/* bus */
			printd("mpparse: bus: %d type %6.6s\n", p[1], (char *)p
			       + 2);
			if (p[1] >= Nbus) {
				printk("mpparse: bus %d out of range\n", p[1]);
				p += 8;
				break;
			}
			if (mpbus[p[1]] != NULL) {
				printk("mpparse: bus %d already allocated\n",
				       p[1]);
				p += 8;
				break;
			}
			/* Match the 6 byte type string against the bus types we
			 * know.  Only the first ISA bus is accepted; a second
			 * one falls through to the "unknown" message below. */
			for (i = 0; i < ARRAY_SIZE(mpbusdef); i++) {
				if (memcmp(p + 2, mpbusdef[i].type, 6) != 0)
					continue;
				if (memcmp(p + 2, "ISA   ", 6) == 0) {
					if (mpisabusno != -1) {
						printk("mpparse: bus %d already have ISA bus %d\n",
						       p[1], mpisabusno);
						continue;
					}
					mpisabusno = p[1];
				}
				mpbus[p[1]] = &mpbusdef[i];
				break;
			}
			if (mpbus[p[1]] == NULL)
				printk("mpparse: bus %d type %6.6s unknown\n",
					   p[1], (char *)p + 2);

			p += 8;
			break;
		case 2:	/* IOAPIC */
			/*
			 * Initialise the IOAPIC if it is enabled (p[3] & 0x01).
			 * p[1] is the APIC ID, p[4-7] is the memory mapped
			 * address.
			 */
			if (p[3] & 0x01)
				ioapicinit(p[1], -1, l32get(p + 4));

			p += 8;
			break;
		case 3:	/* IOINTR */
			/*
			 * p[1] is the interrupt type;
			 * p[2-3] contains the polarity and trigger mode;
			 * p[4] is the source bus;
			 * p[5] is the IRQ on the source bus;
			 * p[6] is the destination IOAPIC;
			 * p[7] is the INTIN pin on the destination IOAPIC.
			 */
			if (p[6] == 0xff) {
				mpintrprint("routed to all IOAPICs", p);
				p += 8;
				break;
			}
			if ((lo = mpmkintr(p)) == 0) {
				if (MP_VERBOSE_DEBUG)
					mpintrprint("iointr skipped", p);
				p += 8;
				break;
			}
			if (MP_VERBOSE_DEBUG)
				mpintrprint("iointr", p);

			/*
			 * Always present the device number in the style
			 * of a PCI Interrupt Assignment Entry. For the ISA
			 * bus the IRQ is the device number but unencoded.
			 * May need to handle other buses here in the future
			 * (but unlikely).
			 *
			 * For PCI devices, this field's lowest two bits are
			 * INT#A == 0, INT#B == 1, etc.  Bits 2-6 are the PCI
			 * device number.
			 */
			devno = p[5];
			if (memcmp(mpbus[p[4]]->type, "PCI   ", 6) != 0)
				devno <<= 2;
			ioapicintrinit(p[4], p[6], p[7], devno, lo);

			p += 8;
			break;
		case 4:	/* LINTR */
			/*
			 * Format is the same as IOINTR above.
			 */
			if ((lo = mpmkintr(p)) == 0) {
				p += 8;
				break;
			}
			if (MP_VERBOSE_DEBUG)
				mpintrprint("LINTR", p);

			if (p[6] == 0xff) {
				/* mpmkintr only range-checks the INTIN when
				 * p[6] names one specific APIC, so the
				 * "all LAPICs" case must be checked here
				 * before p[7] is used to index lvt[]. */
				if (p[7] >= ARRAY_SIZE(xlapic[0].lvt)) {
					mpintrprint("LOCAL INTIN out of range",
						    p);
					p += 8;
					break;
				}
				for (i = 0; i < Napic; i++) {
					if (!xlapic[i].useable ||
					    xlapic[i].addr)
						continue;
					xlapic[i].lvt[p[7]] = lo;
				}
			} else {
				/* APIC ID and INTIN were both checked in
				 * mpmkintr above. */
				xlapic[p[6]].lvt[p[7]] = lo;
			}
			p += 8;
			break;
		}
	}

	/*
	 * There's nothing of interest in the extended table,
	 * but check it for consistency.
	 */
	p = e;
	e = p + l16get(pcmp->xlength);
	while (p < e) {
		/* Every extended entry starts with a type byte and a length
		 * byte, and the length includes those two bytes.  A length
		 * below 2 would keep p from advancing (0 loops forever), and
		 * one beyond the end of the table would make us read past it,
		 * so give up on the rest of the table in either case. */
		if (e - p < 2 || p[1] < 2 || p[1] > e - p) {
			printk("mpparse: malformed extended entry type %d, ignoring the rest\n",
			       *p);
			break;
		}
		n = p[1];
		switch (*p) {
		default:
			printd("mpparse: unknown extended entry %d length %d\n",
			       *p, n);
			for (i = 0; i < n; i++) {
				if (i && ((i & 0x0f) == 0))
					printd("\n");
				printd(" %#2.2ux", *p);
				p++;
			}
			printd("\n");
			break;
		case 128:
			printd("address space mapping\n");
			printd(" bus %d type %d base %p length %p\n",
				   p[2], p[3], l64get(p + 4), l64get(p + 12));
			p += n;
			break;
		case 129:
			printd("bus hierarchy descriptor\n");
			printd(" bus %d sd %d parent bus %d\n", p[2], p[3],
			       p[4]);
			p += n;
			break;
		case 130:
			printd("compatibility bus address space modifier\n");
			printd(" bus %d pr %d range list %d\n",
				   p[2], p[3], l32get(p + 4));
			p += n;
			break;
		}
	}
	return maxcores;
}

/* Searches for a firmware structure that starts with @signature.
 *
 * Only the BIOS ROM range 0xe0000 - 0xfffff is scanned at the moment; the
 * EBDA and top-of-base-memory searches are kept below under #if 0.
 *
 * Returns whatever sigscan() returns for a hit, or NULL if the signature was
 * not found.  A hit is announced with printk; a miss is left for the caller
 * to report. */
static void *sigsearch(char *signature)
{
	void *r;
#if 0
	uintptr_t p;
	uint8_t *bda;

	/*
	 * Search for the data structure:
	 * 1) in the first KB of the EBDA;
	 * 2) in the last KB of system base memory;
	 * 3) in the BIOS ROM between 0xe0000 and 0xfffff.
	 */
	bda = BIOSSEG(0x40);
	if (memcmp(KADDR(0xfffd9), "EISA", 4) == 0) {
		if ((p = (bda[0x0f] << 8) | bda[0x0e])) {
			if ((r = sigscan(BIOSSEG(p), 1024, signature)) != NULL)
				return r;
		}
	}

	p = ((bda[0x14] << 8) | bda[0x13]) * 1024;
	if ((r = sigscan(KADDR(p - 1024), 1024, signature)) != NULL)
		return r;
#endif
	r = sigscan(KADDR(0xe0000), 0x20000, signature);
	/* Do not claim to have found a table when the scan came up empty */
	if (r != NULL)
		printk("Found MP table at %p\n", r);
	return r;
	/* Not searched: virtualbox hides its mp tables in the last KB below
	 * 0xa0000, i.e. sigscan(KADDR(0xa0000 - 1024), 1024, signature). */
}

/* Duplicates an MP configuration table into kmalloc'd memory so that entries
 * can be edited.
 *
 * @pcmp: the table to copy.  Its 'length' (header + base entries) and
 *        'xlength' (extended entries) fields determine how many bytes are
 *        copied.
 *
 * Returns the copy.  Allocation failure trips an assert, so the result is
 * never NULL. */
static PCMP *copy_pcmp(PCMP *pcmp)
{
	size_t base_len = l16get(pcmp->length);
	size_t ext_len = l16get(pcmp->xlength);
	size_t total = base_len + ext_len;
	PCMP *dup = kmalloc(total, MEM_ATOMIC);

	assert(dup);
	memcpy(dup, pcmp, total);
	return dup;
}

int mpsinit(int maxcores)
{
	uint8_t *p;
	int i;
	_MP_ *mp;

	if ((mp = sigsearch("_MP_")) == NULL) {
		printk("No mp tables found, might have issues!\n");
		return maxcores;
	}
	/* TODO: if an IMCR exists, we should set it to 1, though i've heard
	 * that ACPI-capable HW doesn't have the IMCR anymore. */

	if (MP_VERBOSE_DEBUG) {
		printk("_MP_ @ %#p, addr %p length %ud rev %d",
			   mp, l32get(mp->addr), mp->length, mp->revision);
		for (i = 0; i < sizeof(mp->feature); i++)
			printk(" %2.2p", mp->feature[i]);
		printk("\n");
	}
	if (mp->revision != 1 && mp->revision != 4)
		return maxcores;
	if (sigchecksum(mp, mp->length * 16) != 0)
		return maxcores;
	if ((pcmp = KADDR_NOCHECK(l32get(mp->addr))) == NULL)
		return maxcores;
	if (pcmp->revision != 1 && pcmp->revision != 4) {
		pcmp = NULL;
		return maxcores;
	}
	if (sigchecksum(pcmp, l16get(pcmp->length)) != 0) {
		pcmp = NULL;
		return maxcores;
	}

	pcmp = copy_pcmp(pcmp);

	if (MP_VERBOSE_DEBUG) {
		printk("PCMP @ %#p length %p revision %d\n",
			   pcmp, l16get(pcmp->length), pcmp->revision);
		printk(" %20.20s oaddr %p olength %p\n",
			   (char *)pcmp->string, l32get(pcmp->oaddr),
			   l16get(pcmp->olength));
		printk(" entry %d apicpa %p\n",
			   l16get(pcmp->entry), l32get(pcmp->apicpa));

		printk(" xlength %p xchecksum %p\n",
			   l16get(pcmp->xlength), pcmp->xchecksum);
	}
	if (pcmp->xchecksum != 0) {
		p = ((uint8_t *) pcmp) + l16get(pcmp->length);
		i = sigchecksum(p, l16get(pcmp->xlength));
		if (((i + pcmp->xchecksum) & 0xff) != 0) {
			printd("extended table checksums to %p\n", i);
			return maxcores;
		}
	}

	/*
	 * Parse the PCMP table and set up the datastructures
	 * for later interrupt enabling and application processor
	 * startup.
	 */
	return mpparse(pcmp, maxcores);
}
