#include <parlib/stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <parlib/arch/arch.h>
#include <parlib/ros_debug.h>
#include <unistd.h>
#include <errno.h>
#include <dirent.h>
#include <stdlib.h>
#include <string.h>
#include <ros/syscall.h>
#include <sys/mman.h>
#include <vmm/coreboot_tables.h>
#include <ros/common.h>
#include <vmm/vmm.h>
#include <vmm/virtio.h>
#include <vmm/virtio_mmio.h>
#include <vmm/virtio_ids.h>
#include <vmm/sched.h>
#include <ros/arch/trapframe.h>

/* Crude PCI bus. Just enough to get virtio working. I would rather not add
 * to this.
 *
 * One struct pciconfig holds the configuration registers of a single
 * device/function, stored as 32-bit words.
 */
struct pciconfig {
	/* Config registers, one 32-bit word per entry.  regp() masks the
	 * register index with 0x3f, so only the first 64 entries are ever
	 * reached through it. */
	uint32_t registers[256];
};

/* The emulated bus: one entry per device, indexed directly by the 8-bit
 * devfn.  Registers that are not listed are zero.
 */
struct pciconfig pcibus[] = {
	/* Linux requires that devfn 0 be a bridge.
	 * 00:00.0 Host bridge: Intel Corporation 440BX/ZX/DX - 82443BX/ZX/DX
	 * Host bridge (rev 01)
	 */
	[0] = {
		.registers = {
			[0] = 0x71908086,	/* device 0x7190, vendor 0x8086 */
			[1] = 0x02000006,	/* status / command */
			[2] = 0x06000001,	/* class 0x0600 (host bridge), rev 01 */
		},
	},
};
/* The value most recently latched by configaddr(), i.e. what the guest last
 * wrote to port 0xcf8.  There is exactly one of these for the whole VM, so it
 * is a single-threaded resource.
 */
static uint32_t cf8;
/* All bits set.  regp() points callers here when the device selected by cf8
 * has no entry in pcibus.
 */
static uint32_t allones = 0xffffffff;

/* Hand back, through *reg, the address of the 32-bit config "register" that
 * the current cf8 value selects.
 *
 * Bits 15:8 of cf8 are taken as the devfn and bits 7:2 as the dword index
 * within that device's registers.  If pcibus has no entry for the devfn,
 * *reg is pointed at the shared all-ones word instead.
 *
 * This is meant for read-only access: the pointer aliases live state
 * (including the shared all-ones word), so it will fail if we ever want to
 * do writes through it, but we don't.
 */
void regp(uint32_t **reg)
{
	int devfn = (cf8 >> 8) & 0xff;
	uint32_t dword = (cf8 >> 2) & 0x3f;

	if (devfn < COUNT_OF(pcibus)) {
		*reg = &pcibus[devfn].registers[dword];
		return;
	}
	/* No such device. */
	*reg = &allones;
}

/* Latch a guest write to the PCI config-address port.
 *
 * val: the 32-bit value written by the guest.  It is stored in cf8, where
 *      regp() later picks the devfn and register index out of it.
 */
static void configaddr(uint32_t val)
{
	/* val is 32 bits wide: print it with %x, as io() does, not %lx. */
	printd("%s 0x%x\n", __func__, val);
	cf8 = val;
}

/* 32-bit read of the config register currently selected by cf8.
 *
 * edx: the IO port the guest read from; only used for the debug print.
 * reg: destination.  The whole 64-bit word is overwritten with the
 *      zero-extended 32-bit register value.
 */
static void configread32(uint32_t edx, uint64_t *reg)
{
	uint32_t *r;

	/* regp() always sets r, either to a real register or to the all-ones
	 * word. */
	regp(&r);
	*reg = *r;
	/* cf8 and edx are 32-bit and r is a pointer, so use %x and %p; only
	 * *reg is 64 bits wide. */
	printd("%s: 0x%x 0x%x, %p 0x%lx\n", __func__, cf8, edx, (void *)r,
	       *reg);
}

/* 16-bit read of the config register currently selected by cf8.
 *
 * edx: the IO port the guest read from.  Bit 1 selects which half of the
 *      32-bit register is returned (clear: bits 15:0, set: bits 31:16).
 * reg: destination.  Only its low 16 bits are replaced; a 16-bit IN only
 *      writes AX, so the remaining bits are left as the guest had them.
 */
static void configread16(uint32_t edx, uint64_t *reg)
{
	uint64_t val;
	int which = ((edx & 2) >> 1) * 16;

	configread32(edx, &val);
	/* Keep just the selected half-word before merging it in. */
	val = (val >> which) & 0xffff;
	*reg = (*reg & ~(uint64_t)0xffff) | val;
	printd("%s: 0x%x, 0x%lx 0x%lx\n", __func__, edx, val, *reg);
}

/* 8-bit read of the config register currently selected by cf8.
 *
 * edx: the IO port the guest read from.  Its low two bits select which byte
 *      of the 32-bit register is returned.
 * reg: destination.  Only its low 8 bits are replaced; an 8-bit IN only
 *      writes AL, so the remaining bits are left as the guest had them.
 */
static void configread8(uint32_t edx, uint64_t *reg)
{
	uint64_t val;
	int which = (edx & 3) * 8;

	configread32(edx, &val);
	/* Keep just the selected byte before merging it in. */
	val = (val >> which) & 0xff;
	*reg = (*reg & ~(uint64_t)0xff) | val;
	printd("%s: 0x%x, 0x%lx 0x%lx\n", __func__, edx, val, *reg);
}

static void configwrite32(uint32_t addr, uint32_t val)
{
	uint32_t *r = &cf8;
	regp(&r);
	*r = val;
	printd("%s 0x%lx 0x%lx\n", __func__, addr, val);
}

/* 16-bit config write.  Nothing is stored; the access is only traced.
 *
 * addr: the IO port the guest wrote to.
 * val:  the value the guest wrote.
 */
static void configwrite16(uint32_t addr, uint16_t val)
{
	/* Neither argument is a long: use %x, not %lx. */
	printd("%s 0x%x 0x%x\n", __func__, addr, val);
}

/* 8-bit config write.  Nothing is stored; the access is only traced.
 *
 * addr: the IO port the guest wrote to.
 * val:  the value the guest wrote.
 */
static void configwrite8(uint32_t addr, uint8_t val)
{
	/* Neither argument is a long: use %x, not %lx. */
	printd("%s 0x%x 0x%x\n", __func__, addr, val);
}

/* Emulate the port-IO instruction a guest thread just exited on.
 *
 * This is very minimal.  It needs to move to vmm/io.c, but we don't know yet
 * whether such a minimal approach is even workable.  For now it only deals
 * with PCI config space, and we'd like to hope that is all we will need.  It
 * would have been nice had Intel encoded the IO exit info as nicely as they
 * encoded some of the other exits; instead the instruction at the guest RIP
 * is decoded by hand from its first one or two bytes.
 *
 * Returns 0 once the instruction has been handled (or deliberately ignored)
 * and tf_rip has been stepped past it, VM_PAGE_FAULT if rippa() fails for the
 * guest RIP, and -1 if the instruction could not be decoded.
 */
int io(struct guest_thread *vm_thread)
{
	struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
	uintptr_t ip;
	uint8_t *insn;
	uint16_t *insn16;
	uint32_t port;
	uint32_t acc;

	/* Get a pointer to the memory at %rip.  This is messy and part of the
	 * reason we don't want to do this at all.  Would have been nice had
	 * Linux had an option to ONLY do mmio config space access, but no such
	 * luck. */
	if (rippa(vm_thread, (uint64_t *)&ip))
		return VM_PAGE_FAULT;

	port = vm_tf->tf_rdx;
	acc = vm_tf->tf_rax;
	insn = (void *)ip;
	insn16 = (void *)ip;

	/* First the encodings that are recognised from their first byte. */
	switch (*insn) {
	case 0xef:
		/* out %eax, %dx */
		vm_tf->tf_rip += 1;
		if (port == 0xcf8) {
			configaddr(vm_tf->tf_rax);
		} else if (port == 0xcfc) {
			configwrite32(port, vm_tf->tf_rax);
		} else {
			/* IO to nonexistent places is perfectly legal, so this
			 * is not an error, but it may also point at a problem,
			 * so it is worth a (debug) warning.  Such IOs are
			 * infrequent and it is not always easy to decide
			 * whether they are bad.  Example: for about its first
			 * 10 years Linux did outb 0x98 to port 0x80 while
			 * idle.  Not an error, but a bad practice we ought to
			 * know about: it can cause chipset errors and other
			 * non-obvious failures (in one case, breaking BIOS
			 * reflash operations).  Plus, true story, it confused
			 * people into thinking we were running Windows 98,
			 * not Linux. */
			printd("(out rax, edx): unhandled IO address dx @%p is 0x%x\n",
			       insn, port);
		}
		return 0;

	case 0xee:
		/* out %al, %dx
		 * TODO: sort out these various OUT operations */
		vm_tf->tf_rip += 1;
		if (port == 0xcfb) {
			/* special! */
			printd("Just ignore the damned cfb write\n");
		} else if ((port & ~3) == 0xcfc) {
			/* Byte writes to the config data window are ignored. */
		} else if (port == 0xcf9) {
			/* On real hardware, an outb to 0xcf9 with bit 2 set is
			 * about as hard a reset as you can get: it yanks the
			 * reset on everything, including all the cores.  It
			 * usually comes after the kernel has carefully
			 * quiesced the machine but, once it happens, game is
			 * over.  It is not an error, hence exit(0). */
			if (acc & (1 << 2)) {
				printf("outb to PCI reset port with bit 2 set: time to die\n");
				exit(0);
			}
		} else {
			/* Another message that is not an error. */
			printd("out al, dx: unhandled IO address dx @%p is 0x%x\n", insn,
			       port);
		}
		return 0;

	case 0xe6:
		/* Silently accept OUT imm8, al */
		vm_tf->tf_rip += 2;
		return 0;

	case 0xec:
		/* in %dx, %al */
		vm_tf->tf_rip += 1;
		configread8(port, &vm_tf->tf_rax);
		return 0;

	case 0xed:
		/* in %dx, %eax */
		vm_tf->tf_rip += 1;
		if (port == 0xcf8) {
			/* Reading the address port returns what was latched. */
			vm_tf->tf_rax = cf8;
		} else {
			configread32(port, &vm_tf->tf_rax);
		}
		return 0;

	default:
		break;
	}

	/* Then the encodings that need the first two bytes. */
	switch (*insn16) {
	case 0xef66:
		/* Silently accept OUT dx, ax with opcode size modifier */
		vm_tf->tf_rip += 2;
		return 0;

	case 0x21e4:
		/* Something is being read from the PIC; hand back a value
		 * signifying that there is no PIC. */
		vm_tf->tf_rip += 2;
		vm_tf->tf_rax |= 0x00000ff;
		return 0;

	case 0xed66:
		/* IN ax, dx with opcode size modifier */
		vm_tf->tf_rip += 2;
		configread16(port, &vm_tf->tf_rax);
		return 0;

	default:
		break;
	}

	/* This is, so far, the only case in which we indicate failure: we
	 * can't even decode the instruction.  The common cases are handled
	 * above, and recently this has only been seen when the RIP is set to
	 * some bizarre value and we start fetching instructions from (e.g.)
	 * the middle of a page table.  PTEs look like IO instructions to the
	 * CPU. */
	printf("unknown IO %p %x %x\n", insn, *insn, *insn16);
	return -1;
}

