| /* | 
 |  * Copyright (c) 2009 The Regents of the University of California | 
 |  * Barret Rhoden <brho@cs.berkeley.edu> | 
 |  * See LICENSE for details. | 
 |  */ | 
 |  | 
 | #include <arch/arch.h> | 
 | #include <atomic.h> | 
 | #include <smp.h> | 
 | #include <error.h> | 
 | #include <stdio.h> | 
 | #include <string.h> | 
 | #include <assert.h> | 
 | #include <pmap.h> | 
 | #include <process.h> | 
 | #include <schedule.h> | 
 | #include <trap.h> | 
 | #include <trace.h> | 
 | #include <kdebug.h> | 
 | #include <kmalloc.h> | 
 | #include <core_set.h> | 
 | #include <completion.h> | 
 |  | 
 | struct all_cpu_work { | 
 | 	struct completion comp; | 
 | 	void (*func)(void *); | 
 | 	void *opaque; | 
 | }; | 
 |  | 
 | struct per_cpu_info per_cpu_info[MAX_NUM_CORES]; | 
 |  | 
 | // tracks number of global waits on smp_calls, must be <= NUM_HANDLER_WRAPPERS | 
 | atomic_t outstanding_calls = 0; | 
 |  | 
 | /* Helper for running a proc (if we should).  Lots of repetition with | 
 |  * proc_restartcore */ | 
 | static void try_run_proc(void) | 
 | { | 
 | 	struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; | 
 | 	/* There was a process running here, and we should return to it. */ | 
 | 	if (pcpui->owning_proc) { | 
 | 		assert(!pcpui->cur_kthread->sysc); | 
 | 		assert(pcpui->cur_ctx); | 
 | 		__proc_startcore(pcpui->owning_proc, pcpui->cur_ctx); | 
 | 		assert(0); | 
 | 	} else { | 
 | 		/* Make sure we have abandoned core.  It's possible to have an owner | 
 | 		 * without a current (smp_idle, __startcore, __death). */ | 
 | 		abandon_core(); | 
 | 	} | 
 | } | 
 |  | 
 | /* All cores end up calling this whenever there is nothing left to do or they | 
 |  * don't know explicitly what to do.  Non-zero cores call it when they are done | 
 |  * booting.  Other cases include after getting a DEATH IPI. | 
 |  * | 
 |  * All cores attempt to run the context of any owning proc.  Barring that, they | 
 |  * halt and wake up when interrupted, do any work on their work queue, then halt | 
 |  * again.  In between, the ksched gets a chance to tell it to do something else, | 
 |  * or perhaps to halt in another manner. */ | 
 | static void __attribute__((noinline, noreturn)) __smp_idle(void) | 
 | { | 
 | 	struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; | 
 |  | 
 | 	clear_rkmsg(pcpui); | 
 | 	pcpui->cur_kthread->flags = KTH_DEFAULT_FLAGS; | 
 | 	enable_irq();	/* one-shot change to get any IRQs before we halt later */ | 
 | 	while (1) { | 
 | 		disable_irq(); | 
 | 		process_routine_kmsg(); | 
 | 		try_run_proc(); | 
 | 		cpu_bored();		/* call out to the ksched */ | 
 | 		/* cpu_halt() atomically turns on interrupts and halts the core. | 
 | 		 * Important to do this, since we could have a RKM come in via an | 
 | 		 * interrupt right while PRKM is returning, and we wouldn't catch | 
 | 		 * it. */ | 
 | 		__set_cpu_state(pcpui, CPU_STATE_IDLE); | 
 | 		cpu_halt(); | 
 | 		/* interrupts are back on now (given our current semantics) */ | 
 | 	} | 
 | 	assert(0); | 
 | } | 
 |  | 
 | void smp_idle(void) | 
 | { | 
 | 	/* FP must be zeroed before SP.  Ideally, we'd do both atomically.  If we | 
 | 	 * take an IRQ in between and set SP first, then a backtrace would be | 
 | 	 * confused since FP points *below* the SP that the *IRQ handler* is now | 
 | 	 * using.  Disabling IRQs gets us most of the way, but we could have an NMI | 
 | 	 * that does a BT (e.g. for debugging).  By zeroing FP first, at least we | 
 | 	 * won't BT at all (though FP is still out of sync with SP). | 
 | 	 * | 
 | 	 * Disabling IRQs here also will help with general sanity. */ | 
 | 	disable_irq(); | 
 | 	#ifdef CONFIG_RESET_STACKS | 
 | 	set_frame_pointer(0); | 
 | 	cmb(); | 
 | 	set_stack_pointer(get_stack_top()); | 
 | 	#endif /* CONFIG_RESET_STACKS */ | 
 | 	__smp_idle(); | 
 | 	assert(0); | 
 | } | 
 |  | 
 | /* Arch-independent per-cpu initialization.  This will call the arch dependent | 
 |  * init first. */ | 
 | void smp_percpu_init(void) | 
 | { | 
 | 	uint32_t coreid = core_id(); | 
 | 	struct per_cpu_info *pcpui = &per_cpu_info[coreid]; | 
 | 	void *trace_buf; | 
 | 	struct kthread *kthread; | 
 | 	/* Don't initialize __ctx_depth here, since it is already 1 (at least on | 
 | 	 * x86), since this runs in irq context. */ | 
 | 	/* Do this first */ | 
 | 	__arch_pcpu_init(coreid); | 
 | 	/* init our kthread (tracks our currently running context) */ | 
 | 	kthread = __kthread_zalloc(); | 
 | 	kthread->stacktop = get_stack_top();	/* assumes we're on the 1st page */ | 
 | 	pcpui->cur_kthread = kthread; | 
 | 	/* Treat the startup threads as ktasks.  This will last until smp_idle when | 
 | 	 * they clear it, either in anticipation of being a user-backing kthread or | 
 | 	 * to handle an RKM. */ | 
 | 	kthread->flags = KTH_KTASK_FLAGS; | 
 | 	per_cpu_info[coreid].spare = 0; | 
 | 	/* Init relevant lists */ | 
 | 	spinlock_init_irqsave(&per_cpu_info[coreid].immed_amsg_lock); | 
 | 	STAILQ_INIT(&per_cpu_info[coreid].immed_amsgs); | 
 | 	spinlock_init_irqsave(&per_cpu_info[coreid].routine_amsg_lock); | 
 | 	STAILQ_INIT(&per_cpu_info[coreid].routine_amsgs); | 
 | 	/* Initialize the per-core timer chain */ | 
 | 	init_timer_chain(&per_cpu_info[coreid].tchain, set_pcpu_alarm_interrupt); | 
 | #ifdef CONFIG_KTHREAD_POISON | 
 | 	*kstack_bottom_addr(kthread->stacktop) = 0xdeadbeef; | 
 | #endif /* CONFIG_KTHREAD_POISON */ | 
 | 	/* Init generic tracing ring */ | 
 | 	trace_buf = kpage_alloc_addr(); | 
 | 	assert(trace_buf); | 
 | 	trace_ring_init(&pcpui->traces, trace_buf, PGSIZE, | 
 | 	                sizeof(struct pcpu_trace_event)); | 
 | 	for (int i = 0; i < NR_CPU_STATES; i++) | 
 | 		pcpui->state_ticks[i] = 0; | 
 | 	pcpui->last_tick_cnt = read_tsc(); | 
 | 	/* Core 0 is in the KERNEL state, called from smp_boot.  The other cores are | 
 | 	 * too, at least on x86, where we were called from asm (woken by POKE). */ | 
 | 	pcpui->cpu_state = CPU_STATE_KERNEL; | 
 | 	/* Enable full lock debugging, after all pcpui work is done */ | 
 | 	pcpui->__lock_checking_enabled = 1; | 
 | } | 
 |  | 
 | /* it's actually okay to set the state to the existing state.  originally, it | 
 |  * was a bug in the state tracking, but it is possible, at least on x86, to have | 
 |  * a halted core (state IDLE) get woken up by an IRQ that does not trigger the | 
 |  * IRQ handling state.  for example, there is the I_POKE_CORE ipi.  smp_idle | 
 |  * will just sleep again, and reset the state from IDLE to IDLE. */ | 
 | void __set_cpu_state(struct per_cpu_info *pcpui, int state) | 
 | { | 
 | 	uint64_t now_ticks; | 
 | 	assert(!irq_is_enabled()); | 
 | 	/* TODO: could put in an option to enable/disable state tracking. */ | 
 | 	now_ticks = read_tsc(); | 
 | 	pcpui->state_ticks[pcpui->cpu_state] += now_ticks - pcpui->last_tick_cnt; | 
 | 	/* TODO: if the state was user, we could account for the vcore's time, | 
 | 	 * similar to the total_ticks in struct vcore.  the difference is that the | 
 | 	 * total_ticks tracks the vcore's virtual time, while this tracks user time. | 
 | 	 * something like vcore->user_ticks. */ | 
 | 	pcpui->cpu_state = state; | 
 | 	pcpui->last_tick_cnt = now_ticks; | 
 | } | 
 |  | 
 | void reset_cpu_state_ticks(int coreid) | 
 | { | 
 | 	struct per_cpu_info *pcpui = &per_cpu_info[coreid]; | 
 | 	uint64_t now_ticks; | 
 | 	if (coreid >= num_cores) | 
 | 		return; | 
 | 	/* need to update last_tick_cnt, so the current value doesn't get added in | 
 | 	 * next time we update */ | 
 | 	now_ticks = read_tsc(); | 
 | 	for (int i = 0; i < NR_CPU_STATES; i++) { | 
 | 		pcpui->state_ticks[i] = 0; | 
 | 		pcpui->last_tick_cnt = now_ticks; | 
 | 	} | 
 | } | 
 |  | 
 | /* PCPUI Trace Rings: */ | 
 |  | 
 | static void pcpui_trace_kmsg_handler(void *event, void *data) | 
 | { | 
 | 	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; | 
 | 	char *func_name; | 
 | 	uintptr_t addr; | 
 | 	addr = te->arg1; | 
 | 	func_name = get_fn_name(addr); | 
 | 	printk("\tKMSG %p: %s\n", addr, func_name); | 
 | 	kfree(func_name); | 
 | } | 
 |  | 
 | static void pcpui_trace_locks_handler(void *event, void *data) | 
 | { | 
 | 	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; | 
 | 	char *func_name; | 
 | 	uintptr_t lock_addr = te->arg1; | 
 | 	if (lock_addr > KERN_LOAD_ADDR) | 
 | 		func_name = get_fn_name(lock_addr); | 
 | 	else | 
 | 		func_name = "Dynamic lock"; | 
 | 	printk("Time %uus, lock %p (%s)\n", te->arg0, lock_addr, func_name); | 
 | 	printk("\t"); | 
 | 	spinlock_debug((spinlock_t*)lock_addr); | 
 | 	if (lock_addr > KERN_LOAD_ADDR) | 
 | 		kfree(func_name); | 
 | } | 
 |  | 
 | /* Add specific trace handlers here: */ | 
 | trace_handler_t pcpui_tr_handlers[PCPUI_NR_TYPES] = { | 
 |                                   0, | 
 |                                   pcpui_trace_kmsg_handler, | 
 |                                   pcpui_trace_locks_handler, | 
 |                                   }; | 
 |  | 
 | /* Generic handler for the pcpui ring.  Will switch out to the appropriate | 
 |  * type's handler */ | 
 | static void pcpui_trace_fn(void *event, void *data) | 
 | { | 
 | 	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; | 
 | 	int desired_type = (int)(long)data; | 
 | 	if (te->type >= PCPUI_NR_TYPES) | 
 | 		printk("Bad trace type %d\n", te->type); | 
 | 	/* desired_type == 0 means all types */ | 
 | 	if (desired_type && desired_type != te->type) | 
 | 		return; | 
 | 	if (pcpui_tr_handlers[te->type]) | 
 | 		pcpui_tr_handlers[te->type](event, data); | 
 | } | 
 |  | 
 | void pcpui_tr_foreach(int coreid, int type) | 
 | { | 
 | 	struct trace_ring *tr = &per_cpu_info[coreid].traces; | 
 | 	assert(tr); | 
 | 	printk("\n\nTrace Ring on Core %d\n--------------\n", coreid); | 
 | 	trace_ring_foreach(tr, pcpui_trace_fn, (void*)(long)type); | 
 | } | 
 |  | 
 | void pcpui_tr_foreach_all(int type) | 
 | { | 
 | 	for (int i = 0; i < num_cores; i++) | 
 | 		pcpui_tr_foreach(i, type); | 
 | } | 
 |  | 
 | void pcpui_tr_reset_all(void) | 
 | { | 
 | 	for (int i = 0; i < num_cores; i++) | 
 | 		trace_ring_reset(&per_cpu_info[i].traces); | 
 | } | 
 |  | 
 | void pcpui_tr_reset_and_clear_all(void) | 
 | { | 
 | 	for (int i = 0; i < num_cores; i++) | 
 | 		trace_ring_reset_and_clear(&per_cpu_info[i].traces); | 
 | } | 
 |  | 
 | static void smp_do_core_work(uint32_t srcid, long a0, long a1, long a2) | 
 | { | 
 | 	struct all_cpu_work *acw = (struct all_cpu_work *) a0; | 
 |  | 
 | 	acw->func(acw->opaque); | 
 | 	completion_complete(&acw->comp, 1); | 
 | } | 
 |  | 
 | void smp_do_in_cores(const struct core_set *cset, void (*func)(void *), | 
 | 					 void *opaque) | 
 | { | 
 | 	int cpu = core_id(); | 
 | 	struct all_cpu_work acw; | 
 |  | 
 | 	memset(&acw, 0, sizeof(acw)); | 
 | 	completion_init(&acw.comp, core_set_remote_count(cset)); | 
 | 	acw.func = func; | 
 | 	acw.opaque = opaque; | 
 |  | 
 | 	for (int i = 0; i < num_cores; i++) { | 
 | 		if (core_set_getcpu(cset, i)) { | 
 | 			if (i == cpu) | 
 | 				func(opaque); | 
 | 			else | 
 | 				send_kernel_message(i, smp_do_core_work, (long) &acw, 0, 0, | 
 | 									KMSG_ROUTINE); | 
 | 		} | 
 | 	} | 
 | 	completion_wait(&acw.comp); | 
 | } |