| /* |
| * Copyright (c) 2009 The Regents of the University of California |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * See LICENSE for details. |
| */ |
| |
| #include <arch/arch.h> |
| #include <atomic.h> |
| #include <smp.h> |
| #include <error.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <assert.h> |
| #include <pmap.h> |
| #include <process.h> |
| #include <schedule.h> |
| #include <trap.h> |
| #include <trace.h> |
| #include <kdebug.h> |
| #include <kmalloc.h> |
| #include <core_set.h> |
| #include <completion.h> |
| #include <rcu.h> |
| |
/* Work descriptor shared between smp_do_in_cores() and the per-core handler
 * smp_do_core_work(): what to run, and a completion the caller waits on. */
struct all_cpu_work {
	/* Signalled via completion_complete() by each remote core after it has
	 * run func; smp_do_in_cores() blocks on it with completion_wait(). */
	struct completion comp;
	/* Callback invoked on every selected core. */
	void (*func)(void *);
	/* Argument handed unchanged to func. */
	void *opaque;
};
| |
/* Per-core kernel state.  Code in this file indexes it by core id, e.g.
 * per_cpu_info[core_id()]. */
struct per_cpu_info per_cpu_info[MAX_NUM_CORES];

// tracks number of global waits on smp_calls, must be <= NUM_HANDLER_WRAPPERS
atomic_t outstanding_calls = 0;
| |
| /* Helper for running a proc (if we should). Lots of repetition with |
| * proc_restartcore */ |
| static void try_run_proc(void) |
| { |
| struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; |
| |
| /* There was a process running here, and we should return to it. */ |
| if (pcpui->owning_proc) { |
| assert(!pcpui->cur_kthread->sysc); |
| assert(pcpui->cur_ctx); |
| __proc_startcore(pcpui->owning_proc, pcpui->cur_ctx); |
| assert(0); |
| } else { |
| /* Make sure we have abandoned core. It's possible to have an |
| * owner without a current (smp_idle, __startcore, __death). |
| * |
| * If we had a current process, we might trigger __proc_free, |
| * which could send us a KMSG. Since we're called after PRKM, |
| * let's just restart the idle loop. */ |
| if (abandon_core()) |
| smp_idle(); |
| } |
| } |
| |
| /* All cores end up calling this whenever there is nothing left to do or they |
| * don't know explicitly what to do. Non-zero cores call it when they are done |
| * booting. Other cases include after getting a DEATH IPI. |
| * |
| * All cores attempt to run the context of any owning proc. Barring that, they |
| * halt and wake up when interrupted, do any work on their work queue, then halt |
| * again. In between, the ksched gets a chance to tell it to do something else, |
| * or perhaps to halt in another manner. */ |
| static void __attribute__((noreturn)) __smp_idle(void *arg) |
| { |
| struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; |
| |
| pcpui->cur_kthread->flags = KTH_DEFAULT_FLAGS; |
| while (1) { |
| /* This might wake a kthread (the gp ktask), so be sure to run |
| * PRKM after reporting the quiescent state. */ |
| rcu_report_qs(); |
| /* If this runs an RKM, we'll call smp_idle from the top. */ |
| process_routine_kmsg(); |
| try_run_proc(); |
| cpu_bored(); /* call out to the ksched */ |
| /* cpu_halt() atomically turns on interrupts and halts the core. |
| * Important to do this, since we could have a RKM come in via |
| * an interrupt right while PRKM is returning, and we wouldn't |
| * catch it. When it returns, IRQs are back off. */ |
| __set_cpu_state(pcpui, CPU_STATE_IDLE); |
| cpu_halt(); |
| __set_cpu_state(pcpui, CPU_STATE_KERNEL); |
| } |
| assert(0); |
| } |
| |
/* Public entry to the idle loop: turns IRQs off, then hands control to
 * __smp_idle through __reset_stack_pointer, passing 0 and the value of
 * get_stack_top().
 * NOTE(review): presumably this resets the stack and never returns to the
 * caller - confirm against the arch implementation of __reset_stack_pointer. */
void smp_idle(void)
{
	disable_irq();
	__reset_stack_pointer(0, get_stack_top(), __smp_idle);
}
| |
| /* Arch-independent per-cpu initialization. This will call the arch dependent |
| * init first. */ |
| void smp_percpu_init(void) |
| { |
| uint32_t coreid = core_id(); |
| struct per_cpu_info *pcpui = &per_cpu_info[coreid]; |
| void *trace_buf; |
| struct kthread *kthread; |
| /* Don't initialize __ctx_depth here, since it is already 1 (at least on |
| * x86), since this runs in irq context. */ |
| /* Do this first */ |
| __arch_pcpu_init(coreid); |
| /* init our kthread (tracks our currently running context) */ |
| kthread = __kthread_zalloc(); |
| /* assumes we're on the 1st page */ |
| kthread->stacktop = get_stack_top(); |
| pcpui->cur_kthread = kthread; |
| /* Treat the startup threads as ktasks. This will last until smp_idle |
| * when they clear it, either in anticipation of being a user-backing |
| * kthread or to handle an RKM. */ |
| kthread->flags = KTH_KTASK_FLAGS; |
| per_cpu_info[coreid].spare = 0; |
| /* Init relevant lists */ |
| spinlock_init_irqsave(&per_cpu_info[coreid].immed_amsg_lock); |
| STAILQ_INIT(&per_cpu_info[coreid].immed_amsgs); |
| spinlock_init_irqsave(&per_cpu_info[coreid].routine_amsg_lock); |
| STAILQ_INIT(&per_cpu_info[coreid].routine_amsgs); |
| init_timer_chain(&this_pcpui_var(tchain), set_pcpu_alarm_interrupt); |
| /* Init generic tracing ring */ |
| trace_buf = kpage_alloc_addr(); |
| assert(trace_buf); |
| trace_ring_init(&pcpui->traces, trace_buf, PGSIZE, |
| sizeof(struct pcpu_trace_event)); |
| for (int i = 0; i < NR_CPU_STATES; i++) |
| pcpui->state_ticks[i] = 0; |
| pcpui->last_tick_cnt = read_tsc(); |
| /* Core 0 is in the KERNEL state, called from smp_boot. The other cores |
| * are too, at least on x86, where we were called from asm (woken by |
| * POKE). */ |
| pcpui->cpu_state = CPU_STATE_KERNEL; |
| /* Enable full lock debugging, after all pcpui work is done */ |
| pcpui->__lock_checking_enabled = 1; |
| } |
| |
| /* it's actually okay to set the state to the existing state. originally, it |
| * was a bug in the state tracking, but it is possible, at least on x86, to have |
| * a halted core (state IDLE) get woken up by an IRQ that does not trigger the |
| * IRQ handling state. for example, there is the I_POKE_CORE ipi. smp_idle |
| * will just sleep again, and reset the state from IDLE to IDLE. */ |
| void __set_cpu_state(struct per_cpu_info *pcpui, int state) |
| { |
| uint64_t now_ticks; |
| |
| assert(!irq_is_enabled()); |
| /* TODO: could put in an option to enable/disable state tracking. */ |
| now_ticks = read_tsc(); |
| pcpui->state_ticks[pcpui->cpu_state] += now_ticks - |
| pcpui->last_tick_cnt; |
| /* TODO: if the state was user, we could account for the vcore's time, |
| * similar to the total_ticks in struct vcore. the difference is that |
| * the total_ticks tracks the vcore's virtual time, while this tracks |
| * user time. something like vcore->user_ticks. */ |
| pcpui->cpu_state = state; |
| pcpui->last_tick_cnt = now_ticks; |
| } |
| |
| void reset_cpu_state_ticks(int coreid) |
| { |
| struct per_cpu_info *pcpui = &per_cpu_info[coreid]; |
| uint64_t now_ticks; |
| |
| if (coreid >= num_cores) |
| return; |
| /* need to update last_tick_cnt, so the current value doesn't get added |
| * in next time we update */ |
| now_ticks = read_tsc(); |
| for (int i = 0; i < NR_CPU_STATES; i++) { |
| pcpui->state_ticks[i] = 0; |
| pcpui->last_tick_cnt = now_ticks; |
| } |
| } |
| |
| /* PCPUI Trace Rings: */ |
| |
| static void pcpui_trace_kmsg_handler(void *event, void *data) |
| { |
| struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; |
| uintptr_t addr; |
| |
| addr = te->arg1; |
| printk("\tKMSG %p: %s\n", addr, get_fn_name(addr)); |
| } |
| |
| static void pcpui_trace_locks_handler(void *event, void *data) |
| { |
| struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; |
| const char *func_name; |
| uintptr_t lock_addr = te->arg1; |
| |
| if (lock_addr > KERN_LOAD_ADDR) |
| func_name = get_fn_name(lock_addr); |
| else |
| func_name = "Dynamic lock"; |
| print_lock(); |
| printk("Time %uus, lock %p (%s)\n", te->arg0, lock_addr, func_name); |
| printk("\t"); |
| spinlock_debug((spinlock_t*)lock_addr); |
| print_unlock(); |
| } |
| |
| /* Add specific trace handlers here: */ |
| trace_handler_t pcpui_tr_handlers[PCPUI_NR_TYPES] = { |
| 0, |
| pcpui_trace_kmsg_handler, |
| pcpui_trace_locks_handler, |
| }; |
| |
| /* Generic handler for the pcpui ring. Will switch out to the appropriate |
| * type's handler */ |
| static void pcpui_trace_fn(void *event, void *data) |
| { |
| struct pcpu_trace_event *te = (struct pcpu_trace_event*)event; |
| int desired_type = (int)(long)data; |
| |
| if (te->type >= PCPUI_NR_TYPES) |
| printk("Bad trace type %d\n", te->type); |
| /* desired_type == 0 means all types */ |
| if (desired_type && desired_type != te->type) |
| return; |
| if (pcpui_tr_handlers[te->type]) |
| pcpui_tr_handlers[te->type](event, data); |
| } |
| |
| void pcpui_tr_foreach(int coreid, int type) |
| { |
| struct trace_ring *tr = &per_cpu_info[coreid].traces; |
| assert(tr); |
| printk("\n\nTrace Ring on Core %d\n--------------\n", coreid); |
| trace_ring_foreach(tr, pcpui_trace_fn, (void*)(long)type); |
| } |
| |
| void pcpui_tr_foreach_all(int type) |
| { |
| for (int i = 0; i < num_cores; i++) |
| pcpui_tr_foreach(i, type); |
| } |
| |
| void pcpui_tr_reset_all(void) |
| { |
| for (int i = 0; i < num_cores; i++) |
| trace_ring_reset(&per_cpu_info[i].traces); |
| } |
| |
| void pcpui_tr_reset_and_clear_all(void) |
| { |
| for (int i = 0; i < num_cores; i++) |
| trace_ring_reset_and_clear(&per_cpu_info[i].traces); |
| } |
| |
| static void smp_do_core_work(uint32_t srcid, long a0, long a1, long a2) |
| { |
| struct all_cpu_work *acw = (struct all_cpu_work *) a0; |
| |
| acw->func(acw->opaque); |
| completion_complete(&acw->comp, 1); |
| } |
| |
| void smp_do_in_cores(const struct core_set *cset, void (*func)(void *), |
| void *opaque) |
| { |
| int cpu = core_id(); |
| struct all_cpu_work acw; |
| |
| memset(&acw, 0, sizeof(acw)); |
| completion_init(&acw.comp, core_set_remote_count(cset)); |
| acw.func = func; |
| acw.opaque = opaque; |
| |
| for (int i = 0; i < num_cores; i++) { |
| if (core_set_getcpu(cset, i)) { |
| if (i == cpu) |
| func(opaque); |
| else |
| send_kernel_message(i, smp_do_core_work, |
| (long)&acw, 0, 0, |
| KMSG_ROUTINE); |
| } |
| } |
| completion_wait(&acw.comp); |
| } |