/*
 * Copyright (c) 2009 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 */

#include <arch/arch.h>
#include <atomic.h>
#include <smp.h>
#include <error.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <pmap.h>
#include <process.h>
#include <schedule.h>
#include <trap.h>
#include <trace.h>
#include <kdebug.h>
#include <kmalloc.h>
#include <core_set.h>
#include <completion.h>
#include <rcu.h>

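/* Work descriptor for smp_do_in_cores(): the function to run on each core, its
 * argument, and a completion the caller waits on until every remote core in
 * the set has run it. */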
struct all_cpu_work {
	struct completion comp;
	void (*func)(void *);
	void *opaque;
};

struct per_cpu_info per_cpu_info[MAX_NUM_CORES];

/* Tracks the number of global waits on smp_calls; must be <=
 * NUM_HANDLER_WRAPPERS. */
atomic_t outstanding_calls = 0;

/* Helper for running a proc (if we should). Lots of repetition with
 * proc_restartcore */
static void try_run_proc(void)
{
	struct per_cpu_info *pcpui = &per_cpu_info[core_id()];

	/* There was a process running here, and we should return to it. */
	if (pcpui->owning_proc) {
		assert(!pcpui->cur_kthread->sysc);
		assert(pcpui->cur_ctx);
		__proc_startcore(pcpui->owning_proc, pcpui->cur_ctx);
		assert(0);
	} else {
		/* Make sure we have abandoned core. It's possible to have an
		 * owner without a current (smp_idle, __startcore, __death).
		 *
		 * If we had a current process, we might trigger __proc_free,
		 * which could send us a KMSG. Since we're called after PRKM,
		 * let's just restart the idle loop. */
		if (abandon_core())
			smp_idle();
	}
}

/* All cores end up calling this whenever there is nothing left to do or they
 * don't know explicitly what to do. Non-zero cores call it when they are done
 * booting. Other cases include after getting a DEATH IPI.
 *
 * All cores attempt to run the context of any owning proc. Barring that, they
 * halt and wake up when interrupted, do any work on their work queue, then halt
 * again. In between, the ksched gets a chance to tell them to do something
 * else, or perhaps to halt in another manner. */
static void __attribute__((noreturn)) __smp_idle(void *arg)
{
	struct per_cpu_info *pcpui = &per_cpu_info[core_id()];

	pcpui->cur_kthread->flags = KTH_DEFAULT_FLAGS;
	while (1) {
		/* This might wake a kthread (the gp ktask), so be sure to run
		 * PRKM after reporting the quiescent state. */
		rcu_report_qs();
		/* If this runs an RKM, we'll call smp_idle from the top. */
		process_routine_kmsg();
		try_run_proc();
		cpu_bored(); /* call out to the ksched */
		/* cpu_halt() atomically turns on interrupts and halts the
		 * core. Important to do this, since we could have an RKM come
		 * in via an interrupt right while PRKM is returning, and we
		 * wouldn't catch it. When it returns, IRQs are back off. */
		__set_cpu_state(pcpui, CPU_STATE_IDLE);
		cpu_halt();
		__set_cpu_state(pcpui, CPU_STATE_KERNEL);
	}
	assert(0);
}

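/* Enters the idle loop: disables IRQs and restarts execution at the top of
 * this core's kernel stack in __smp_idle(), which never returns. */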
void smp_idle(void)
{
	disable_irq();
	__reset_stack_pointer(0, get_stack_top(), __smp_idle);
}

/* Arch-independent per-cpu initialization. This will call the arch-dependent
 * init first. */
void smp_percpu_init(void)
{
	uint32_t coreid = core_id();
	struct per_cpu_info *pcpui = &per_cpu_info[coreid];
	void *trace_buf;
	struct kthread *kthread;

	/* Don't initialize __ctx_depth here; it is already 1 (at least on
	 * x86), since this runs in IRQ context. */
	/* Do this first */
	__arch_pcpu_init(coreid);
	/* init our kthread (tracks our currently running context) */
	kthread = __kthread_zalloc();
	/* assumes we're on the 1st page */
	kthread->stacktop = get_stack_top();
	pcpui->cur_kthread = kthread;
	/* Treat the startup threads as ktasks. This will last until smp_idle
	 * when they clear it, either in anticipation of being a user-backing
	 * kthread or to handle an RKM. */
	kthread->flags = KTH_KTASK_FLAGS;
	per_cpu_info[coreid].spare = 0;
	/* Init relevant lists */
	spinlock_init_irqsave(&per_cpu_info[coreid].immed_amsg_lock);
	STAILQ_INIT(&per_cpu_info[coreid].immed_amsgs);
	spinlock_init_irqsave(&per_cpu_info[coreid].routine_amsg_lock);
	STAILQ_INIT(&per_cpu_info[coreid].routine_amsgs);
	init_timer_chain(&this_pcpui_var(tchain), set_pcpu_alarm_interrupt);
	/* Init generic tracing ring */
	trace_buf = kpage_alloc_addr();
	assert(trace_buf);
	trace_ring_init(&pcpui->traces, trace_buf, PGSIZE,
			sizeof(struct pcpu_trace_event));
	for (int i = 0; i < NR_CPU_STATES; i++)
		pcpui->state_ticks[i] = 0;
	pcpui->last_tick_cnt = read_tsc();
	/* Core 0 is in the KERNEL state, called from smp_boot. The other cores
	 * are too, at least on x86, where we were called from asm (woken by
	 * POKE). */
	pcpui->cpu_state = CPU_STATE_KERNEL;
	/* Enable full lock debugging, after all pcpui work is done */
	pcpui->__lock_checking_enabled = 1;
}

/* It's actually okay to set the state to the existing state. Originally this
 * was a bug in the state tracking, but it is possible, at least on x86, for a
 * halted core (state IDLE) to be woken by an IRQ that does not trigger the
 * IRQ-handling state, e.g. the I_POKE_CORE IPI. smp_idle will just sleep
 * again, resetting the state from IDLE to IDLE. */
void __set_cpu_state(struct per_cpu_info *pcpui, int state)
{
	uint64_t now_ticks;

	assert(!irq_is_enabled());
	/* TODO: could put in an option to enable/disable state tracking. */
	now_ticks = read_tsc();
	pcpui->state_ticks[pcpui->cpu_state] += now_ticks -
						pcpui->last_tick_cnt;
	/* TODO: if the state was user, we could account for the vcore's time,
	 * similar to the total_ticks in struct vcore. The difference is that
	 * total_ticks tracks the vcore's virtual time, while this tracks user
	 * time. Something like vcore->user_ticks. */
	pcpui->cpu_state = state;
	pcpui->last_tick_cnt = now_ticks;
}

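/* Zeroes a core's accumulated per-state tick counts and restarts the interval
 * from the current TSC, so time spent before the reset isn't charged to the
 * next state transition. */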
void reset_cpu_state_ticks(int coreid)
{
	struct per_cpu_info *pcpui = &per_cpu_info[coreid];
	uint64_t now_ticks;

	if (coreid >= num_cores)
		return;
	/* Need to update last_tick_cnt so the time since the last update
	 * doesn't get added in the next time we update. */
	now_ticks = read_tsc();
	for (int i = 0; i < NR_CPU_STATES; i++)
		pcpui->state_ticks[i] = 0;
	pcpui->last_tick_cnt = now_ticks;
}

/* PCPUI Trace Rings: */
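/* Per-type handlers for events in the trace ring. pcpui_trace_fn() dispatches
 * to these based on the event's type; each handler gets the raw event and the
 * 'data' argument passed to trace_ring_foreach(). */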
static void pcpui_trace_kmsg_handler(void *event, void *data)
{
	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
	uintptr_t addr;

	addr = te->arg1;
	printk("\tKMSG %p: %s\n", addr, get_fn_name(addr));
}

static void pcpui_trace_locks_handler(void *event, void *data)
{
	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
	const char *func_name;
	uintptr_t lock_addr = te->arg1;

	if (lock_addr > KERN_LOAD_ADDR)
		func_name = get_fn_name(lock_addr);
	else
		func_name = "Dynamic lock";
	print_lock();
	printk("Time %uus, lock %p (%s)\n", te->arg0, lock_addr, func_name);
	printk("\t");
	spinlock_debug((spinlock_t*)lock_addr);
	print_unlock();
}

/* Add specific trace handlers here: */
trace_handler_t pcpui_tr_handlers[PCPUI_NR_TYPES] = {
	0,
	pcpui_trace_kmsg_handler,
	pcpui_trace_locks_handler,
};

/* Generic handler for the pcpui ring. Will switch out to the appropriate
 * type's handler. */
static void pcpui_trace_fn(void *event, void *data)
{
	struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
	int desired_type = (int)(long)data;

	if (te->type >= PCPUI_NR_TYPES) {
		printk("Bad trace type %d\n", te->type);
		/* Don't index the handler table with a bogus type */
		return;
	}
	/* desired_type == 0 means all types */
	if (desired_type && desired_type != te->type)
		return;
	if (pcpui_tr_handlers[te->type])
		pcpui_tr_handlers[te->type](event, data);
}

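/* Dumps a core's trace ring to the console, filtered by event type (type 0
 * dumps every type). */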
void pcpui_tr_foreach(int coreid, int type)
{
	struct trace_ring *tr = &per_cpu_info[coreid].traces;

	assert(tr);
	printk("\n\nTrace Ring on Core %d\n--------------\n", coreid);
	trace_ring_foreach(tr, pcpui_trace_fn, (void*)(long)type);
}

void pcpui_tr_foreach_all(int type)
{
	for (int i = 0; i < num_cores; i++)
		pcpui_tr_foreach(i, type);
}

void pcpui_tr_reset_all(void)
{
	for (int i = 0; i < num_cores; i++)
		trace_ring_reset(&per_cpu_info[i].traces);
}

void pcpui_tr_reset_and_clear_all(void)
{
	for (int i = 0; i < num_cores; i++)
		trace_ring_reset_and_clear(&per_cpu_info[i].traces);
}

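/* Routine kernel message handler for smp_do_in_cores(): runs the requested
 * function on this core, then signals the sender's completion. */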
static void smp_do_core_work(uint32_t srcid, long a0, long a1, long a2)
{
	struct all_cpu_work *acw = (struct all_cpu_work *) a0;

	acw->func(acw->opaque);
	completion_complete(&acw->comp, 1);
}

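/* Runs func(opaque) once on every core in cset and blocks until all of them
 * have finished. The calling core, if it is in the set, runs func directly;
 * every other core in the set runs it from a routine kernel message.
 *
 * Usage sketch, assuming the core_set helpers declared in core_set.h (e.g.
 * core_set_init() and core_set_fill_available(); my_func/my_arg are
 * placeholders):
 *
 *	struct core_set cset;
 *
 *	core_set_init(&cset);
 *	core_set_fill_available(&cset);
 *	smp_do_in_cores(&cset, my_func, my_arg);
 */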
void smp_do_in_cores(const struct core_set *cset, void (*func)(void *),
		     void *opaque)
{
	int cpu = core_id();
	struct all_cpu_work acw;

	memset(&acw, 0, sizeof(acw));
	completion_init(&acw.comp, core_set_remote_count(cset));
	acw.func = func;
	acw.opaque = opaque;
	for (int i = 0; i < num_cores; i++) {
		if (core_set_getcpu(cset, i)) {
			if (i == cpu)
				func(opaque);
			else
				send_kernel_message(i, smp_do_core_work,
						    (long)&acw, 0, 0,
						    KMSG_ROUTINE);
		}
	}
	completion_wait(&acw.comp);
}