/* Copyright (c) 2012 The Regents of the University of California
* Barret Rhoden <brho@cs.berkeley.edu>
* See LICENSE for details.
*
* Arch-independent trap handling and kernel messaging */
#include <arch/arch.h>
#include <smp.h>
#include <trap.h>
#include <stdio.h>
#include <slab.h>
#include <assert.h>
#include <kdebug.h>
#include <kmalloc.h>
#include <rcu.h>
static void print_unhandled_trap(struct proc *p, struct user_context *ctx,
unsigned int trap_nr, unsigned int err,
unsigned long aux)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
uint32_t vcoreid = pcpui->owning_vcoreid;
struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[vcoreid];
static spinlock_t print_trap_lock = SPINLOCK_INITIALIZER;
spin_lock(&print_trap_lock);
if (!proc_is_vcctx_ready(p))
printk("Unhandled user trap from early SCP\n");
else if (vcpd->notif_disabled)
printk("Unhandled user trap in vcore context from VC %d\n", vcoreid);
print_user_ctx(ctx);
printk("err 0x%x (for PFs: User 4, Wr 2, Rd 1), aux %p\n", err, aux);
debug_addr_proc(p, get_user_ctx_pc(ctx));
print_vmrs(p);
backtrace_user_ctx(p, ctx);
spin_unlock(&print_trap_lock);
}
/* Traps that are considered normal operations. */
static bool benign_trap(unsigned int err)
{
return err & PF_VMR_BACKED;
}
static void printx_unhandled_trap(struct proc *p, struct user_context *ctx,
unsigned int trap_nr, unsigned int err,
unsigned long aux)
{
if (printx_on && !benign_trap(err))
print_unhandled_trap(p, ctx, trap_nr, err, aux);
}
/* Helper, reflects the current context back to the 2LS. Returns 0 on success,
* -1 on failure. */
int reflect_current_context(void)
{
uint32_t coreid = core_id();
struct per_cpu_info *pcpui = &per_cpu_info[coreid];
struct proc *p = pcpui->cur_proc;
uint32_t vcoreid = pcpui->owning_vcoreid;
struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[vcoreid];
assert(pcpui->cur_proc == pcpui->owning_proc);
if (!proc_is_vcctx_ready(p))
return -1;
if (vcpd->notif_disabled)
return -1;
/* the guts of a __notify */
vcpd->notif_disabled = TRUE;
copy_current_ctx_to(&vcpd->uthread_ctx);
memset(pcpui->cur_ctx, 0, sizeof(struct user_context));
proc_init_ctx(pcpui->cur_ctx, vcoreid, vcpd->vcore_entry,
vcpd->vcore_stack, vcpd->vcore_tls_desc);
return 0;
}
void reflect_unhandled_trap(unsigned int trap_nr, unsigned int err,
unsigned long aux)
{
uint32_t coreid = core_id();
struct per_cpu_info *pcpui = &per_cpu_info[coreid];
struct proc *p = pcpui->cur_proc;
assert(p);
assert(pcpui->cur_ctx && (pcpui->cur_ctx->type == ROS_HW_CTX));
	/* We need to store trap_nr, the err code, and aux in the TF so they can be
	 * extracted on the other end, and we need to flag the TF in some way so we
	 * can tell it was reflected.  For example, on a PF we need the trap number
	 * (14 on x86), the prot violation (write, read, etc.), and the virt addr
	 * (aux).  Parlib knows how to extract this info. */
__arch_reflect_trap_hwtf(&pcpui->cur_ctx->tf.hw_tf, trap_nr, err, aux);
printx_unhandled_trap(p, pcpui->cur_ctx, trap_nr, err, aux);
if (reflect_current_context()) {
print_unhandled_trap(p, pcpui->cur_ctx, trap_nr, err, aux);
proc_destroy(p);
}
}
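/* Usage sketch (not called from this file): once an arch fault handler gives
 * up on a user page fault, it can reflect the fault back to userspace roughly
 * like this, assuming an x86-style hw_trapframe with tf_trapno and tf_err and
 * the faulting address in fault_va:
 *
 *	reflect_unhandled_trap(hw_tf->tf_trapno, hw_tf->tf_err, fault_va);
 *
 * If the reflection fails (early SCP, or notifs disabled), the process gets
 * the unhandled-trap printout and is destroyed. */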
/* Helper, copies the current context to to_ctx. */
void copy_current_ctx_to(struct user_context *to_ctx)
{
struct user_context *cur_ctx = current_ctx;
	/* Be sure to finalize into cur_ctx, not to_ctx.  Otherwise the arch could
	 * get confused by other calls to finalize. */
arch_finalize_ctx(cur_ctx);
*to_ctx = *cur_ctx;
}
struct kmem_cache *kernel_msg_cache;
void kernel_msg_init(void)
{
kernel_msg_cache = kmem_cache_create("kernel_msgs",
sizeof(struct kernel_message),
ARCH_CL_SIZE, 0, NULL, 0, 0, NULL);
}
uint32_t send_kernel_message(uint32_t dst, amr_t pc, long arg0, long arg1,
long arg2, int type)
{
kernel_message_t *k_msg;
assert(pc);
	/* Note: this will be freed on the destination core. */
k_msg = kmem_cache_alloc(kernel_msg_cache, 0);
k_msg->srcid = core_id();
k_msg->dstid = dst;
k_msg->pc = pc;
k_msg->arg0 = arg0;
k_msg->arg1 = arg1;
k_msg->arg2 = arg2;
switch (type) {
case KMSG_IMMEDIATE:
spin_lock_irqsave(&per_cpu_info[dst].immed_amsg_lock);
STAILQ_INSERT_TAIL(&per_cpu_info[dst].immed_amsgs, k_msg, link);
spin_unlock_irqsave(&per_cpu_info[dst].immed_amsg_lock);
break;
case KMSG_ROUTINE:
spin_lock_irqsave(&per_cpu_info[dst].routine_amsg_lock);
STAILQ_INSERT_TAIL(&per_cpu_info[dst].routine_amsgs, k_msg, link);
spin_unlock_irqsave(&per_cpu_info[dst].routine_amsg_lock);
break;
default:
panic("Unknown type of kernel message!");
}
	/* Since we touched memory the other core will touch (the lock), we don't
	 * need a wmb_f(). */
/* if we're sending a routine message locally, we don't want/need an IPI */
if ((dst != k_msg->srcid) || (type == KMSG_IMMEDIATE))
send_ipi(dst, I_KERNEL_MSG);
return 0;
}
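/* Usage sketch (hypothetical, not part of this file): to run a handler with
 * the amr_t signature on another core, queue it as a routine message.  The
 * handler and dst_core below are made up for illustration:
 *
 *	static void hello_handler(uint32_t srcid, long a0, long a1, long a2)
 *	{
 *		printk("Hello from core %d: %ld %ld %ld\n", srcid, a0, a1, a2);
 *	}
 *
 *	send_kernel_message(dst_core, hello_handler, 1, 2, 3, KMSG_ROUTINE);
 *
 * A routine message runs when the destination core next calls
 * process_routine_kmsg(); KMSG_IMMEDIATE would instead run it from the IPI
 * handler with IRQs disabled. */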
/* Kernel message IPI/IRQ handler.
*
 * This processes immediate messages, and that's it (it used to handle routine
 * messages too, when the IPI came in from userspace).  Routine messages get
 * processed when the kernel has a chance: right before popping to userspace,
 * or in smp_idle() before halting.
*
* Note that all of this happens from interrupt context, and interrupts are
* disabled. */
void handle_kmsg_ipi(struct hw_trapframe *hw_tf, void *data)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
struct kernel_message *kmsg_i, *temp;
/* Avoid locking if the list appears empty (lockless peek is okay) */
if (STAILQ_EMPTY(&pcpui->immed_amsgs))
return;
/* The lock serves as a cmb to force a re-read of the head of the list */
spin_lock_irqsave(&pcpui->immed_amsg_lock);
STAILQ_FOREACH_SAFE(kmsg_i, &pcpui->immed_amsgs, link, temp) {
pcpui_trace_kmsg(pcpui, (uintptr_t)kmsg_i->pc);
kmsg_i->pc(kmsg_i->srcid, kmsg_i->arg0, kmsg_i->arg1, kmsg_i->arg2);
STAILQ_REMOVE(&pcpui->immed_amsgs, kmsg_i, kernel_message, link);
kmem_cache_free(kernel_msg_cache, (void*)kmsg_i);
}
spin_unlock_irqsave(&pcpui->immed_amsg_lock);
}
bool has_routine_kmsg(void)
{
struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
/* lockless peek */
return !STAILQ_EMPTY(&pcpui->routine_amsgs);
}
/* Helper function, gets the next routine KMSG (RKM).  Returns NULL if the list
 * was empty. */
static kernel_message_t *get_next_rkmsg(struct per_cpu_info *pcpui)
{
struct kernel_message *kmsg;
/* Avoid locking if the list appears empty (lockless peek is okay) */
if (STAILQ_EMPTY(&pcpui->routine_amsgs))
		return NULL;
/* The lock serves as a cmb to force a re-read of the head of the list.
* IRQs are disabled by our caller. */
spin_lock(&pcpui->routine_amsg_lock);
kmsg = STAILQ_FIRST(&pcpui->routine_amsgs);
if (kmsg)
STAILQ_REMOVE_HEAD(&pcpui->routine_amsgs, link);
spin_unlock(&pcpui->routine_amsg_lock);
return kmsg;
}
/* Runs routine kernel messages. This might not return. In the past, this
* would also run immediate messages, but this is unnecessary. Immediates will
* run whenever we reenable IRQs. We could have some sort of ordering or
* guarantees between KMSG classes, but that's not particularly useful at this
* point.
*
 * Note this runs from normal context, with interrupts disabled.  However, a
 * particular RKM could enable interrupts: for instance, __launch_kthread()
 * will restore an old kthread that may have had IRQs on. */
void process_routine_kmsg(void)
{
uint32_t pcoreid = core_id();
struct per_cpu_info *pcpui = &per_cpu_info[pcoreid];
struct kernel_message msg_cp, *kmsg;
/* Important that callers have IRQs disabled. When sending cross-core RKMs,
* the IPI is used to keep the core from going to sleep - even though RKMs
* aren't handled in the kmsg handler. Check smp_idle() for more info. */
assert(!irq_is_enabled());
while ((kmsg = get_next_rkmsg(pcpui))) {
/* Copy in, and then free, in case we don't return */
msg_cp = *kmsg;
kmem_cache_free(kernel_msg_cache, (void*)kmsg);
assert(msg_cp.dstid == pcoreid); /* caught a brutal bug with this */
set_rkmsg(pcpui); /* we're now in early RKM ctx */
/* The kmsg could block. If it does, we want the kthread code to know
* it's not running on behalf of a process, and we're actually spawning
* a kernel task. While we do have a syscall that does work in an RKM
* (change_to), it's not really the rest of the syscall context. */
pcpui->cur_kthread->flags = KTH_KTASK_FLAGS;
pcpui_trace_kmsg(pcpui, (uintptr_t)msg_cp.pc);
msg_cp.pc(msg_cp.srcid, msg_cp.arg0, msg_cp.arg1, msg_cp.arg2);
/* And if we make it back, be sure to restore the default flags. If we
* never return, but the kthread exits via some other way (smp_idle()),
* then smp_idle() will deal with the flags. The default state includes
* 'not a ktask'. */
pcpui->cur_kthread->flags = KTH_DEFAULT_FLAGS;
/* PRKM is like a cooperative ksched, and our 'thread' just yielded. If
* this is too much, we can do something more limited, e.g. wait for
* idle, check a pcpui bit that means 'check in', etc. */
rcu_report_qs();
/* If we aren't still in early RKM, it is because the KMSG blocked
* (thus leaving early RKM, finishing in default context) and then
* returned. This is a 'detached' RKM. Must idle in this scenario,
* since we might have migrated or otherwise weren't meant to PRKM
* (can't return twice). Also note that this may involve a core
		 * migration, so we need to reread pcpui. */
cmb();
pcpui = &per_cpu_info[core_id()];
if (!in_early_rkmsg_ctx(pcpui))
smp_idle();
clear_rkmsg(pcpui);
/* Some RKMs might turn on interrupts (perhaps in the future) and then
* return. */
disable_irq();
}
}
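/* Call-site sketch (simplified; see smp_idle() for the real loop): routine
 * messages are drained with IRQs disabled, typically from an idle loop that
 * waits for the kmsg IPI in between.  wait_for_kmsg_ipi() is a placeholder for
 * whatever arch-specific halt the idle loop actually uses:
 *
 *	disable_irq();
 *	while (1) {
 *		process_routine_kmsg();
 *		wait_for_kmsg_ipi();
 *	}
 *
 * The IPI from send_kernel_message() is what wakes a halted core so it notices
 * the new RKM, even though routine messages aren't run from the IPI handler
 * itself. */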
/* Extremely dangerous and racy: prints the immediate and routine kmsgs for a
 * specific core (possibly a remote one). */
void print_kmsgs(uint32_t coreid)
{
struct per_cpu_info *pcpui = &per_cpu_info[coreid];
void __print_kmsgs(struct kernel_msg_list *list, char *type)
{
struct kernel_message *kmsg_i;
STAILQ_FOREACH(kmsg_i, list, link) {
printk("%s KMSG on %d from %d to run %p(%s)(%p, %p, %p)\n", type,
kmsg_i->dstid, kmsg_i->srcid, kmsg_i->pc,
get_fn_name((long)kmsg_i->pc),
kmsg_i->arg0, kmsg_i->arg1, kmsg_i->arg2);
}
}
	__print_kmsgs(&pcpui->immed_amsgs, "Immediate");
__print_kmsgs(&pcpui->routine_amsgs, "Routine");
}
void __kmsg_trampoline(uint32_t srcid, long a0, long a1, long a2)
{
((void (*)(long arg0, long arg1))a0)(a1, a2);
}
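/* Usage sketch (hypothetical caller): the trampoline lets a two-argument
 * function ride in a kernel message without matching the amr_t signature.
 * my_func and dst_core are made up for illustration:
 *
 *	static void my_func(long x, long y) { ... }
 *
 *	send_kernel_message(dst_core, __kmsg_trampoline, (long)my_func, x, y,
 *	                    KMSG_ROUTINE);
 *
 * On the destination core the trampoline runs my_func(x, y); the srcid is
 * dropped, and only two arguments fit. */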
/* Debugging stuff */
void kmsg_queue_stat(void)
{
struct kernel_message *kmsg;
bool immed_emp, routine_emp;
for (int i = 0; i < num_cores; i++) {
spin_lock_irqsave(&per_cpu_info[i].immed_amsg_lock);
immed_emp = STAILQ_EMPTY(&per_cpu_info[i].immed_amsgs);
spin_unlock_irqsave(&per_cpu_info[i].immed_amsg_lock);
spin_lock_irqsave(&per_cpu_info[i].routine_amsg_lock);
routine_emp = STAILQ_EMPTY(&per_cpu_info[i].routine_amsgs);
spin_unlock_irqsave(&per_cpu_info[i].routine_amsg_lock);
printk("Core %d's immed_emp: %d, routine_emp %d\n", i, immed_emp,
routine_emp);
if (!immed_emp) {
kmsg = STAILQ_FIRST(&per_cpu_info[i].immed_amsgs);
printk("Immed msg on core %d:\n", i);
printk("\tsrc: %d\n", kmsg->srcid);
printk("\tdst: %d\n", kmsg->dstid);
printk("\tpc: %p\n", kmsg->pc);
printk("\targ0: %p\n", kmsg->arg0);
printk("\targ1: %p\n", kmsg->arg1);
printk("\targ2: %p\n", kmsg->arg2);
}
if (!routine_emp) {
kmsg = STAILQ_FIRST(&per_cpu_info[i].routine_amsgs);
printk("Routine msg on core %d:\n", i);
printk("\tsrc: %d\n", kmsg->srcid);
printk("\tdst: %d\n", kmsg->dstid);
printk("\tpc: %p\n", kmsg->pc);
printk("\targ0: %p\n", kmsg->arg0);
printk("\targ1: %p\n", kmsg->arg1);
printk("\targ2: %p\n", kmsg->arg2);
}
}
}
void print_kctx_depths(const char *str)
{
uint32_t coreid = core_id();
struct per_cpu_info *pcpui = &per_cpu_info[coreid];
if (!str)
str = "(none)";
printk("%s: Core %d, irq depth %d, ktrap depth %d, irqon %d\n", str, coreid,
irq_depth(pcpui), ktrap_depth(pcpui), irq_is_enabled());
}
void print_user_ctx(struct user_context *ctx)
{
switch (ctx->type) {
case ROS_HW_CTX:
print_trapframe(&ctx->tf.hw_tf);
break;
case ROS_SW_CTX:
print_swtrapframe(&ctx->tf.sw_tf);
break;
case ROS_VM_CTX:
print_vmtrapframe(&ctx->tf.vm_tf);
break;
default:
printk("Bad TF %p type %d!\n", ctx, ctx->type);
}
}