#ifndef PARLIB_ARCH_VCORE32_H
#define PARLIB_ARCH_VCORE32_H
#ifndef PARLIB_ARCH_VCORE_H
#error "Do not include vcore32.h directly"
#endif
#include <ros/common.h>
#include <ros/trapframe.h>
#include <ros/procdata.h>
#include <ros/syscall.h>
#include <ros/arch/mmu.h>
#include <sys/vcore-tls.h>
/* Here's how the HW popping works: It sets up the future stack pointer to
* have extra stuff after it, and then it pops the registers, then pops the new
* context's stack pointer. Then it uses the extra stuff (the new PC is on the
* stack, the location of notif_disabled, and a clobbered work register) to
* enable notifs, make sure notif IPIs weren't pending, restore the work reg,
* and then "ret".
*
* This is what the target uthread's stack will look like (growing down):
*
 * Target ESP -> | u_thread's old stuff   | the future %esp, tf->tf_esp
 *               | new eip                | 0x04 below %esp (one slot is 0x04)
 *               | eflags space           | 0x08 below
 *               | eax save space         | 0x0c below
 *               | actual syscall         | 0x10 below (0x30 bytes of space)
 *               | *sysc ptr to syscall   | 0x40 below (0x10 + 0x30)
 *               | notif_pending_loc      | 0x44 below
 *               | notif_disabled_loc     | 0x48 below
*
* The important thing is that it can handle a notification after it enables
* notifications, and when it gets resumed it can ultimately run the new
* context. Enough state is saved in the running context and stack to continue
* running.
*
* Related to that is whether or not our stack pointer is sufficiently far down
 * so that restarting *this* code won't clobber data we need later.  The way we
* do this is that we do any "stack jumping" before we enable interrupts/notifs.
* These jumps are when we directly modify esp, specifically in the down
* direction (subtracts). Adds would be okay.
*
* Another related concern is the storage for sysc. It used to be on the
* vcore's stack, but if an interrupt comes in before we use it, we trash the
* vcore's stack (and thus the storage for sysc!). Instead, we put it on the
* stack of the user tf. Moral: don't touch a vcore's stack with notifs
* enabled. */
/* Helper for writing the info we need later to the u_tf's stack.  Note, this
 * could break if sizeof(struct syscall) isn't a multiple of 4 bytes.  Also
 * note the fields are listed in the reverse of their stack order, since the
 * stack grows down while struct members are laid out at increasing
 * addresses. */
struct restart_helper {
uint32_t notif_disab_loc;
uint32_t notif_pend_loc;
struct syscall *sysc;
struct syscall local_sysc; /* unused for now */
uint32_t eax_save;
uint32_t eflags;
uint32_t eip;
};
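/* A hedged sanity-check sketch, not in the original header: these C11
 * _Static_asserts spell out the restart_helper layout that the popping asm
 * below depends on.  They assume the usual i386 ABI packing of these
 * 4-byte-aligned members and that offsetof is available via our other
 * includes. */
#if 0
_Static_assert(offsetof(struct restart_helper, notif_disab_loc) == 0x00,
	       "first field, popped deepest on the stack");
_Static_assert(offsetof(struct restart_helper, notif_pend_loc) == 0x04,
	       "popped right after the notif_disabled address");
_Static_assert(offsetof(struct restart_helper, sysc) == 0x08,
	       "&sysc sits just below the local syscall storage");
_Static_assert(offsetof(struct restart_helper, eax_save) ==
	       0x0c + sizeof(struct syscall),
	       "eax_save follows local_sysc; the asm computes sizeof + 0x0c");
#endif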
/* Static syscall, used for self-notifying. We never wait on it, and we
* actually might submit it multiple times in parallel on different cores!
* While this may seem dangerous, the kernel needs to be able to handle this
* scenario. It's also important that we never wait on this, since for all but
* the first call, the DONE flag will be set. (Set once, then never reset) */
extern struct syscall vc_entry; /* in x86/vcore.c */
static inline void pop_hw_tf(struct hw_trapframe *tf, uint32_t vcoreid)
{
struct restart_helper *rst;
struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
if (!tf->tf_cs) { /* sysenter TF. esp and eip are in other regs. */
tf->tf_esp = tf->tf_regs.reg_ebp;
tf->tf_eip = tf->tf_regs.reg_edx;
}
/* The stuff we need to write will be below the current stack of the utf */
rst = (struct restart_helper*)((void*)tf->tf_esp -
sizeof(struct restart_helper));
/* Fill in the info we'll need later */
rst->notif_disab_loc = (uint32_t)&vcpd->notif_disabled;
rst->notif_pend_loc = (uint32_t)&vcpd->notif_pending;
rst->sysc = &vc_entry;
rst->eax_save = 0; /* avoid bugs */
rst->eflags = tf->tf_eflags;
rst->eip = tf->tf_eip;
asm volatile ("movl %0,%%esp; " /* jump esp to the utf */
"popal; " /* restore normal registers */
"addl $0x24,%%esp; " /* move to the esp slot in the tf */
"popl %%esp; " /* change to the utf's %esp */
"subl $0x08,%%esp; " /* move esp to below eax's slot */
"pushl %%eax; " /* save eax, will clobber soon */
"movl %2,%%eax; " /* sizeof struct syscall */
"addl $0x0c,%%eax; " /* more offset btw eax/notif_en_loc*/
"subl %%eax,%%esp; " /* move to notif_en_loc slot */
"popl %%eax; " /* load notif_disabled addr */
"movb $0x00,(%%eax); " /* enable notifications */
		      /* Need a wrmb() here so the write of enable_notif can't
		       * pass the read of notif_pending (racing with a potential
		       * cross-core call to proc_notify()). */
"lock addl $0,(%%esp); " /* LOCK is a CPU mb() */
/* From here down, we can get interrupted and restarted */
"popl %%eax; " /* get notif_pending status */
"testb $0x01,(%%eax); " /* test if a notif is pending */
"jz 1f; " /* if not pending, skip syscall */
/* Actual syscall. Note we don't wait on the async call */
"popl %%eax; " /* &sysc, trap arg0 */
"pushl %%edx; " /* save edx, will be trap arg1 */
"movl $0x1,%%edx; " /* sending one async syscall: arg1 */
"int %1; " /* fire the syscall */
"popl %%edx; " /* restore regs after syscall */
"jmp 2f; " /* skip 1:, already popped */
"1: popl %%eax; " /* discard &sysc (on non-sc path) */
"2: addl %2,%%esp; " /* jump over the sysc (both paths) */
"popl %%eax; " /* restore tf's %eax */
"popfl; " /* restore utf's eflags */
"ret; " /* return to the new PC */
:
: "g"(tf), "i"(T_SYSCALL), "i"(sizeof(struct syscall))
: "memory");
}
static inline void pop_sw_tf(struct sw_trapframe *sw_tf, uint32_t vcoreid)
{
struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
/* Restore callee-saved FPU state. We need to clear exceptions before
* reloading the FP CW, in case the new CW unmasks any. We also need to
* reset the tag word to clear out the stack.
*
* The main issue here is that while our context was saved in an
	 * ABI-compliant manner, we may be starting up on a somewhat random FPU
* state. Having gibberish in registers isn't a big deal, but some of the
* FP environment settings could cause trouble. If fnclex; emms isn't
* enough, we could also save/restore the entire FP env with fldenv, or do
* an fninit before fldcw. */
asm volatile ("ldmxcsr %0" : : "m"(sw_tf->tf_mxcsr));
asm volatile ("fnclex; emms; fldcw %0" : : "m"(sw_tf->tf_fpucw));
	/* Basic plan: restore all the regs, using ecx as a pointer to the sw_tf.
	 * Switch to the new stack, then push the PC so we can pop it later.  Use
	 * eax and edx for the locations of sysc and vcpd.  Once on the new stack,
	 * we enable notifs, check if we missed one, and if so, self notify. */
asm volatile ("movl 0x00(%0),%%ebp; " /* restore regs */
"movl 0x04(%0),%%ebx; "
"movl 0x08(%0),%%esi; "
"movl 0x0c(%0),%%edi; "
"movl 0x10(%0),%%esp; " /* jump to future stack */
"pushl 0x14(%0); " /* save PC for future ret */
"movl %2,%%ecx; " /* vcpd loc into ecx */
"addl %4,%%ecx; " /* notif_disabled loc into ecx */
"movb $0x00,(%%ecx); " /* enable notifications */
		      /* Need a wrmb() here so the write of enable_notif can't
		       * pass the read of notif_pending (racing with a potential
		       * cross-core call to proc_notify()). */
"lock addl $0,(%%esp); " /* LOCK is a CPU mb() */
/* From here down, we can get interrupted and restarted */
"movl %2,%%ecx; " /* vcpd loc into ecx */
"addl %5,%%ecx; " /* notif_pending loc into ecx */
"testb $0x01,(%%ecx); " /* test if a notif is pending */
"jz 1f; " /* if not pending, skip syscall */
/* Actual syscall. Note we don't wait on the async call.
* &sysc is already in eax (trap arg0). */
"movl $0x1,%%edx; " /* sending one async syscall: arg1 */
"int %3; " /* fire the syscall */
"1: ret; " /* retaddr was pushed earlier */
:
: "c"(sw_tf),
"a"(&vc_entry),
"d"(vcpd),
"i"(T_SYSCALL),
"i"(offsetof(struct preempt_data, notif_disabled)),
"i"(offsetof(struct preempt_data, notif_pending))
: "memory");
}
/* Pops a user context, re-enabling notifications at the same time.  A
 * userspace scheduler can call this when transitioning off the transition
 * stack.
 *
 * At some point in vcore context before calling this, you need to clear
 * notif_pending (do this by calling handle_events()).  As a potential
 * optimization, consider clearing notif_pending / handling events once more
 * right before calling this.  If notif_pending is still set when we pop, the
 * popped code will self_notify this core, since a set flag can only mean we
 * missed a notification message while notifs were disabled. */
static inline void pop_user_ctx(struct user_context *ctx, uint32_t vcoreid)
{
if (ctx->type == ROS_HW_CTX)
pop_hw_tf(&ctx->tf.hw_tf, vcoreid);
else
pop_sw_tf(&ctx->tf.sw_tf, vcoreid);
}
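/* A hedged usage sketch, not part of the original header: roughly how a
 * second-level scheduler's vcore entry might use pop_user_ctx().  The
 * handle_events() call and current_uthread are stand-ins for the real
 * parlib/uthread plumbing. */
#if 0
void __attribute__((noreturn)) example_vcore_entry(void)
{
	uint32_t vcoreid = vcore_id();

	/* Drain events while still in vcore context; this clears
	 * notif_pending, which pop_user_ctx() expects to have happened. */
	handle_events(vcoreid);
	/* Restores the uthread and re-enables notifs atomically. */
	pop_user_ctx(&current_uthread->u_ctx, vcoreid);
}
#endif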
/* Like the regular pop_user_ctx, but this one doesn't check or clear
* notif_pending. The only case where we use this is when an IRQ/notif
* interrupts a uthread that is in the process of disabling notifs. */
static inline void pop_user_ctx_raw(struct user_context *ctx, uint32_t vcoreid)
{
struct hw_trapframe *tf = &ctx->tf.hw_tf;
assert(ctx->type == ROS_HW_CTX);
struct restart_helper *rst;
struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
if (!tf->tf_cs) { /* sysenter TF. esp and eip are in other regs. */
tf->tf_esp = tf->tf_regs.reg_ebp;
tf->tf_eip = tf->tf_regs.reg_edx;
}
/* The stuff we need to write will be below the current stack of the utf */
rst = (struct restart_helper*)((void*)tf->tf_esp -
sizeof(struct restart_helper));
/* Fill in the info we'll need later */
rst->notif_disab_loc = (uint32_t)&vcpd->notif_disabled;
rst->eax_save = 0; /* avoid bugs */
rst->eflags = tf->tf_eflags;
rst->eip = tf->tf_eip;
asm volatile ("movl %0,%%esp; " /* jump esp to the utf */
"popal; " /* restore normal registers */
"addl $0x24,%%esp; " /* move to the esp slot in the tf */
"popl %%esp; " /* change to the utf's %esp */
"subl $0x08,%%esp; " /* move esp to below eax's slot */
"pushl %%eax; " /* save eax, will clobber soon */
"movl %2,%%eax; " /* sizeof struct syscall */
"addl $0x0c,%%eax; " /* more offset btw eax/notif_en_loc*/
"subl %%eax,%%esp; " /* move to notif_en_loc slot */
"popl %%eax; " /* load notif_disabled addr */
"movb $0x00,(%%eax); " /* enable notifications */
		      /* Here's where we differ from the regular pop_user_ctx():
		       * we do the same pops/esp moves, just to keep things
		       * similar and simple, but we don't test or clear
		       * notif_pending, and we don't fire the syscall. */
/* From here down, we can get interrupted and restarted */
"popl %%eax; " /* get notif_pending status */
"popl %%eax; " /* discard &sysc (on non-sc path) */
"addl %2,%%esp; " /* jump over the sysc (both paths) */
"popl %%eax; " /* restore tf's %eax */
"popfl; " /* restore utf's eflags */
"ret; " /* return to the new PC */
:
: "g"(tf), "i"(T_SYSCALL), "i"(sizeof(struct syscall))
: "memory");
}
/* Saves a SW context, setting the PC to the end of this function.  We only
 * save the callee-saved registers (of the sysv ABI).  The compiler knows to
 * save the others via the input/clobber lists.
*
* Callers of this function need to have at least one
* 'calling-convention-compliant' function call between this and any floating
* point, so that the compiler saves any caller-saved FP before getting to
* here.
*
* To some extent, TLS is 'callee-saved', in that no one ever expects it to
* change. We handle uthread TLS changes separately, since we often change to
 * them early to set some variables.  Arguably we should do this differently. */
static inline void __attribute__((always_inline, returns_twice))
save_user_ctx(struct user_context *ctx)
{
struct sw_trapframe *sw_tf = &ctx->tf.sw_tf;
long dummy;
ctx->type = ROS_SW_CTX;
asm volatile ("stmxcsr %0" : "=m"(sw_tf->tf_mxcsr));
asm volatile ("fnstcw %0" : "=m"(sw_tf->tf_fpucw));
/* Pretty simple: save all the regs, IAW the sys-v ABI */
asm volatile ("movl %%ebp,0x00(%0); "
"movl %%ebx,0x04(%0); "
"movl %%esi,0x08(%0); "
"movl %%edi,0x0c(%0); "
"movl %%esp,0x10(%0); "
"leal 1f,%%eax; " /* get future eip */
"movl %%eax,0x14(%0); "
"1: " /* where this tf will restart */
: "=c"(dummy) /* force clobber for ecx */
: "c"(sw_tf)
: "eax", "edx", "memory", "cc");
}
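/* A hedged sketch, not in the original: save_user_ctx() is setjmp-like in
 * that control passes its call site twice, once now and once more when the
 * saved context is later popped.  'switched' is a hypothetical flag; it is
 * volatile since registers are rolled back on the second pass. */
#if 0
static void example_save_then_switch(struct user_context *ctx)
{
	volatile int switched = 0;

	save_user_ctx(ctx);
	if (!switched) {
		switched = 1;
		/* First pass: run something else.  Eventually someone calls
		 * pop_user_ctx(ctx, vcore_id()) and we resume below. */
	}
	/* The second pass lands here after the pop. */
}
#endif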
/* The old version, kept around for testing */
static inline void __attribute__((always_inline, returns_twice))
save_user_ctx_hw(struct user_context *ctx)
{
struct hw_trapframe *tf = &ctx->tf.hw_tf;
ctx->type = ROS_HW_CTX;
memset(tf, 0, sizeof(struct hw_trapframe)); /* sanity */
/* set CS and make sure eflags is okay */
tf->tf_cs = GD_UT | 3;
tf->tf_eflags = 0x00000200; /* interrupts enabled. bare minimum eflags. */
/* Save the regs and the future esp. */
asm volatile("movl %%esp,(%0); " /* save esp in it's slot*/
"pushl %%eax; " /* temp save eax */
"leal 1f,%%eax; " /* get future eip */
"movl %%eax,(%1); " /* store future eip */
"popl %%eax; " /* restore eax */
"movl %2,%%esp; " /* move to the beginning of the tf */
"addl $0x20,%%esp; " /* move to after the push_regs */
"pushal; " /* save regs */
"addl $0x44,%%esp; " /* move to esp slot */
"popl %%esp; " /* restore esp */
"1: " /* where this tf will restart */
:
: "g"(&tf->tf_esp), "g"(&tf->tf_eip), "g"(tf)
: "eax", "memory", "cc");
}
static inline void init_user_ctx(struct user_context *ctx, uint32_t entry_pt,
uint32_t stack_top)
{
struct sw_trapframe *sw_tf = &ctx->tf.sw_tf;
ctx->type = ROS_SW_CTX;
/* No need to bother with setting the other GP registers; the called
* function won't care about their contents. */
sw_tf->tf_esp = stack_top;
sw_tf->tf_eip = entry_pt;
sw_tf->tf_mxcsr = 0x00001f80; /* x86 default mxcsr */
sw_tf->tf_fpucw = 0x037f; /* x86 default FP CW */
}
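/* A hedged usage sketch, not in the original: preparing a fresh SW context
 * so that a later pop_user_ctx() starts running at 'thread_entry' on a new
 * stack.  The parameter names here are hypothetical. */
#if 0
static void example_setup_thread(struct user_context *ctx,
                                 void (*thread_entry)(void),
                                 void *stack_bottom, uint32_t stack_sz)
{
	/* Stacks grow down, so the usable top is the end of the buffer. */
	uint32_t stack_top = (uint32_t)stack_bottom + stack_sz;

	init_user_ctx(ctx, (uint32_t)thread_entry, stack_top);
	/* A later pop_user_ctx(ctx, vcore_id()) begins execution there. */
}
#endif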
/* This is how we get our vcore id on entry (the macro reads it out of eax). */
#define __vcore_id_on_entry \
({ \
register int temp asm ("eax"); \
temp; \
})
/* For debugging. */
#include <stdio.h>
static void print_hw_tf(struct hw_trapframe *tf)
{
printf("[user] HW TRAP frame %08p\n", tf);
printf(" edi 0x%08x\n", tf->tf_regs.reg_edi);
printf(" esi 0x%08x\n", tf->tf_regs.reg_esi);
printf(" ebp 0x%08x\n", tf->tf_regs.reg_ebp);
printf(" oesp 0x%08x\n", tf->tf_regs.reg_oesp);
printf(" ebx 0x%08x\n", tf->tf_regs.reg_ebx);
printf(" edx 0x%08x\n", tf->tf_regs.reg_edx);
printf(" ecx 0x%08x\n", tf->tf_regs.reg_ecx);
printf(" eax 0x%08x\n", tf->tf_regs.reg_eax);
printf(" gs 0x----%04x\n", tf->tf_gs);
printf(" fs 0x----%04x\n", tf->tf_fs);
printf(" es 0x----%04x\n", tf->tf_es);
printf(" ds 0x----%04x\n", tf->tf_ds);
printf(" trap 0x%08x\n", tf->tf_trapno);
printf(" err 0x%08x\n", tf->tf_err);
printf(" eip 0x%08x\n", tf->tf_eip);
printf(" cs 0x----%04x\n", tf->tf_cs);
printf(" flag 0x%08x\n", tf->tf_eflags);
printf(" esp 0x%08x\n", tf->tf_esp);
printf(" ss 0x----%04x\n", tf->tf_ss);
}
static void print_sw_tf(struct sw_trapframe *sw_tf)
{
printf("[user] SW TRAP frame %08p\n", sw_tf);
printf(" ebp 0x%08x\n", sw_tf->tf_ebp);
printf(" ebx 0x%08x\n", sw_tf->tf_ebx);
printf(" esi 0x%08x\n", sw_tf->tf_esi);
printf(" edi 0x%08x\n", sw_tf->tf_edi);
printf(" esp 0x%08x\n", sw_tf->tf_esp);
printf(" eip 0x%08x\n", sw_tf->tf_eip);
printf(" mxcsr 0x%08x\n", sw_tf->tf_mxcsr);
printf(" fpucw 0x----%04x\n", sw_tf->tf_fpucw);
}
static void print_user_context(struct user_context *ctx)
{
if (ctx->type == ROS_HW_CTX)
print_hw_tf(&ctx->tf.hw_tf);
else if (ctx->type == ROS_SW_CTX)
print_sw_tf(&ctx->tf.sw_tf);
else
printf("Unknown context type %d\n", ctx->type);
}
static bool has_refl_fault(struct user_context *ctx)
{
return ctx->tf.hw_tf.tf_padding3 == ROS_ARCH_REFL_ID;
}
static void clear_refl_fault(struct user_context *ctx)
{
ctx->tf.hw_tf.tf_padding3 = 0;
}
static unsigned int __arch_refl_get_nr(struct user_context *ctx)
{
return ctx->tf.hw_tf.tf_trapno;
}
static unsigned int __arch_refl_get_err(struct user_context *ctx)
{
return ctx->tf.hw_tf.tf_err;
}
static unsigned long __arch_refl_get_aux(struct user_context *ctx)
{
return ctx->tf.hw_tf.tf_regs.reg_oesp;
}
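/* A hedged sketch, not in the original: how a scheduler might consume a
 * reflected fault before deciding what to do with the context. */
#if 0
static void example_check_refl(struct user_context *ctx)
{
	if (!has_refl_fault(ctx))
		return;
	printf("Reflected trap %u, err 0x%x, aux 0x%lx\n",
	       __arch_refl_get_nr(ctx), __arch_refl_get_err(ctx),
	       __arch_refl_get_aux(ctx));
	clear_refl_fault(ctx);
}
#endif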
#endif /* PARLIB_ARCH_VCORE32_H */