| /* |
| * This file is part of the UCB release of Plan 9. It is subject to the license |
| * terms in the LICENSE file found in the top-level directory of this |
| * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No |
| * part of the UCB release of Plan 9, including this file, may be copied, |
| * modified, propagated, or distributed except according to the terms contained |
| * in the LICENSE file. |
| */ |
| |
| #include <ros/profiler_records.h> |
| #include <arch/time.h> |
| #include <slab.h> |
| #include <kmalloc.h> |
| #include <kref.h> |
| #include <atomic.h> |
| #include <kthread.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <assert.h> |
| #include <error.h> |
| #include <pmap.h> |
| #include <smp.h> |
| #include <time.h> |
| #include <circular_buffer.h> |
| #include <umem.h> |
| #include <profiler.h> |
| #include <kprof.h> |
| #include <ros/procinfo.h> |
| #include <init.h> |
| |
| #define KTRACE_BUFFER_SIZE (128 * 1024) |
| #define TRACE_PRINTK_BUFFER_SIZE (8 * 1024) |
| |
| enum { |
| Kprofdirqid = 0, |
| Kprofdataqid, |
| Kprofctlqid, |
| Kptracectlqid, |
| Kptraceqid, |
| Kprintxqid, |
| Kmpstatqid, |
| Kmpstatrawqid, |
| }; |
| |
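| /* Per-core scratch space for trace_vprintk(). The in_use flag makes each
| * buffer single-owner, so the formatting code needs no lock. */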
| struct trace_printk_buffer { |
| atomic_t in_use; |
| char buffer[TRACE_PRINTK_BUFFER_SIZE]; |
| }; |
| |
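| /* Global device state: 'lock' serializes the control operations,
| * 'mpstat_ipi' selects whether mpstat reads poke the other cores first,
| * 'profiling' says whether sample collection is running, and 'opened'
| * enforces the one-opener-at-a-time rule on kpctl. */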
| struct kprof { |
| qlock_t lock; |
| bool mpstat_ipi; |
| bool profiling; |
| bool opened; |
| }; |
| |
| struct dev kprofdevtab; |
| struct dirtab kproftab[] = { |
| {".", {Kprofdirqid, 0, QTDIR}, 0, DMDIR|0550},
| {"kpdata", {Kprofdataqid}, 0, 0600}, |
| {"kpctl", {Kprofctlqid}, 0, 0600}, |
| {"kptrace_ctl", {Kptracectlqid}, 0, 0660}, |
| {"kptrace", {Kptraceqid}, 0, 0600}, |
| {"kprintx", {Kprintxqid}, 0, 0600}, |
| {"mpstat", {Kmpstatqid}, 0, 0600}, |
| {"mpstat-raw", {Kmpstatrawqid}, 0, 0600}, |
| }; |
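| 
| /* A typical profiling session drives these files from userspace roughly as
| * follows (a sketch; it assumes #K is bound at /prof, which is up to the
| * namespace):
| *
| *	echo start > /prof/kpctl
| *	...run the workload...
| *	echo stop > /prof/kpctl
| *	cat /prof/kpdata > kpdata.out
| */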
| |
| static struct kprof kprof; |
| static bool ktrace_init_done = FALSE; |
| static spinlock_t ktrace_lock = SPINLOCK_INITIALIZER_IRQSAVE; |
| static struct circular_buffer ktrace_data; |
| static char ktrace_buffer[KTRACE_BUFFER_SIZE]; |
| static char kprof_control_usage[128]; |
| |
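| /* Upper bounds on the rendered sizes of the mpstat files; the magic numbers
| * mirror the snprintf() field widths in mpstat_read() and mpstatraw_read()
| * below. */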
| static size_t mpstat_len(void) |
| { |
| size_t each_row = 7 + NR_CPU_STATES * 26; |
| |
| return each_row * (num_cores + 1) + 1; |
| } |
| |
| static size_t mpstatraw_len(void) |
| { |
| size_t header_row = 27 + NR_CPU_STATES * 7 + 1; |
| size_t cpu_row = 7 + NR_CPU_STATES * 17; |
| |
| return header_row + cpu_row * num_cores + 1; |
| } |
| |
| static char *devname(void) |
| { |
| return kprofdevtab.name; |
| } |
| |
| static struct chan *kprof_attach(char *spec) |
| { |
| return devattach(devname(), spec); |
| } |
| |
| /* Start collecting samples from perf events into the profiler. |
| * |
| * This command only runs if the user successfully opened kpctl, which gives |
| * them a profiler (the global profiler, for now). */ |
| static void kprof_start_profiler(void) |
| { |
| ERRSTACK(1); |
| |
| qlock(&kprof.lock); |
| if (waserror()) { |
| qunlock(&kprof.lock); |
| nexterror(); |
| } |
| if (!kprof.profiling) { |
| profiler_start(); |
| kprof.profiling = TRUE; |
| } |
| poperror(); |
| qunlock(&kprof.lock); |
| } |
| |
| /* Stops collecting samples from perf events. |
| * |
| * This command only runs if the user successfully opened kpctl, which gives |
| * them a profiler (the global profiler, for now). */ |
| static void kprof_stop_profiler(void) |
| { |
| ERRSTACK(1); |
| |
| qlock(&kprof.lock); |
| if (waserror()) { |
| qunlock(&kprof.lock); |
| nexterror(); |
| } |
| if (kprof.profiling) { |
| profiler_stop(); |
| kprof.profiling = FALSE; |
| } |
| poperror(); |
| qunlock(&kprof.lock); |
| } |
| |
| /* Makes each core flush its results into the profiler queue. You can do this |
| * while the profiler is still running. However, this does not hang up the |
| * queue, so reads on kpdata will block. */ |
| static void kprof_flush_profiler(void) |
| { |
| ERRSTACK(1); |
| |
| qlock(&kprof.lock); |
| if (waserror()) { |
| qunlock(&kprof.lock); |
| nexterror(); |
| } |
| if (kprof.profiling) |
| profiler_trace_data_flush(); |
| poperror(); |
| qunlock(&kprof.lock); |
| } |
| |
| static void kprof_init(void) |
| { |
| profiler_init(); |
| |
| qlock_init(&kprof.lock); |
| kprof.profiling = FALSE; |
| kprof.opened = FALSE; |
| |
| for (int i = 0; i < ARRAY_SIZE(kproftab); i++) |
| kproftab[i].length = 0; |
| |
| kprof.mpstat_ipi = TRUE; |
| kproftab[Kmpstatqid].length = mpstat_len(); |
| kproftab[Kmpstatrawqid].length = mpstatraw_len(); |
| |
| strlcpy(kprof_control_usage, "start|stop|flush", |
| sizeof(kprof_control_usage)); |
| profiler_append_configure_usage(kprof_control_usage, |
| sizeof(kprof_control_usage)); |
| } |
| |
| static void kprof_shutdown(void) |
| { |
| } |
| |
| static struct walkqid *kprof_walk(struct chan *c, struct chan *nc, char **name, |
| unsigned int nname) |
| { |
| return devwalk(c, nc, name, nname, kproftab, ARRAY_SIZE(kproftab), |
| devgen); |
| } |
| |
| static size_t kprof_profdata_size(void) |
| { |
| return profiler_size(); |
| } |
| |
| static long kprof_profdata_read(void *dest, long size, int64_t off) |
| { |
| return profiler_read(dest, size); |
| } |
| |
| static size_t kprof_stat(struct chan *c, uint8_t *db, size_t n) |
| { |
| kproftab[Kprofdataqid].length = kprof_profdata_size(); |
| kproftab[Kptraceqid].length = kprof_tracedata_size(); |
| |
| return devstat(c, db, n, kproftab, ARRAY_SIZE(kproftab), devgen); |
| } |
| |
| static struct chan *kprof_open(struct chan *c, int omode) |
| { |
| if (c->qid.type & QTDIR) { |
| if (openmode(omode) != O_READ) |
| error(EPERM, ERROR_FIXME); |
| } |
| switch ((int) c->qid.path) { |
| case Kprofctlqid: |
| /* We have one global profiler. Only one FD may be opened at a |
| * time for it. If we ever have separate profilers, we can |
| * create the profiler here, and every open would get a separate |
| * instance. */ |
| qlock(&kprof.lock); |
| if (kprof.opened) { |
| qunlock(&kprof.lock); |
| error(EBUSY, "Global profiler is already open"); |
| } |
| kprof.opened = TRUE; |
| /* TODO: have a creation function for a non-global profiler */ |
| profiler_setup(); |
| qunlock(&kprof.lock); |
| break; |
| } |
| c->mode = openmode(omode); |
| c->flag |= COPEN; |
| c->offset = 0; |
| return c; |
| } |
| |
| static void kprof_close(struct chan *c) |
| { |
| if (c->flag & COPEN) { |
| switch ((int) c->qid.path) { |
| case Kprofctlqid: |
| kprof_stop_profiler(); |
| qlock(&kprof.lock); |
| profiler_cleanup(); |
| kprof.opened = FALSE; |
| qunlock(&kprof.lock); |
| break; |
| } |
| } |
| } |
| |
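| /* Renders per-core time-in-state: one row per core, and for each state a
| * seconds.microseconds figure plus that state's share of the core's total
| * ticks. The header row is built from cpu_state_names[]. */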
| static long mpstat_read(void *va, long n, int64_t off) |
| { |
| size_t bufsz = mpstat_len(); |
| char *buf = kmalloc(bufsz, MEM_WAIT); |
| int len = 0; |
| struct per_cpu_info *pcpui; |
| uint64_t cpu_total; |
| struct timespec ts; |
| |
| /* The IPI perturbs the other cores; we may want a way to disable it. */
| if (kprof.mpstat_ipi) |
| send_broadcast_ipi(I_POKE_CORE); |
| |
| len += snprintf(buf + len, bufsz - len, " CPU: "); |
| for (int j = 0; j < NR_CPU_STATES; j++) |
| len += snprintf(buf + len, bufsz - len, "%23s%s", |
| cpu_state_names[j], |
| j != NR_CPU_STATES - 1 ? " " : " \n"); |
| |
| for (int i = 0; i < num_cores; i++) { |
| pcpui = &per_cpu_info[i]; |
| cpu_total = 0; |
| len += snprintf(buf + len, bufsz - len, "%5d: ", i); |
| for (int j = 0; j < NR_CPU_STATES; j++) |
| cpu_total += pcpui->state_ticks[j]; |
| cpu_total = MAX(cpu_total, 1); /* for the divide later */ |
| for (int j = 0; j < NR_CPU_STATES; j++) { |
| ts = tsc2timespec(pcpui->state_ticks[j]); |
| len += snprintf(buf + len, bufsz - len,
| "%10ld.%06ld (%3llu%%)%s",
| ts.tv_sec, ts.tv_nsec / 1000,
| MIN((pcpui->state_ticks[j] * 100) /
| cpu_total, 100),
| j != NR_CPU_STATES - 1 ? ", " : " \n");
| } |
| } |
| n = readstr(off, va, n, buf); |
| kfree(buf); |
| return n; |
| } |
| |
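| /* Machine-readable variant of mpstat: a header carrying a format version,
| * the core count, the TSC frequency, and the state names, followed by one
| * row of raw hex tick counts per core. */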
| static long mpstatraw_read(void *va, long n, int64_t off) |
| { |
| size_t bufsz = mpstatraw_len(); |
| char *buf = kmalloc(bufsz, MEM_WAIT); |
| int len = 0; |
| struct per_cpu_info *pcpui; |
| |
| /* We could spit it all out in binary, though then it'd be harder to
| * process the data across a mnt (if we export #K). Probably not a big
| * deal. */
| |
| /* header line: version, num_cores, tsc freq, state names */ |
| len += snprintf(buf + len, bufsz - len, "v%03d %5d %16llu", 1, |
| num_cores, __proc_global_info.tsc_freq); |
| for (int j = 0; j < NR_CPU_STATES; j++) |
| len += snprintf(buf + len, bufsz - len, " %6s", |
| cpu_state_names[j]); |
| len += snprintf(buf + len, bufsz - len, "\n"); |
| |
| for (int i = 0; i < num_cores; i++) { |
| pcpui = &per_cpu_info[i]; |
| len += snprintf(buf + len, bufsz - len, "%5d: ", i); |
| for (int j = 0; j < NR_CPU_STATES; j++) { |
| len += snprintf(buf + len, bufsz - len, "%16llx%s", |
| pcpui->state_ticks[j], |
| j != NR_CPU_STATES - 1 ? " " : "\n"); |
| } |
| } |
| n = readstr(off, va, n, buf); |
| kfree(buf); |
| return n; |
| } |
| |
| static size_t kprof_read(struct chan *c, void *va, size_t n, off64_t off) |
| { |
| uintptr_t offset = off;
| |
| switch ((int) c->qid.path) { |
| case Kprofdirqid: |
| return devdirread(c, va, n, kproftab, ARRAY_SIZE(kproftab), |
| devgen); |
| case Kprofdataqid: |
| n = kprof_profdata_read(va, n, off); |
| break; |
| case Kptraceqid: |
| n = kprof_tracedata_read(va, n, off); |
| break; |
| case Kprintxqid: |
| n = readstr(offset, va, n, printx_on ? "on" : "off"); |
| break; |
| case Kmpstatqid: |
| n = mpstat_read(va, n, offset); |
| break; |
| case Kmpstatrawqid: |
| n = mpstatraw_read(va, n, offset); |
| break; |
| default: |
| n = 0; |
| break; |
| } |
| return n; |
| } |
| |
| static size_t kprof_write(struct chan *c, void *a, size_t n, off64_t unused) |
| { |
| ERRSTACK(1); |
| struct cmdbuf *cb = parsecmd(a, n); |
| |
| if (waserror()) { |
| kfree(cb); |
| nexterror(); |
| } |
| switch ((int) c->qid.path) { |
| case Kprofctlqid: |
| if (cb->nf < 1) |
| error(EFAIL, kprof_control_usage); |
| if (profiler_configure(cb)) |
| break; |
| if (!strcmp(cb->f[0], "start")) { |
| kprof_start_profiler(); |
| } else if (!strcmp(cb->f[0], "flush")) { |
| kprof_flush_profiler(); |
| } else if (!strcmp(cb->f[0], "stop")) { |
| kprof_stop_profiler(); |
| } else { |
| error(EFAIL, kprof_control_usage); |
| } |
| break; |
| case Kptracectlqid: |
| if (cb->nf < 1) |
| error(EFAIL, "Bad kptrace_ctl option (clear)");
| if (!strcmp(cb->f[0], "clear")) { |
| spin_lock_irqsave(&ktrace_lock); |
| circular_buffer_clear(&ktrace_data); |
| spin_unlock_irqsave(&ktrace_lock); |
| } |
| break; |
| case Kptraceqid: |
| if (a && (n > 0)) { |
| char *uptr = user_strdup_errno(current, a, n); |
| |
| if (uptr) { |
| trace_printk("%s", uptr); |
| user_memdup_free(current, uptr); |
| } else { |
| n = -1; |
| } |
| } |
| break; |
| case Kprintxqid: |
| if (!strncmp(a, "on", 2)) |
| set_printx(1); |
| else if (!strncmp(a, "off", 3)) |
| set_printx(0); |
| else if (!strncmp(a, "toggle", 6)) |
| set_printx(2); |
| else |
| error(EFAIL, "Invalid option to Kprintx %s", a);
| break; |
| case Kmpstatqid: |
| case Kmpstatrawqid: |
| if (cb->nf < 1) |
| error(EFAIL, "Bad mpstat option (reset|ipi|on|off)"); |
| if (!strcmp(cb->f[0], "reset")) { |
| for (int i = 0; i < num_cores; i++) |
| reset_cpu_state_ticks(i); |
| } else if (!strcmp(cb->f[0], "on")) { |
| /* TODO: enable the ticks */ ; |
| } else if (!strcmp(cb->f[0], "off")) { |
| /* TODO: disable the ticks */ ; |
| } else if (!strcmp(cb->f[0], "ipi")) { |
| if (cb->nf < 2) |
| error(EFAIL, "Need another arg: ipi [on|off]"); |
| if (!strcmp(cb->f[1], "on")) |
| kprof.mpstat_ipi = TRUE; |
| else if (!strcmp(cb->f[1], "off")) |
| kprof.mpstat_ipi = FALSE; |
| else |
| error(EFAIL, "ipi [on|off]"); |
| } else { |
| error(EFAIL, "Bad mpstat option (reset|ipi|on|off)"); |
| } |
| break; |
| default: |
| error(EBADFD, ERROR_FIXME); |
| } |
| kfree(cb); |
| poperror(); |
| return n; |
| } |
| |
| size_t kprof_tracedata_size(void) |
| { |
| return circular_buffer_size(&ktrace_data); |
| } |
| |
| size_t kprof_tracedata_read(void *data, size_t size, size_t offset) |
| { |
| spin_lock_irqsave(&ktrace_lock); |
| if (likely(ktrace_init_done)) |
| size = circular_buffer_read(&ktrace_data, data, size, offset); |
| else |
| size = 0; |
| spin_unlock_irqsave(&ktrace_lock); |
| |
| return size; |
| } |
| |
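| /* Dumps the entire ktrace buffer to the console in one shot. */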
| void kprof_dump_data(void) |
| { |
| char *buf;
| size_t len = kprof_tracedata_size();
| 
| buf = kmalloc(len + 1, MEM_WAIT);
| len = kprof_tracedata_read(buf, len, 0);
| buf[len] = 0; /* %s needs a terminator; the ring data has none */
| printk("%s", buf);
| kfree(buf); |
| } |
| |
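| /* Appends to the ktrace ring, lazily initializing it on first use so that
| * messages emitted before kprof_init() runs are not lost. */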
| void kprof_tracedata_write(const char *pretty_buf, size_t len) |
| { |
| spin_lock_irqsave(&ktrace_lock); |
| if (unlikely(!ktrace_init_done)) { |
| circular_buffer_init(&ktrace_data, sizeof(ktrace_buffer), |
| ktrace_buffer); |
| ktrace_init_done = TRUE; |
| } |
| circular_buffer_write(&ktrace_data, pretty_buf, len); |
| spin_unlock_irqsave(&ktrace_lock); |
| } |
| |
| static struct trace_printk_buffer *kprof_get_printk_buffer(void) |
| { |
| static struct trace_printk_buffer boot_tpb; |
| static struct trace_printk_buffer *cpu_tpbs; |
| static atomic_t alloc_done; |
| |
| if (unlikely(booting)) |
| return &boot_tpb; |
| if (unlikely(!cpu_tpbs)) { |
| /* A poor man's per-CPU data structure. I really do not like
| * littering global data structures with module-specific data.
| * We cannot take the ktrace_lock to protect the kzmalloc()
| * call, as that might trigger printk()s, and we would reenter
| * here. Let only one core into the kzmalloc() path, and let
| * the others get the boot_tpb until finished. */
| if (!atomic_cas(&alloc_done, 0, 1)) |
| return &boot_tpb; |
| cpu_tpbs = kzmalloc(num_cores * |
| sizeof(struct trace_printk_buffer), 0); |
| } |
| |
| return cpu_tpbs + core_id_early(); |
| } |
| |
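| /* Formats the message into this core's scratch buffer, prefixes each output
| * line with a "[sec.nsec]:cpuN: " header, and appends the result to the
| * ktrace ring. Reentrant calls (e.g. an IRQ landing mid-format) are silently
| * dropped via the in_use flag rather than deadlocking. */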
| void trace_vprintk(const char *fmt, va_list args) |
| { |
| struct print_buf { |
| char *ptr; |
| char *top; |
| }; |
| |
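| /* A nested function (GCC extension), kept local to trace_vprintk(). */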
| void emit_print_buf_str(struct print_buf *pb, const char *str, |
| ssize_t size) |
| { |
| if (size < 0) { |
| for (; *str && (pb->ptr < pb->top); str++) |
| *(pb->ptr++) = *str; |
| } else { |
| for (; (size > 0) && (pb->ptr < pb->top); str++, size--) |
| *(pb->ptr++) = *str; |
| } |
| } |
| |
| static const size_t bufsz = TRACE_PRINTK_BUFFER_SIZE; |
| static const size_t usr_bufsz = (3 * bufsz) / 8; |
| static const size_t kp_bufsz = bufsz - usr_bufsz; |
| struct trace_printk_buffer *tpb = kprof_get_printk_buffer(); |
| struct timespec ts_now = { 0, 0 }; |
| struct print_buf pb; |
| char *usrbuf = tpb->buffer, *kpbuf = tpb->buffer + usr_bufsz; |
| const char *utop, *uptr; |
| char hdr[64]; |
| |
| if (!atomic_cas(&tpb->in_use, 0, 1)) |
| return; |
| if (likely(__proc_global_info.tsc_freq)) |
| ts_now = tsc2timespec(read_tsc()); |
| snprintf(hdr, sizeof(hdr), "[%lu.%09lu]:cpu%d: ", ts_now.tv_sec, |
| ts_now.tv_nsec, core_id_early()); |
| |
| pb.ptr = usrbuf + vsnprintf(usrbuf, usr_bufsz, fmt, args); |
| pb.top = usrbuf + usr_bufsz; |
| |
| /* Guard against an empty message; don't read before the buffer. */
| if (pb.ptr > usrbuf && pb.ptr[-1] != '\n')
| emit_print_buf_str(&pb, "\n", 1);
| /* snprintf null terminates the buffer, and does not count that as part |
| * of the len. If we maxed out the buffer, let's make sure it has a \n. |
| */ |
| if (pb.ptr == pb.top) |
| pb.ptr[-1] = '\n'; |
| utop = pb.ptr; |
| |
| pb.ptr = kpbuf; |
| pb.top = kpbuf + kp_bufsz; |
| for (uptr = usrbuf; uptr < utop;) { |
| const char *nlptr = memchr(uptr, '\n', utop - uptr); |
| |
| if (nlptr == NULL) |
| nlptr = utop; |
| emit_print_buf_str(&pb, hdr, -1); |
| emit_print_buf_str(&pb, uptr, (nlptr - uptr) + 1); |
| uptr = nlptr + 1; |
| } |
| kprof_tracedata_write(kpbuf, pb.ptr - kpbuf); |
| atomic_set(&tpb->in_use, 0); |
| } |
| |
| void trace_printk(const char *fmt, ...) |
| { |
| va_list args; |
| |
| va_start(args, fmt); |
| trace_vprintk(fmt, args); |
| va_end(args); |
| } |
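| 
| /* Example (hypothetical call site):
| *
| *	trace_printk("unexpected fault at %p", fault_addr);
| *
| * would later read back from kptrace as something like
| *
| *	[123.456789012]:cpu2: unexpected fault at 0xffff800012345678
| */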
| |
| struct dev kprofdevtab __devtab = { |
| .name = "kprof", |
| |
| .reset = devreset, |
| .init = kprof_init, |
| .shutdown = kprof_shutdown, |
| .attach = kprof_attach, |
| .walk = kprof_walk, |
| .stat = kprof_stat, |
| .open = kprof_open, |
| .create = devcreate, |
| .close = kprof_close, |
| .read = kprof_read, |
| .bread = devbread, |
| .write = kprof_write, |
| .bwrite = devbwrite, |
| .remove = devremove, |
| .wstat = devwstat, |
| }; |