| /* Copyright (c) 2015-2016 Google Inc |
| * Barret Rhoden <brho@cs.berkeley.edu> |
| * Davide Libenzi <dlibenzi@google.com> |
| * See LICENSE for details. |
| */ |
| |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/wait.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <errno.h> |
| #include <argp.h> |
| #include <time.h> |
| #include <parlib/parlib.h> |
| #include <parlib/timing.h> |
| #include <parlib/core_set.h> |
| #include "xlib.h" |
| #include "perfconv.h" |
| #include "perf_core.h" |
| |
| /* Helpers */ |
| static void run_process_and_wait(int argc, char *argv[], |
| const struct core_set *cores); |
| |
| /* For communicating with perf_create_context() */ |
| static struct perf_context_config perf_cfg = { |
| .perf_file = "#arch/perf", |
| .kpctl_file = "#kprof/kpctl", |
| .kpdata_file = "#kprof/kpdata", |
| }; |
| |
| static struct perfconv_context *cctx; |
| static struct perf_context *pctx; |
| extern char **environ; /* POSIX envp */ |
| |
| struct perf_opts { |
| FILE *outfile; |
| const char *events; |
| char **cmd_argv; |
| int cmd_argc; |
| struct core_set cores; |
| bool got_cores; |
| bool verbose; |
| bool sampling; |
| bool stat_bignum; |
| bool record_quiet; |
| unsigned long record_period; |
| }; |
| static struct perf_opts opts; |
| |
| struct perf_cmd { |
| char *name; |
| char *desc; |
| char *opts; |
| int (*func)(struct perf_cmd *, int, char **); |
| }; |
| |
| static int perf_help(struct perf_cmd *cmd, int argc, char *argv[]); |
| static int perf_list(struct perf_cmd *cmd, int argc, char *argv[]); |
| static int perf_record(struct perf_cmd *cmd, int argc, char *argv[]); |
| static int perf_stat(struct perf_cmd *cmd, int argc, char *argv[]); |
| static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[]); |
| |
| static struct perf_cmd perf_cmds[] = { |
| { .name = "help", |
| .desc = "Detailed help for commands", |
| .opts = "COMMAND", |
| .func = perf_help, |
| }, |
| { .name = "list", |
| .desc = "Lists all available events", |
| .opts = "[REGEX]", |
| .func = perf_list, |
| }, |
| { .name = "record", |
| .desc = "Samples events during command execution", |
| .opts = 0, |
| .func = perf_record, |
| }, |
| { .name = "stat", |
| .desc = "Counts events during command execution", |
| .opts = 0, |
| .func = perf_stat, |
| }, |
| { .name = "pmu_caps", |
| .desc = "Shows PMU capabilities", |
| .opts = "", |
| .func = perf_pmu_caps, |
| }, |
| }; |
| |
| /**************************** perf help ****************************/ |
| |
| static int perf_help(struct perf_cmd *cmd, int argc, char *argv[]) |
| { |
| char *sub_argv[2]; |
| |
| if (argc < 2) { |
| fprintf(stderr, "perf %s %s\n", cmd->name, cmd->opts); |
| return -1; |
| } |
| for (int i = 0; i < COUNT_OF(perf_cmds); i++) { |
| if (!strcmp(perf_cmds[i].name, argv[1])) { |
| if (perf_cmds[i].opts) { |
| fprintf(stdout, "perf %s %s\n", |
| perf_cmds[i].name, perf_cmds[i].opts); |
| fprintf(stdout, "\t%s\n", perf_cmds[i].desc); |
| } else { |
| /* For argp subcommands, call their help |
| * directly. */ |
| sub_argv[0] = xstrdup(perf_cmds[i].name); |
| sub_argv[1] = xstrdup("--help"); |
| perf_cmds[i].func(&perf_cmds[i], 2, sub_argv); |
| free(sub_argv[0]); |
| free(sub_argv[1]); |
| } |
| return 0; |
| } |
| } |
| fprintf(stderr, "Unknown perf command %s\n", argv[1]); |
| return -1; |
| } |
| |
| /**************************** perf list ****************************/ |
| |
| static int perf_list(struct perf_cmd *cmd, int argc, char *argv[]) |
| { |
| char *show_regex = NULL; |
| |
| if (argc > 1) |
| show_regex = argv[1]; |
| perf_show_events(show_regex, stdout); |
| return 0; |
| } |
| |
| /**************************** perf pmu_caps ************************/ |
| |
| static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[]) |
| { |
| const struct perf_arch_info *pai = perf_context_get_arch_info(pctx); |
| |
| fprintf(stdout, |
| "PERF.version = %u\n" |
| "PERF.proc_arch_events = %u\n" |
| "PERF.bits_x_counter = %u\n" |
| "PERF.counters_x_proc = %u\n" |
| "PERF.bits_x_fix_counter = %u\n" |
| "PERF.fix_counters_x_proc = %u\n", |
| pai->perfmon_version, pai->proc_arch_events, |
| pai->bits_x_counter, pai->counters_x_proc, |
| pai->bits_x_fix_counter, pai->fix_counters_x_proc); |
| return 0; |
| } |
| |
| /**************************** Common argp ************************/ |
| |
| /* Collection argument parsing. These options are common to any function that |
| * will collect perf events, e.g. perf record and perf stat. */ |
| |
| static struct argp_option collect_opts[] = { |
| {"event", 'e', "EVENT", 0, "Event string, e.g. cycles:u:k"}, |
| {"cores", 'C', "CORE_LIST", 0, "List of cores, e.g. 0.2.4:8-19"}, |
| {"cpu", 'C', 0, OPTION_ALIAS}, |
| {"all-cpus", 'a', 0, 0, "Collect events on all cores (on by default)"}, |
| {"verbose", 'v', 0, 0, 0}, |
| { 0 } |
| }; |
| |
| static const char *collect_args_doc = "COMMAND [ARGS]"; |
| |
| static error_t parse_collect_opt(int key, char *arg, struct argp_state *state) |
| { |
| struct perf_opts *p_opts = state->input; |
| |
| /* argp doesn't pass input to the child parser(s) by default... */ |
| state->child_inputs[0] = state->input; |
| |
| switch (key) { |
| case 'a': |
| /* Our default operation is to track all cores; we don't follow |
| * processes yet. */ |
| break; |
| case 'C': |
| parlib_parse_cores(arg, &p_opts->cores); |
| p_opts->got_cores = TRUE; |
| break; |
| case 'e': |
| p_opts->events = arg; |
| break; |
| case 'v': |
| p_opts->verbose = TRUE; |
| break; |
| case ARGP_KEY_ARG: |
| p_opts->cmd_argc = state->argc - state->next + 1; |
| p_opts->cmd_argv = xmalloc(sizeof(char*) * |
| (p_opts->cmd_argc + 1)); |
| p_opts->cmd_argv[0] = arg; |
| memcpy(&p_opts->cmd_argv[1], &state->argv[state->next], |
| sizeof(char*) * (p_opts->cmd_argc - 1)); |
| p_opts->cmd_argv[p_opts->cmd_argc] = NULL; |
| state->next = state->argc; |
| break; |
| case ARGP_KEY_END: |
| if (!p_opts->cmd_argc) |
| argp_usage(state); |
| /* By default, we set all cores (different than linux) */ |
| if (!p_opts->got_cores) |
| parlib_get_all_core_set(&p_opts->cores); |
| break; |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| return 0; |
| } |
| |
| /* Helper, parses args using the collect_opts and the child parser for a given |
| * cmd. */ |
| static void collect_argp(struct perf_cmd *cmd, int argc, char *argv[], |
| struct argp_child *children, struct perf_opts *opts) |
| { |
| struct argp collect_opt = {collect_opts, parse_collect_opt, |
| collect_args_doc, cmd->desc, children}; |
| char *cmd_name; |
| const char *fmt = "perf %s"; |
| size_t cmd_sz = strlen(cmd->name) + strlen(fmt) + 1; |
| |
| /* Rewrite the command name from foo to perf foo for the --help output*/ |
| cmd_name = xmalloc(cmd_sz); |
| snprintf(cmd_name, cmd_sz, fmt, cmd->name); |
| cmd_name[cmd_sz - 1] = '\0'; |
| argv[0] = cmd_name; |
| argp_parse(&collect_opt, argc, argv, ARGP_IN_ORDER, 0, opts); |
| /* It's possible that someone could still be using cmd_name */ |
| } |
| |
| /* Helper, submits the events in opts to the kernel for monitoring. */ |
| static void submit_events(struct perf_opts *opts) |
| { |
| struct perf_eventsel *sel; |
| char *dup_evts, *tok, *tok_save = 0; |
| |
| dup_evts = xstrdup(opts->events); |
| for (tok = strtok_r(dup_evts, ",", &tok_save); |
| tok; |
| tok = strtok_r(NULL, ",", &tok_save)) { |
| |
| sel = perf_parse_event(tok); |
| PMEV_SET_INTEN(sel->ev.event, opts->sampling); |
| sel->ev.trigger_count = opts->record_period; |
| perf_context_event_submit(pctx, &opts->cores, sel); |
| } |
| free(dup_evts); |
| } |
| |
| /**************************** perf record ************************/ |
| |
| static struct argp_option record_opts[] = { |
| {"count", 'c', "PERIOD", 0, "Sampling period"}, |
| {"output", 'o', "FILE", 0, "Output file name (default perf.data)"}, |
| {"freq", 'F', "FREQUENCY", 0, "Sampling frequency (assumes cycles)"}, |
| {"call-graph", 'g', 0, 0, "Backtrace recording (always on!)"}, |
| {"quiet", 'q', 0, 0, "No printing to stdio"}, |
| { 0 } |
| }; |
| |
| /* In lieu of adaptively changing the period to maintain a set freq, we |
| * just assume they want cycles and that the TSC is close to that. |
| * |
| * (cycles/sec) / (samples/sec) = cycles / sample = period. |
| * |
| * TODO: this also assumes we're running the core at full speed. */ |
| static unsigned long freq_to_period(unsigned long freq) |
| { |
| return get_tsc_freq() / freq; |
| } |
| |
| static error_t parse_record_opt(int key, char *arg, struct argp_state *state) |
| { |
| struct perf_opts *p_opts = state->input; |
| |
| switch (key) { |
| case 'c': |
| if (p_opts->record_period) |
| argp_error(state, |
| "Period set. Only use at most one of -c -F"); |
| p_opts->record_period = atol(arg); |
| break; |
| case 'F': |
| if (p_opts->record_period) |
| argp_error(state, |
| "Period set. Only use at most one of -c -F"); |
| /* TODO: when we properly support freq, multiple events will |
| * have the same freq but different, dynamic, periods. */ |
| p_opts->record_period = freq_to_period(atol(arg)); |
| break; |
| case 'g': |
| /* Our default operation is to record backtraces. */ |
| break; |
| case 'o': |
| p_opts->outfile = xfopen(arg, "wb"); |
| break; |
| case 'q': |
| p_opts->record_quiet = TRUE; |
| break; |
| case ARGP_KEY_END: |
| if (!p_opts->events) |
| p_opts->events = "cycles"; |
| if (!p_opts->outfile) |
| p_opts->outfile = xfopen("perf.data", "wb"); |
| if (!p_opts->record_period) |
| p_opts->record_period = freq_to_period(1000); |
| break; |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| return 0; |
| } |
| |
| static int perf_record(struct perf_cmd *cmd, int argc, char *argv[]) |
| { |
| struct argp argp_record = {record_opts, parse_record_opt}; |
| struct argp_child children[] = { {&argp_record, 0, 0, 0}, {0} }; |
| |
| collect_argp(cmd, argc, argv, children, &opts); |
| opts.sampling = TRUE; |
| |
| /* Once a perf event is submitted, it'll start counting and firing the |
| * IRQ. However, we can control whether or not the samples are |
| * collected. */ |
| submit_events(&opts); |
| perf_start_sampling(pctx); |
| run_process_and_wait(opts.cmd_argc, opts.cmd_argv, |
| opts.got_cores ? &opts.cores : NULL); |
| perf_stop_sampling(pctx); |
| if (opts.verbose) |
| perf_context_show_events(pctx, stdout); |
| /* The events are still counting and firing IRQs. Let's be nice and |
| * turn them off to minimize our impact. */ |
| perf_stop_events(pctx); |
| /* Generate the Linux perf file format with the traces which have been |
| * created during this operation. */ |
| perf_convert_trace_data(cctx, perf_cfg.kpdata_file, opts.outfile); |
| fclose(opts.outfile); |
| return 0; |
| } |
| |
| /**************************** perf stat ************************/ |
| |
| static struct argp_option stat_opts[] = { |
| {"big-num", 'B', 0, 0, "Formatting option"}, |
| {"output", 'o', "FILE", 0, "Print output to file (default stdout)"}, |
| { 0 } |
| }; |
| |
| static error_t parse_stat_opt(int key, char *arg, struct argp_state *state) |
| { |
| struct perf_opts *p_opts = state->input; |
| |
| switch (key) { |
| case 'B': |
| p_opts->stat_bignum = TRUE; |
| break; |
| case 'o': |
| p_opts->outfile = xfopen(arg, "w"); |
| break; |
| case ARGP_KEY_END: |
| if (!p_opts->events) |
| p_opts->events = "cache-misses,cache-references," |
| "branch-misses,branches,instructions,cycles"; |
| if (!p_opts->outfile) |
| p_opts->outfile = stdout; |
| break; |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| return 0; |
| } |
| |
| struct stat_val { |
| char *name; |
| uint64_t count; |
| }; |
| |
| /* Helper, given a name, fetches its value as a float. */ |
| static float get_count_for(char *name, struct stat_val *all_vals, |
| size_t nr_vals) |
| { |
| for (int i = 0; i < nr_vals; i++) { |
| if (!strcmp(name, all_vals[i].name)) |
| return (float)all_vals[i].count; |
| } |
| return 0.0; |
| } |
| |
| /* Helper, gets the seconds count as a float */ |
| static float get_seconds(struct stat_val *all_vals, size_t nr_vals) |
| { |
| float sec = get_count_for("nsec", all_vals, nr_vals) / 1000000000; |
| |
| /* We should never have a time of 0, but in case something went wrong, |
| * don't hand back 0 (divide by 0 errors). */ |
| return sec != 0.0 ? sec : 1.0; |
| } |
| |
| /* Prints "X per second", scaling for K, M, or G. */ |
| static void print_default_rate(FILE *out, struct stat_val *val, |
| struct stat_val *all_vals, size_t nr_vals) |
| { |
| float rate = val->count / get_seconds(all_vals, nr_vals); |
| char scale = ' '; |
| |
| if (rate > 1000000000) { |
| rate /= 1000000000; |
| scale = 'G'; |
| } else if (rate > 1000000) { |
| rate /= 1000000; |
| scale = 'M'; |
| } else if (rate > 1000) { |
| rate /= 1000; |
| scale = 'K'; |
| } |
| fprintf(out, "%9.3f %c/sec\n", rate, scale); |
| } |
| |
| /* Prints a line for the given stat val. We pass all the vals since some stats |
| * will adjust their output based on *other* known values. e.g. IPC. */ |
| static void stat_print_val(FILE *out, struct stat_val *val, |
| struct stat_val *all_vals, size_t nr_vals) |
| { |
| /* Everyone gets the same front part of the printout */ |
| fprintf(out, "%18llu %-25s #", val->count, val->name); |
| |
| /* Based on the particular event and what other events we know, we may |
| * print something different to the summary bit after the #. */ |
| if (!strcmp(val->name, "instructions")) { |
| float cycles = get_count_for("cycles", all_vals, nr_vals); |
| |
| if (cycles != 0.0) |
| fprintf(out, "%9.3f insns per cycle\n", |
| val->count / cycles); |
| else |
| print_default_rate(out, val, all_vals, nr_vals); |
| } else if (!strcmp(val->name, "cache-misses")) { |
| float cache_ref = get_count_for("cache-references", all_vals, |
| nr_vals); |
| |
| if (cache_ref != 0.0) |
| fprintf(out, "%8.2f%% of all refs\n", |
| val->count * 100 / cache_ref); |
| else |
| print_default_rate(out, val, all_vals, nr_vals); |
| } else if (!strcmp(val->name, "branch-misses")) { |
| float branches = get_count_for("branches", all_vals, nr_vals); |
| |
| if (branches != 0.0) |
| fprintf(out, "%8.2f%% of all branches\n", |
| val->count * 100 / branches); |
| else |
| print_default_rate(out, val, all_vals, nr_vals); |
| } else { |
| print_default_rate(out, val, all_vals, nr_vals); |
| } |
| } |
| |
| static char *cmd_as_str(int argc, char *const argv[]) |
| { |
| size_t len = 0; |
| char *str; |
| |
| for (int i = 0; i < argc; i++) |
| len += strlen(argv[i]) + 1; |
| str = xzmalloc(len); |
| for (int i = 0; i < argc; i++) { |
| strlcat(str, argv[i], len); |
| if (i != argc - 1) |
| strlcat(str, " ", len); |
| } |
| return str; |
| } |
| |
| static struct stat_val *collect_stats(struct perf_context *pctx, |
| struct timespec *diff) |
| { |
| struct stat_val *stat_vals; |
| |
| /* the last stat is time (nsec). */ |
| stat_vals = xzmalloc(sizeof(struct stat_val) * (pctx->event_count + 1)); |
| for (int i = 0; i < pctx->event_count; i++) { |
| stat_vals[i].count = perf_get_event_count(pctx, i); |
| stat_vals[i].name = pctx->events[i].sel.fq_str; |
| } |
| stat_vals[pctx->event_count].name = "nsec"; |
| stat_vals[pctx->event_count].count = diff->tv_sec * 1000000000 + |
| diff->tv_nsec; |
| return stat_vals; |
| } |
| |
| static int perf_stat(struct perf_cmd *cmd, int argc, char *argv[]) |
| { |
| struct argp argp_stat = {stat_opts, parse_stat_opt}; |
| struct argp_child children[] = { {&argp_stat, 0, 0, 0}, {0} }; |
| FILE *out; |
| struct timespec start, end, diff; |
| struct stat_val *stat_vals; |
| char *cmd_string; |
| |
| collect_argp(cmd, argc, argv, children, &opts); |
| opts.sampling = FALSE; |
| out = opts.outfile; |
| |
| /* As soon as we submit one event, that event is being tracked, meaning |
| * that the setup/teardown of perf events is also tracked. Each event |
| * (including the clock measurement) will roughly account for either the |
| * start or stop of every other event. */ |
| clock_gettime(CLOCK_REALTIME, &start); |
| submit_events(&opts); |
| run_process_and_wait(opts.cmd_argc, opts.cmd_argv, |
| opts.got_cores ? &opts.cores : NULL); |
| clock_gettime(CLOCK_REALTIME, &end); |
| subtract_timespecs(&diff, &end, &start); |
| stat_vals = collect_stats(pctx, &diff); |
| perf_stop_events(pctx); |
| cmd_string = cmd_as_str(opts.cmd_argc, opts.cmd_argv); |
| fprintf(out, "\nPerformance counter stats for '%s':\n\n", cmd_string); |
| free(cmd_string); |
| for (int i = 0; i < pctx->event_count; i++) |
| stat_print_val(out, &stat_vals[i], stat_vals, |
| pctx->event_count + 1); |
| fprintf(out, "\n%8llu.%09llu seconds time elapsed\n\n", diff.tv_sec, |
| diff.tv_nsec); |
| fclose(out); |
| free(stat_vals); |
| return 0; |
| } |
| |
| static void run_process_and_wait(int argc, char *argv[], |
| const struct core_set *cores) |
| { |
| int pid, status; |
| |
| pid = create_child_with_stdfds(argv[0], argc, argv, environ); |
| if (pid < 0) { |
| perror("Unable to spawn child"); |
| fflush(stderr); |
| exit(1); |
| } |
| if (cores) { |
| if (provision_core_set(pid, cores)) { |
| fprintf(stderr, |
| "Unable to provision all cores to PID %d: cmd='%s'\n", |
| pid, argv[0]); |
| sys_proc_destroy(pid, -1); |
| exit(1); |
| } |
| } |
| sys_proc_run(pid); |
| waitpid(pid, &status, 0); |
| } |
| |
| static void save_cmdline(int argc, char *argv[]) |
| { |
| size_t len = 0; |
| char *p; |
| |
| for (int i = 0; i < argc; i++) |
| len += strlen(argv[i]) + 1; |
| cmd_line_save = xmalloc(len); |
| p = cmd_line_save; |
| for (int i = 0; i < argc; i++) { |
| strcpy(p, argv[i]); |
| p += strlen(argv[i]); |
| if (!(i == argc - 1)) { |
| *p = ' '; /* overwrite \0 with ' ' */ |
| p++; |
| } |
| } |
| } |
| |
| static void global_usage(void) |
| { |
| fprintf(stderr, " Usage: perf COMMAND [ARGS]\n"); |
| fprintf(stderr, "\n Available commands:\n\n"); |
| for (int i = 0; i < COUNT_OF(perf_cmds); i++) |
| fprintf(stderr, " \t%s: %s\n", perf_cmds[i].name, |
| perf_cmds[i].desc); |
| exit(-1); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| int i, ret = -1; |
| |
| save_cmdline(argc, argv); |
| |
| /* Common inits. Some functions don't need these, but it doesn't hurt. |
| */ |
| perf_initialize(); |
| pctx = perf_create_context(&perf_cfg); |
| cctx = perfconv_create_context(pctx); |
| |
| if (argc < 2) |
| global_usage(); |
| for (i = 0; i < COUNT_OF(perf_cmds); i++) { |
| if (!strcmp(perf_cmds[i].name, argv[1])) { |
| ret = perf_cmds[i].func(&perf_cmds[i], argc - 1, |
| argv + 1); |
| break; |
| } |
| } |
| if (i == COUNT_OF(perf_cmds)) |
| global_usage(); |
| /* This cleanup is optional - they'll all be dealt with when the program |
| * exits. This means its safe for us to exit(-1) at any point in the |
| * program. */ |
| perf_free_context(pctx); |
| perfconv_free_context(cctx); |
| perf_finalize(); |
| return ret; |
| } |