blob: 1938fcf97de07b44858aa94261946e1e89435851 [file] [log] [blame]
/* Copyright (c) 2015-2016 Google Inc
* Barret Rhoden <brho@cs.berkeley.edu>
* Davide Libenzi <dlibenzi@google.com>
* See LICENSE for details.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <errno.h>
#include <argp.h>
#include <time.h>
#include <parlib/parlib.h>
#include <parlib/timing.h>
#include <parlib/core_set.h>
#include "xlib.h"
#include "perfconv.h"
#include "perf_core.h"
/* Helpers */
/* Forward declaration; the definition is near the end of this file.  Spawns
 * argv as a child process, provisions 'cores' to it when 'cores' is non-NULL,
 * runs it, and waits for it with waitpid().  Exits the program if the child
 * cannot be spawned or provisioned. */
static void run_process_and_wait(int argc, char *argv[],
const struct core_set *cores);
/* For communicating with perf_create_context().  main() passes this config
 * when creating pctx; perf record additionally passes kpdata_file to
 * perf_convert_trace_data() as the source of the trace data. */
static struct perf_context_config perf_cfg = {
.perf_file = "#arch/perf",
.kpctl_file = "#kprof/kpctl",
.kpdata_file = "#kprof/kpdata",
};
/* Conversion context, created in main() from pctx; perf record uses it when
 * writing the output file. */
static struct perfconv_context *cctx;
/* Perf context shared by all subcommands; created in main(). */
static struct perf_context *pctx;
extern char **environ; /* POSIX envp */
/* Options gathered by the argp parsers for perf record and perf stat. */
struct perf_opts {
/* Where results go.  Opened by -o; otherwise perf record defaults to
 * "perf.data" and perf stat to stdout. */
FILE *outfile;
/* Comma-separated event list from -e; submit_events() splits it on ','. */
const char *events;
/* NULL-terminated argv of the command to run, built on the first non-option
 * argument. */
char **cmd_argv;
/* Number of entries in cmd_argv, not counting the NULL terminator. */
int cmd_argc;
/* Cores to collect on: parsed from -C, or all cores when -C is absent. */
struct core_set cores;
/* TRUE once -C was seen.  Only then are the cores also provisioned to the
 * child process. */
bool got_cores;
/* Set by -v; perf record then dumps the events after sampling stops. */
bool verbose;
/* TRUE for perf record, FALSE for perf stat; handed to PMEV_SET_INTEN() for
 * every submitted event. */
bool sampling;
/* Set by perf stat -B.  Not read anywhere in the visible part of this file. */
bool stat_bignum;
/* Set by perf record -q.  Not read anywhere in the visible part of this
 * file. */
bool record_quiet;
/* Copied into each event's trigger_count.  Set by perf record from -c or -F,
 * or from a 1000 Hz default; left at 0 by perf stat. */
unsigned long record_period;
};
/* The single option set, filled in by collect_argp(). */
static struct perf_opts opts;
/* Describes one perf subcommand; the instances live in the perf_cmds table. */
struct perf_cmd {
/* Subcommand name, compared against argv[1] in main() and perf_help(). */
char *name;
/* One-line description, printed by global_usage() and perf help, and used as
 * the argp doc string by collect_argp(). */
char *desc;
/* Usage string for the arguments, or 0 for argp-based subcommands;
 * perf_help() runs those with "--help" instead of printing this. */
char *opts;
/* Handler.  main() calls it with the matching table entry and with argc/argv
 * shifted by one, so argv[0] is the subcommand name. */
int (*func)(struct perf_cmd *, int, char **);
};
/* Subcommand handlers, declared up front so that the perf_cmds table can
 * reference them before their definitions. */
static int perf_help(struct perf_cmd *cmd, int argc, char *argv[]);
static int perf_list(struct perf_cmd *cmd, int argc, char *argv[]);
static int perf_record(struct perf_cmd *cmd, int argc, char *argv[]);
static int perf_stat(struct perf_cmd *cmd, int argc, char *argv[]);
static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[]);
/* Table of perf subcommands, searched by name in main() and perf_help().  A
 * NULL .opts marks an argp-based subcommand: perf_help() gets its usage text
 * by running the command itself with "--help". */
static struct perf_cmd perf_cmds[] = {
	{
		.name = "help",
		.desc = "Detailed help for commands",
		.opts = "COMMAND",
		.func = perf_help,
	},
	{
		.name = "list",
		.desc = "Lists all available events",
		.opts = "[REGEX]",
		.func = perf_list,
	},
	{
		.name = "record",
		.desc = "Samples events during command execution",
		.opts = NULL,
		.func = perf_record,
	},
	{
		.name = "stat",
		.desc = "Counts events during command execution",
		.opts = NULL,
		.func = perf_stat,
	},
	{
		.name = "pmu_caps",
		.desc = "Shows PMU capabilities",
		.opts = "",
		.func = perf_pmu_caps,
	},
};
/**************************** perf help ****************************/
/* 'perf help COMMAND': prints the usage line and description of COMMAND to
 * stdout.  Commands with a NULL .opts are argp-based, so they are invoked with
 * "--help" to print their own help instead.
 *
 * Returns 0 when COMMAND is known, -1 when it is missing or unknown. */
static int perf_help(struct perf_cmd *cmd, int argc, char *argv[])
{
	struct perf_cmd *target = NULL;
	char *help_argv[2];

	if (argc < 2) {
		fprintf(stderr, "perf %s %s\n", cmd->name, cmd->opts);
		return -1;
	}
	/* First table entry whose name matches wins. */
	for (size_t idx = 0; idx < COUNT_OF(perf_cmds) && !target; idx++) {
		if (strcmp(perf_cmds[idx].name, argv[1]) == 0)
			target = &perf_cmds[idx];
	}
	if (!target) {
		fprintf(stderr, "Unknown perf command %s\n", argv[1]);
		return -1;
	}
	if (target->opts) {
		fprintf(stdout, "perf %s %s\n", target->name, target->opts);
		fprintf(stdout, "\t%s\n", target->desc);
		return 0;
	}
	/* argp subcommand: let it print its own --help.  The handler gets
	 * private, writable copies of the two strings. */
	help_argv[0] = xstrdup(target->name);
	help_argv[1] = xstrdup("--help");
	target->func(target, 2, help_argv);
	free(help_argv[0]);
	free(help_argv[1]);
	return 0;
}
/**************************** perf list ****************************/
/* 'perf list [REGEX]': has perf_show_events() print to stdout.  The optional
 * first argument is passed through as the regex; NULL is passed when it is
 * absent.  Always returns 0. */
static int perf_list(struct perf_cmd *cmd, int argc, char *argv[])
{
	perf_show_events(argc > 1 ? argv[1] : NULL, stdout);
	return 0;
}
/**************************** perf pmu_caps ************************/
/* 'perf pmu_caps': prints the fields of the arch info attached to pctx, one
 * "PERF.<field> = <value>" line each, to stdout.  Always returns 0. */
static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[])
{
	const struct perf_arch_info *info = perf_context_get_arch_info(pctx);
	FILE *out = stdout;

	fprintf(out, "PERF.version = %u\n", info->perfmon_version);
	fprintf(out, "PERF.proc_arch_events = %u\n", info->proc_arch_events);
	fprintf(out, "PERF.bits_x_counter = %u\n", info->bits_x_counter);
	fprintf(out, "PERF.counters_x_proc = %u\n", info->counters_x_proc);
	fprintf(out, "PERF.bits_x_fix_counter = %u\n",
		info->bits_x_fix_counter);
	fprintf(out, "PERF.fix_counters_x_proc = %u\n",
		info->fix_counters_x_proc);
	return 0;
}
/**************************** Common argp ************************/
/* Options common to every subcommand that collects perf events (perf record
 * and perf stat).  parse_collect_opt() handles them.  Fields left out of an
 * entry are zero, exactly as in a positional initializer. */
static struct argp_option collect_opts[] = {
	{ .name = "event", .key = 'e', .arg = "EVENT",
	  .doc = "Event string, e.g. cycles:u:k" },
	{ .name = "cores", .key = 'C', .arg = "CORE_LIST",
	  .doc = "List of cores, e.g. 0.2.4:8-19" },
	{ .name = "cpu", .key = 'C', .flags = OPTION_ALIAS },
	{ .name = "all-cpus", .key = 'a',
	  .doc = "Collect events on all cores (on by default)" },
	{ .name = "verbose", .key = 'v' },
	{ 0 }
};
/* Usage text for the non-option arguments: the command to run. */
static const char *collect_args_doc = "COMMAND [ARGS]";
/* argp parser for collect_opts.  state->input is the struct perf_opts being
 * filled in.
 *
 * The first non-option argument starts the command to run: it and every
 * argument after it are captured into cmd_argv (NULL-terminated), and argp is
 * told there is nothing left to parse.  At the end of parsing, a missing
 * command triggers the usage message, and the core set defaults to all cores
 * when -C was not given.
 *
 * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise. */
static error_t parse_collect_opt(int key, char *arg, struct argp_state *state)
{
	struct perf_opts *po = state->input;
	int nr_rest;
	char **cmdv;

	/* argp doesn't pass input to the child parser(s) by default, so hand
	 * ours to the first child on every call. */
	state->child_inputs[0] = state->input;

	switch (key) {
	case 'a':
		/* Tracking all cores is already the default; we don't follow
		 * processes yet. */
		return 0;
	case 'C':
		parlib_parse_cores(arg, &po->cores);
		po->got_cores = TRUE;
		return 0;
	case 'e':
		po->events = arg;
		return 0;
	case 'v':
		po->verbose = TRUE;
		return 0;
	case ARGP_KEY_ARG:
		/* 'arg' is the command; the nr_rest entries from
		 * argv[state->next] onward are its arguments. */
		nr_rest = state->argc - state->next;
		cmdv = xmalloc(sizeof(char*) * (nr_rest + 2));
		cmdv[0] = arg;
		for (int i = 0; i < nr_rest; i++)
			cmdv[i + 1] = state->argv[state->next + i];
		cmdv[nr_rest + 1] = NULL;
		po->cmd_argc = nr_rest + 1;
		po->cmd_argv = cmdv;
		/* Consume the rest so argp doesn't parse the command's own
		 * options. */
		state->next = state->argc;
		return 0;
	case ARGP_KEY_END:
		if (po->cmd_argc == 0)
			argp_usage(state);
		/* By default, we set all cores (different than linux) */
		if (!po->got_cores)
			parlib_get_all_core_set(&po->cores);
		return 0;
	default:
		return ARGP_ERR_UNKNOWN;
	}
}
/* Helper, parses args using the collect_opts and the child parser for a given
* cmd. */
static void collect_argp(struct perf_cmd *cmd, int argc, char *argv[],
struct argp_child *children, struct perf_opts *opts)
{
struct argp collect_opt = {collect_opts, parse_collect_opt,
collect_args_doc, cmd->desc, children};
char *cmd_name;
const char *fmt = "perf %s";
size_t cmd_sz = strlen(cmd->name) + strlen(fmt) + 1;
/* Rewrite the command name from foo to perf foo for the --help output*/
cmd_name = xmalloc(cmd_sz);
snprintf(cmd_name, cmd_sz, fmt, cmd->name);
cmd_name[cmd_sz - 1] = '\0';
argv[0] = cmd_name;
argp_parse(&collect_opt, argc, argv, ARGP_IN_ORDER, 0, opts);
/* It's possible that someone could still be using cmd_name */
}
/* Helper: splits opts->events on ',' and submits each event to the kernel for
 * monitoring on opts->cores.  Every event gets its interrupt-enable setting
 * from opts->sampling and its trigger count from opts->record_period. */
static void submit_events(struct perf_opts *opts)
{
	/* strtok_r() writes into its input, so tokenize a private copy. */
	char *evt_list = xstrdup(opts->events);
	char *cursor = NULL;
	char *evt = strtok_r(evt_list, ",", &cursor);

	while (evt) {
		struct perf_eventsel *sel = perf_parse_event(evt);

		PMEV_SET_INTEN(sel->ev.event, opts->sampling);
		sel->ev.trigger_count = opts->record_period;
		perf_context_event_submit(pctx, &opts->cores, sel);
		evt = strtok_r(NULL, ",", &cursor);
	}
	free(evt_list);
}
/**************************** perf record ************************/
/* Options specific to perf record; parse_record_opt() handles them.  Fields
 * left out of an entry are zero, exactly as in a positional initializer. */
static struct argp_option record_opts[] = {
	{ .name = "count", .key = 'c', .arg = "PERIOD",
	  .doc = "Sampling period" },
	{ .name = "output", .key = 'o', .arg = "FILE",
	  .doc = "Output file name (default perf.data)" },
	{ .name = "freq", .key = 'F', .arg = "FREQUENCY",
	  .doc = "Sampling frequency (assumes cycles)" },
	{ .name = "call-graph", .key = 'g',
	  .doc = "Backtrace recording (always on!)" },
	{ .name = "quiet", .key = 'q',
	  .doc = "No printing to stdio" },
	{ 0 }
};
/* Converts a sampling frequency (samples/sec) into a sampling period.
 *
 * In lieu of adaptively changing the period to maintain a set freq, we just
 * assume they want cycles and that the TSC is close to that:
 *
 *	(cycles/sec) / (samples/sec) = cycles / sample = period.
 *
 * Returns 0 for a frequency of 0, since there is no meaningful period and the
 * division would be undefined.  parse_record_opt() treats a period of 0 as
 * "not set" and falls back to its default.
 *
 * TODO: this also assumes we're running the core at full speed. */
static unsigned long freq_to_period(unsigned long freq)
{
	if (freq == 0)
		return 0;
	return get_tsc_freq() / freq;
}
/* Helper: parses 'arg' as a strictly positive base-10 integer for the option
 * described by 'what' (used only in the error message).
 *
 * Empty, non-numeric, negative, zero and out-of-range values, as well as
 * values with trailing characters, are reported through argp_error().  Returns
 * the parsed value, or 0 if argp_error() returned instead of exiting. */
static unsigned long parse_positive_count(struct argp_state *state,
                                          const char *arg, const char *what)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(arg, &end, 10);
	/* strtoul() accepts a leading '-' and wraps the result around, so the
	 * sign has to be rejected by hand. */
	if (errno || end == arg || *end != '\0' || strchr(arg, '-') || !val) {
		argp_error(state,
			   "Invalid %s '%s': expected a positive integer",
			   what, arg);
		return 0;
	}
	return val;
}

/* argp child parser for record_opts.  state->input is the struct perf_opts
 * being filled in (forwarded by parse_collect_opt()).
 *
 * -c and -F both set record_period and are mutually exclusive.  Their
 * arguments must be positive integers; the previous atol() conversion let
 * garbage through as 0, which for -F meant dividing by zero.  At the end of
 * parsing, unset fields get their defaults: the "cycles" event, the
 * "perf.data" output file, and a 1000 Hz sampling rate.
 *
 * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise. */
static error_t parse_record_opt(int key, char *arg, struct argp_state *state)
{
	struct perf_opts *p_opts = state->input;
	unsigned long val;

	switch (key) {
	case 'c':
		if (p_opts->record_period)
			argp_error(state,
				   "Period set. Only use at most one of -c -F");
		val = parse_positive_count(state, arg, "period");
		if (val)
			p_opts->record_period = val;
		break;
	case 'F':
		if (p_opts->record_period)
			argp_error(state,
				   "Period set. Only use at most one of -c -F");
		/* TODO: when we properly support freq, multiple events will
		 * have the same freq but different, dynamic, periods. */
		val = parse_positive_count(state, arg, "frequency");
		if (val)
			p_opts->record_period = freq_to_period(val);
		break;
	case 'g':
		/* Our default operation is to record backtraces. */
		break;
	case 'o':
		p_opts->outfile = xfopen(arg, "wb");
		break;
	case 'q':
		p_opts->record_quiet = TRUE;
		break;
	case ARGP_KEY_END:
		if (!p_opts->events)
			p_opts->events = "cycles";
		if (!p_opts->outfile)
			p_opts->outfile = xfopen("perf.data", "wb");
		if (!p_opts->record_period)
			p_opts->record_period = freq_to_period(1000);
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}
static int perf_record(struct perf_cmd *cmd, int argc, char *argv[])
{
struct argp argp_record = {record_opts, parse_record_opt};
struct argp_child children[] = { {&argp_record, 0, 0, 0}, {0} };
collect_argp(cmd, argc, argv, children, &opts);
opts.sampling = TRUE;
/* Once a perf event is submitted, it'll start counting and firing the
* IRQ. However, we can control whether or not the samples are
* collected. */
submit_events(&opts);
perf_start_sampling(pctx);
run_process_and_wait(opts.cmd_argc, opts.cmd_argv,
opts.got_cores ? &opts.cores : NULL);
perf_stop_sampling(pctx);
if (opts.verbose)
perf_context_show_events(pctx, stdout);
/* The events are still counting and firing IRQs. Let's be nice and
* turn them off to minimize our impact. */
perf_stop_events(pctx);
/* Generate the Linux perf file format with the traces which have been
* created during this operation. */
perf_convert_trace_data(cctx, perf_cfg.kpdata_file, opts.outfile);
fclose(opts.outfile);
return 0;
}
/**************************** perf stat ************************/
/* Options specific to perf stat; parse_stat_opt() handles them.  Fields left
 * out of an entry are zero, exactly as in a positional initializer. */
static struct argp_option stat_opts[] = {
	{ .name = "big-num", .key = 'B',
	  .doc = "Formatting option" },
	{ .name = "output", .key = 'o', .arg = "FILE",
	  .doc = "Print output to file (default stdout)" },
	{ 0 }
};
/* argp child parser for stat_opts.  state->input is the struct perf_opts being
 * filled in (forwarded by parse_collect_opt()).
 *
 * At the end of parsing, an unset event list defaults to a fixed set of six
 * events and an unset output defaults to stdout.
 *
 * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise. */
static error_t parse_stat_opt(int key, char *arg, struct argp_state *state)
{
	struct perf_opts *so = state->input;

	if (key == 'B') {
		so->stat_bignum = TRUE;
		return 0;
	}
	if (key == 'o') {
		so->outfile = xfopen(arg, "w");
		return 0;
	}
	if (key != ARGP_KEY_END)
		return ARGP_ERR_UNKNOWN;

	/* Parsing is done: fill in whatever the user left out. */
	if (so->events == NULL)
		so->events = "cache-misses,cache-references,"
			     "branch-misses,branches,instructions,cycles";
	if (so->outfile == NULL)
		so->outfile = stdout;
	return 0;
}
/* One named counter value gathered by perf stat; see collect_stats(). */
struct stat_val {
/* Event name, or "nsec" for the elapsed-time entry.  Not owned by this struct:
 * collect_stats() points it at an event's fq_str or at a string literal. */
char *name;
/* Event count, or elapsed nanoseconds for the "nsec" entry. */
uint64_t count;
};
/* Helper, given a name, fetches its value as a float. */
static float get_count_for(char *name, struct stat_val *all_vals,
size_t nr_vals)
{
for (int i = 0; i < nr_vals; i++) {
if (!strcmp(name, all_vals[i].name))
return (float)all_vals[i].count;
}
return 0.0;
}
/* Helper: returns the elapsed time in seconds as a float, derived from the
 * "nsec" entry of all_vals.  A result of zero is replaced by 1.0 so that
 * callers can safely divide by it. */
static float get_seconds(struct stat_val *all_vals, size_t nr_vals)
{
	float nsec = get_count_for("nsec", all_vals, nr_vals);
	float sec = nsec / 1000000000;

	/* We should never see a time of 0, but if something went wrong, don't
	 * hand back 0 (divide by 0 errors). */
	if (sec == 0.0)
		return 1.0;
	return sec;
}
/* Prints val's count as a per-second rate ("X /sec"), scaled down with a K, M
 * or G prefix once the rate exceeds one thousand, one million or one billion
 * respectively. */
static void print_default_rate(FILE *out, struct stat_val *val,
                               struct stat_val *all_vals, size_t nr_vals)
{
	/* Largest scale first; the first threshold the rate exceeds wins. */
	static const struct {
		float	threshold;
		char	prefix;
	} scales[] = {
		{ 1000000000, 'G' },
		{ 1000000, 'M' },
		{ 1000, 'K' },
	};
	float per_sec = val->count / get_seconds(all_vals, nr_vals);
	char prefix = ' ';

	for (size_t i = 0; i < sizeof(scales) / sizeof(scales[0]); i++) {
		if (per_sec > scales[i].threshold) {
			per_sec /= scales[i].threshold;
			prefix = scales[i].prefix;
			break;
		}
	}
	fprintf(out, "%9.3f %c/sec\n", per_sec, prefix);
}
/* Prints one output line for the given stat val: the raw count, the event
 * name, and a summary after the '#'.
 *
 * All the vals are passed in because some summaries depend on *other* known
 * values:
 *	- "instructions" is shown per cycle when "cycles" is known,
 *	- "cache-misses" as a percentage of "cache-references",
 *	- "branch-misses" as a percentage of "branches".
 * Everything else, or any of the above whose companion count is missing or
 * zero, is printed as a plain per-second rate. */
static void stat_print_val(FILE *out, struct stat_val *val,
                           struct stat_val *all_vals, size_t nr_vals)
{
	/* Everyone gets the same front part of the printout.  %llu requires an
	 * unsigned long long, which uint64_t is not guaranteed to be, hence
	 * the cast. */
	fprintf(out, "%18llu %-25s #", (unsigned long long)val->count,
		val->name);
	/* Based on the particular event and what other events we know, we may
	 * print something different to the summary bit after the #. */
	if (!strcmp(val->name, "instructions")) {
		float cycles = get_count_for("cycles", all_vals, nr_vals);

		if (cycles != 0.0)
			fprintf(out, "%9.3f insns per cycle\n",
				val->count / cycles);
		else
			print_default_rate(out, val, all_vals, nr_vals);
	} else if (!strcmp(val->name, "cache-misses")) {
		float cache_ref = get_count_for("cache-references", all_vals,
						nr_vals);

		if (cache_ref != 0.0)
			fprintf(out, "%8.2f%% of all refs\n",
				val->count * 100 / cache_ref);
		else
			print_default_rate(out, val, all_vals, nr_vals);
	} else if (!strcmp(val->name, "branch-misses")) {
		float branches = get_count_for("branches", all_vals, nr_vals);

		if (branches != 0.0)
			fprintf(out, "%8.2f%% of all branches\n",
				val->count * 100 / branches);
		else
			print_default_rate(out, val, all_vals, nr_vals);
	} else {
		print_default_rate(out, val, all_vals, nr_vals);
	}
}
/* Joins the argc strings of argv into one string, separated by single spaces.
 * With argc == 0 the result is the empty string.
 *
 * Returns a buffer allocated with xzmalloc(); the caller frees it. */
static char *cmd_as_str(int argc, char *const argv[])
{
	/* Start at 1 so there is always room for the terminating NUL, even
	 * when there are no arguments at all. */
	size_t len = 1;
	char *str, *p;

	for (int i = 0; i < argc; i++)
		len += strlen(argv[i]) + 1;
	str = xzmalloc(len);
	p = str;
	/* Append at a running cursor instead of rescanning the whole string
	 * for every argument. */
	for (int i = 0; i < argc; i++) {
		size_t arg_len = strlen(argv[i]);

		if (i != 0)
			*p++ = ' ';
		memcpy(p, argv[i], arg_len);
		p += arg_len;
	}
	*p = '\0';
	return str;
}
/* Gathers the current count of every event in pctx, followed by one extra
 * entry named "nsec" that holds the elapsed time 'diff' in nanoseconds.
 *
 * Returns an array of pctx->event_count + 1 entries allocated with xzmalloc();
 * the caller frees it.  The name pointers are borrowed (event fq_str or a
 * string literal) and must not be freed. */
static struct stat_val *collect_stats(struct perf_context *pctx,
                                      struct timespec *diff)
{
	struct stat_val *stat_vals;

	/* the last stat is time (nsec). */
	stat_vals = xzmalloc(sizeof(*stat_vals) * (pctx->event_count + 1));
	for (int i = 0; i < pctx->event_count; i++) {
		stat_vals[i].count = perf_get_event_count(pctx, i);
		stat_vals[i].name = pctx->events[i].sel.fq_str;
	}
	stat_vals[pctx->event_count].name = "nsec";
	/* Widen before multiplying: done in the type of tv_sec, the product
	 * can overflow before it ever reaches the 64-bit count. */
	stat_vals[pctx->event_count].count =
		(uint64_t)diff->tv_sec * 1000000000ULL +
		(uint64_t)diff->tv_nsec;
	return stat_vals;
}
/* 'perf stat [OPTIONS] COMMAND [ARGS]': counts the requested events while
 * COMMAND runs, then prints one line per event plus the elapsed time to the
 * selected output (stdout unless -o was given).  Returns 0. */
static int perf_stat(struct perf_cmd *cmd, int argc, char *argv[])
{
	struct argp argp_stat = {stat_opts, parse_stat_opt};
	struct argp_child children[] = { {&argp_stat, 0, 0, 0}, {0} };
	FILE *out;
	struct timespec start, end, diff;
	struct stat_val *stat_vals;
	char *cmd_string;

	collect_argp(cmd, argc, argv, children, &opts);
	opts.sampling = FALSE;
	out = opts.outfile;
	/* As soon as we submit one event, that event is being tracked, meaning
	 * that the setup/teardown of perf events is also tracked.  Each event
	 * (including the clock measurement) will roughly account for either the
	 * start or stop of every other event. */
	clock_gettime(CLOCK_REALTIME, &start);
	submit_events(&opts);
	run_process_and_wait(opts.cmd_argc, opts.cmd_argv,
			     opts.got_cores ? &opts.cores : NULL);
	clock_gettime(CLOCK_REALTIME, &end);
	subtract_timespecs(&diff, &end, &start);
	stat_vals = collect_stats(pctx, &diff);
	perf_stop_events(pctx);
	cmd_string = cmd_as_str(opts.cmd_argc, opts.cmd_argv);
	fprintf(out, "\nPerformance counter stats for '%s':\n\n", cmd_string);
	free(cmd_string);
	/* The trailing "nsec" entry is passed along for the rate
	 * computations, but it does not get a line of its own. */
	for (int i = 0; i < pctx->event_count; i++)
		stat_print_val(out, &stat_vals[i], stat_vals,
			       pctx->event_count + 1);
	/* %llu requires unsigned long long; tv_sec and tv_nsec have other
	 * types, hence the casts. */
	fprintf(out, "\n%8llu.%09llu seconds time elapsed\n\n",
		(unsigned long long)diff.tv_sec,
		(unsigned long long)diff.tv_nsec);
	/* Only close a stream that was opened for -o.  stdout stays open for
	 * whatever runs after us; just push our output out. */
	if (out == stdout)
		fflush(out);
	else
		fclose(out);
	free(stat_vals);
	return 0;
}
/* Creates a child process for argv[0] with our standard fds and environment,
 * provisions 'cores' to it when 'cores' is non-NULL, runs it, and waits for
 * it.  The child's wait status is not examined.
 *
 * Exits the program with status 1 if the child cannot be created or the cores
 * cannot be provisioned; in the latter case the child is destroyed first. */
static void run_process_and_wait(int argc, char *argv[],
                                 const struct core_set *cores)
{
	int wstatus;
	int child = create_child_with_stdfds(argv[0], argc, argv, environ);

	if (child < 0) {
		perror("Unable to spawn child");
		fflush(stderr);
		exit(1);
	}
	if (cores && provision_core_set(child, cores)) {
		fprintf(stderr,
			"Unable to provision all cores to PID %d: cmd='%s'\n",
			child, argv[0]);
		sys_proc_destroy(child, -1);
		exit(1);
	}
	sys_proc_run(child);
	waitpid(child, &wstatus, 0);
}
/* Stores the full command line, i.e. the argv strings joined by single spaces,
 * in cmd_line_save (declared outside the code shown here).  The buffer comes
 * from xmalloc() and is always NUL-terminated, including when argc == 0, in
 * which case it holds the empty string. */
static void save_cmdline(int argc, char *argv[])
{
	/* Start at 1 so there is always room for the terminating NUL, even
	 * when there are no arguments at all. */
	size_t len = 1;
	char *p;

	for (int i = 0; i < argc; i++)
		len += strlen(argv[i]) + 1;
	cmd_line_save = xmalloc(len);
	p = cmd_line_save;
	for (int i = 0; i < argc; i++) {
		size_t arg_len = strlen(argv[i]);

		if (i != 0)
			*p++ = ' ';
		memcpy(p, argv[i], arg_len);
		p += arg_len;
	}
	*p = '\0';
}
static void global_usage(void)
{
fprintf(stderr, " Usage: perf COMMAND [ARGS]\n");
fprintf(stderr, "\n Available commands:\n\n");
for (int i = 0; i < COUNT_OF(perf_cmds); i++)
fprintf(stderr, " \t%s: %s\n", perf_cmds[i].name,
perf_cmds[i].desc);
exit(-1);
}
int main(int argc, char *argv[])
{
int i, ret = -1;
save_cmdline(argc, argv);
/* Common inits. Some functions don't need these, but it doesn't hurt.
*/
perf_initialize();
pctx = perf_create_context(&perf_cfg);
cctx = perfconv_create_context(pctx);
if (argc < 2)
global_usage();
for (i = 0; i < COUNT_OF(perf_cmds); i++) {
if (!strcmp(perf_cmds[i].name, argv[1])) {
ret = perf_cmds[i].func(&perf_cmds[i], argc - 1,
argv + 1);
break;
}
}
if (i == COUNT_OF(perf_cmds))
global_usage();
/* This cleanup is optional - they'll all be dealt with when the program
* exits. This means its safe for us to exit(-1) at any point in the
* program. */
perf_free_context(pctx);
perfconv_free_context(cctx);
perf_finalize();
return ret;
}