| #!/usr/bin/env Rscript |
| # |
| # brho: 2014-10-13, 2020-06-25 |
| # |
| # to install helper libraries, run R, install manually: |
| # |
| # install.packages('oce') |
| # install.packages('optparse') |
| # install.packages('data.table') |
| # didn't need to library()-load this, but data.table wanted it at one point |
| # install.packages('bit64') |
| # |
| # To run it, you can do stuff like this: |
| # |
| # for i in *.dat.gz; do |
| # echo Processing $i... |
| # $AKA_DIR/scripts/lock_test.R -c 1 $i |
| # done |
| # |
| # echo "Creating ${DIR%/}-tput.pdf" |
| # $AKA_DIR/scripts/lock_test.R -c 2 *.dat.gz -o ${DIR%/}-tput.pdf |
| # |
| # echo "Creating ${DIR%/}-kernel-tput.pdf" |
| # $AKA_DIR/scripts/lock_test.R -c 2 *kernel*.dat.gz -o ${DIR%/}-kernel-tput.pdf |
| # |
| # TODO: |
| # - analyze when the lock jumps between locality regions (hyperthread, |
| # core, CCX, numa, whatever). It's a bit of a pain with Linux, since core 1 is |
| # on another numa node than core 0. Whatever. |
| # - For lock_test, have each additional core/thread be allocated close to the |
| # existing core (locality), and then look at the lock scalability. (classic, |
| # X-axis is nr_cpus, Y axis is time per lock (avg acq lat)). |
| # - Figure out if there was something special about disable/enable IRQs |
| |
| # library that includes the pwelch function |
| suppressPackageStartupMessages(library(oce)) |
| # library for command line option parsing |
| suppressPackageStartupMessages(library(optparse)) |
| # read.table is brutally slow. this brings in fread |
| suppressPackageStartupMessages(library(data.table)) |
| |
| # file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead |
| # 0 0 4748619790389387 4748619790429260 4748619790429323 0 |
| |
# Fallback TSC read overhead.  acq_latency() and hld_latency() subtract this
# when the overhead column (V6) of the data starts with 0.
g_tsc_overhead <- 0
# TSC frequency of the machine that produced the data; 0 means "not set".
# load_test_data() overwrites it from the header of each file it loads.
g_tsc_frequency <- 0

# Pixel dimensions handed to png() for every PNG this script writes.
g_height <- 1200
g_width <- 1920
| |
| ###################################### |
| ### Functions |
| ###################################### |
| |
# Clamps large outliers so graphs do not get ridiculously large.  The limit
# sits above the 99th quantile by twice the distance between the 99th quantile
# and the median; any value beyond the limit is rounded down to it.  The limit
# is pretty arbitrary and is still lousy for various datasets.
#
# vec: numeric vector of samples.
# Returns a numeric vector of the same length as vec.
round_outlier <- function(vec)
{
    vec99 <- quantile(vec, .99)
    # quantile() labels its result ("99%"); strip the label so it cannot
    # leak into the returned vector.
    lim <- unname(vec99 + 2 * (vec99 - median(vec)))
    # pmin() clamps the whole vector in one vectorized call.  A per-element
    # sapply() is very slow on large inputs and returns list() instead
    # of an empty numeric vector when vec is empty.
    return(pmin(vec, lim))
}
| |
# Computes the acquire latency of every sample: acquire timestamp (V4) minus
# pre timestamp (V3), minus the TSC overhead.  The per-sample overhead column
# (V6) is used, unless its first entry is 0, in which case the global
# g_tsc_overhead is subtracted from every sample instead.
#
# data: table with columns V3, V4 and V6 (see the file format note).
# Returns a numeric vector with one latency per row.
acq_latency <- function(data)
{
    tsc_overhead <- data$V6
    # Check the length first so an empty table yields an empty result
    # instead of an error from if().
    if (length(tsc_overhead) > 0 && tsc_overhead[1] == 0) {
        # A scalar recycles across all rows; no need to build a
        # constant vector element by element.
        tsc_overhead <- g_tsc_overhead
    }
    return(data$V4 - data$V3 - tsc_overhead)
}
| |
# Computes the hold latency of every sample: unlock timestamp (V5) minus
# acquire timestamp (V4), minus the TSC overhead.  The per-sample overhead
# column (V6) is used, unless its first entry is 0, in which case the global
# g_tsc_overhead is subtracted from every sample instead.
#
# data: table with columns V4, V5 and V6 (see the file format note).
# Returns a numeric vector with one latency per row.
hld_latency <- function(data)
{
    tsc_overhead <- data$V6
    # Check the length first so an empty table yields an empty result
    # instead of an error from if().
    if (length(tsc_overhead) > 0 && tsc_overhead[1] == 0) {
        # A scalar recycles across all rows; no need to build a
        # constant vector element by element.
        tsc_overhead <- g_tsc_overhead
    }
    return(data$V5 - data$V4 - tsc_overhead)
}
| |
# histogram, bins based on percentiles, with limits of the graph based on the
# outermost bins (1st and 99th percentile).
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell.  when all percentiles are
# distinct, every bin holds the same amount of data points, so taller cells
# show a denser concentration in a skinnier bin.
#
# i don't actually like this much.  using a round_outlier with 20-bin hist or a
# density plot look nicer.
#
# vec: numeric vector of samples.
# Prints the percentile table and returns (invisibly) whatever hist() returns.
quant_hist <- function(vec)
{
    vec_quant <- c(quantile(vec, probs=seq(0, 1, .01)))
    print(vec_quant)

    # Repeated percentiles (lots of identical samples) would produce
    # zero-width bins, whose density is not finite and makes the plot fail
    # with 'need finite ylim'.  Collapse them into a single break.
    bin_edges <- unique(vec_quant)
    # A lone break would be read by hist() as a bin count, so fall back to
    # hist()'s default rule when the data has fewer than two distinct edges.
    if (length(bin_edges) < 2)
        bin_edges <- "Sturges"

    # keep the 100 in sync with the 0.01 above
    hist(vec, breaks=bin_edges, xlim=c(vec_quant[2], vec_quant[100]))
}
| |
# Plots N data sets as lines on a single graph.
#
# line_data: list of N data sets that can be plotted as lines.  each set
#            should just be an array of values; the index will be the X value.
# names:     c() of N strings corresponding to the line_data members, used
#            for the legend.  NULL means no legend.
# outfile:   PNG file to write.  "" draws on the current device instead.
# title, xlab, ylab: plot annotations.
# min_x, max_x, min_y, max_y: the desired axis limits.  A max of 0 means
#            "derive it from the data": max_x becomes the length of the
#            longest set and max_y the largest value found.
#
# Returns NULL, invisibly.
plot_lines <- function(line_data, names=NULL, outfile="", title="",
                       xlab="X", ylab="Y", min_x=0, max_x=0, min_y=0, max_y=0)
{
    nr_lines <- length(line_data)

    # not guaranteed to work, but might save some pain.  Iterating over the
    # elements (instead of 1:nr_lines) keeps an empty list from blowing up.
    if (max_x == 0) {
        for (line in line_data)
            max_x <- max(max_x, length(line))
    }
    if (max_y == 0) {
        for (line in line_data)
            max_y <- max(max_y, max(line))
    }

    # http://www.statmethods.net/graphs/line.html
    colors <- rainbow(nr_lines) # not a huge fan.  color #2 is light blue.
    linetype <- seq_len(nr_lines)

    # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
    # can manually move it if we don't want to waste space
    if (!is.null(names)) {
        # Throwaway plot, drawn on whatever device is current (outfile is
        # not open yet), only so legend(plot=FALSE) can report how tall the
        # legend is.  That height is added to max_y to make room for it.
        plot(c(min_x, max_x), c(min_y, max_y), type="n", xaxt="n", yaxt="n")
        legend_sz <- legend("topright", legend=names, lty=linetype,
                            plot=FALSE)
        max_y <- 1.04 * (max_y + legend_sz$rect$h)
    }

    if (outfile != "") {
        png(outfile, width=g_width, height=g_height)
        # Close the PNG device even if one of the calls below fails, so an
        # error does not leave a dangling device behind.
        on.exit(invisible(dev.off()), add=TRUE)
    }

    plot(c(min_x, max_x), c(min_y, max_y), type="n", main=title, xlab=xlab,
         ylab=ylab)

    for (i in seq_len(nr_lines)) {
        # too many points, so using "l" and no plotchar.
        lines(line_data[[i]], type="l", lty=linetype[i], col=colors[i],
              lwd=1.5)
    }

    if (!is.null(names))
        legend("topright", legend=names, lty=linetype, col=colors)

    invisible(NULL)
}
| |
# Plots the kernel density estimate of each vector as one line per vector,
# all on the same graph (via plot_lines).
#
# vecs:    list of numeric vectors; must hold at least one.
# names:   optional c() of strings, one per vector, for the legend.
# outfile: PNG file to write, passed on to plot_lines.  It also prefixes the
#          plot title.
#
# Returns whatever plot_lines returns.
plot_densities <- function(vecs, names=NULL, outfile="")
{
    if (length(vecs) == 0)
        stop("plot_densities: need at least one vector to plot")

    # One density estimate per vector.  unname() keeps the result a plain
    # positional list even when vecs carries names.
    densities <- unname(lapply(vecs, density))

    # Axis limits that cover every curve.  The 0 / Inf seeds mirror the
    # starting values used for the running max / min.
    all_x <- unlist(lapply(densities, function(d) d$x))
    all_y <- unlist(lapply(densities, function(d) d$y))
    max_y <- max(0, all_y)
    max_x <- max(0, all_x)
    min_x <- min(Inf, all_x)

    plot_lines(densities, names=names, outfile=outfile,
               title=paste(outfile, "Lock Acquisition Latency"),
               xlab="TSC Ticks", ylab="PDF",
               min_x=min_x, max_x=max_x, max_y=max_y)
}
| |
# Single-vector convenience wrapper around plot_densities.
#
# vec:     the samples to plot.
# outfile: passed through to plot_densities unchanged.
plot_density <- function(vec, outfile="")
{
    plot_densities(vecs=list(vec), outfile=outfile)
}
| |
# Strip chart of lock acquisition timestamps, one row of dots per thread.
#
# data:    table with columns V1 (thread id) and V4 (acquire timestamp).
# outfile: PNG file to write.  "" draws on the current device instead.
#
# Returns NULL, invisibly.
plot_acq_times <- function(data, outfile="")
{
    if (outfile != "") {
        png(outfile, width=g_width, height=g_height)
        # Close the PNG device even if something below fails.
        on.exit(invisible(dev.off()), add=TRUE)
    }

    # all acquire times, timestamps starting at 0
    time0 <- min(data$V4)

    # Threads in order of first appearance.
    threadid <- unique(data$V1)

    # Only V4 is plotted, so pick it out per thread directly rather than
    # shifting every column of a per-thread copy of the table.  which()
    # drops rows whose thread id is NA.
    acq_n <- lapply(threadid,
                    function(id) data$V4[which(data$V1 == id)] - time0)
    names <- paste("Thread ", threadid)

    # can adjust ylim, default are from 1..nr_items
    stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
               main="Lock Acquisition Timestamps")

    invisible(NULL)
}
| |
# Prints a small summary of vec under the heading name: average, standard
# deviation, the 50/75/90/99/99.9 percentiles (rounded), and min / max.
#
# vec:  numeric vector to summarize.
# name: heading printed above the summary.
print_vec <- function(vec, name)
{
    # cat doesn't get along with integer64 (or something like it), so every
    # computed line is pasted into a string first.  print would spit out
    # [1] for row names, which is not wanted either.
    say <- function(...) cat(paste(...))

    cat(name, "\n")
    cat("---------------\n")
    say("Average: ", round(mean(vec), 4), "\n")
    say("Stddev: ", round(sd(vec), 4), "\n")
    pct <- round(quantile(vec, c(.5, .75, .9, .99, .999)))
    say("50/75/90/99/99.9: ", paste(pct, collapse=" "), "\n")
    say("Min: ", min(vec), " Max: ", max(vec), "\n")
    cat("\n")
}
| |
# Prints the summary statistics of the acquire and the hold latencies of data.
#
# using something like the tables package to output latex booktab's would be
# much nicer
print_stats <- function(data)
{
    sections <- list(list(acq_latency, "Acquire Latency"),
                     list(hld_latency, "Hold Latency"))
    for (s in sections)
        print_vec(s[[1]](data), s[[2]])
}
| |
# Computes the length of the lock's 'queue' over time.  the 'queue' system
# includes waiting for the lock and holding the lock: a thread enters at its
# pre timestamp (V3) and leaves at its unlock timestamp (V5).
#
# data: table with columns V3 and V5.
#
# Returns a data.frame with one row per enter/leave event: X = time of the
# event in msec since the earliest pre, Y = qlen right after the event.
# Stops if the global TSC frequency has not been set.
get_qlen <- function(data)
{
    if (g_tsc_frequency == 0)
        stop("WARNING: global TSC freq not set!")

    # timestamps for all threads, sorted.
    # a 'pre' is someone entering the q.  'unl' is leaving.
    # both are normalized to the TSC time for pres
    start <- min(data$V3)
    pres <- sort(data$V3 - start)
    unls <- sort(data$V5 - start)

    # Merge both event streams in time order without an interpreted loop:
    # tag every pre with +1 and every unlock with -1, order all events by
    # time, and take the running sum of the tags.  Ordering on -delta as
    # the second key puts a pre ahead of an unlock with the same timestamp.
    stamps <- c(pres, unls)
    deltas <- rep(c(1, -1), times=c(length(pres), length(unls)))
    ord <- order(stamps, -deltas)

    times <- stamps[ord]
    qlens <- cumsum(deltas[ord])

    # times is in units of TSC.  convert to msec, for easier comparison
    # with throughput.  though note that when plotting with e.g. tput, the
    # x-axis is set by tput, and whatever we print is scaled to fit in that
    # space.  so if you didn't do this, you wouldn't notice.
    times <- times / (g_tsc_frequency / 1e3)

    return(data.frame(setNames(list(times, qlens),
                               c("Time (msec)", "Queue Length"))))
}
| |
# Scatter plot of the lock's queue length over time, as computed by get_qlen.
#
# data:    table accepted by get_qlen.
# outfile: PNG file to write.  "" draws on the current device instead.
#
# Returns NULL, invisibly.
plot_qlen <- function(data, outfile="")
{
    df <- get_qlen(data)
    if (outfile != "") {
        png(outfile, width=g_width, height=g_height)
        # Close the PNG device even if plot() fails.
        on.exit(invisible(dev.off()), add=TRUE)
    }

    # df isn't a 'line', so we need to plot it directly
    plot(df, type="p", main="Spinlock Queue Length")

    invisible(NULL)
}
| |
# Computes lock throughput: the number of acquisitions (V4 timestamps) that
# fall into each msec, counted from the earliest acquisition.
#
# data: table with column V4.
#
# Returns a named integer vector with one entry per msec from 0 up to the last
# msec with an acquisition; the names are the msec numbers and msecs without
# any acquisition hold 0.  An empty table yields an empty vector.
# Stops if the global TSC frequency has not been set.
get_tput <- function(data)
{
    if (g_tsc_frequency == 0)
        stop("WARNING: global TSC freq not set!")

    if (length(data$V4) == 0)
        return(setNames(integer(0), character(0)))

    # acq times, normalized to the lowest, converted from ticks to nsec
    acq_nsec <- (data$V4 - min(data$V4)) / (g_tsc_frequency / 1e9)

    # rounds down all times to the nearest msec
    msec_times <- trunc(acq_nsec / 1e6)
    msec_times <- msec_times[!is.na(msec_times)]
    last_msec <- max(msec_times)

    # Count per msec bucket, keeping the absent ones (msec where no
    # timestamp happened) as 0.  tabulate() bins by integer value; going
    # through factor() would match on the text form of the numbers, and a
    # double such as 100000 prints as "1e+05", so that bucket would never
    # match its level and its acquisitions would be dropped.
    counts <- tabulate(msec_times + 1, nbins=last_msec + 1)
    return(setNames(counts, 0:last_msec))
}
| |
# Plots the throughput of several data sets as lines on one graph.
#
# this is a little rough.  You need the data loaded, but calculating tput
# requires the right TSC freq set.  So don't use this with data from different
# machines (which can be a tough comparison, regardless).
#
# data_l:  list of data tables, each accepted by get_tput.
# names:   optional c() of strings, one per table, for the legend.
# outfile: PNG file to write, passed on to plot_lines.  It also prefixes the
#          plot title.
#
# Returns whatever plot_lines returns.
plot_tputs <- function(data_l, names=NULL, outfile="")
{
    # One throughput vector per data set.  lapply copes with an empty list
    # and does not re-copy the result on every iteration; unname() keeps
    # the result a plain positional list even when data_l carries names.
    tputs <- unname(lapply(data_l, get_tput))

    plot_lines(tputs, names, outfile=outfile,
               title=paste(outfile, "Lock Throughput"),
               xlab="Time (msec)", ylab="Locks per msec")
}
| |
| |
# helper, plots throughput (tp) and whatever else you give it on the same
# graph.  e.g. qlen is a value at a timestamp (msec).
#
# tp:      throughput values, drawn as a blue line against the left Y axis.
# foo:     the other data, drawn as red points against its own right Y axis.
# foo_str: label for foo (title, right axis and legend).
# outfile: PNG file to write.  "" draws on the current device instead.  It
#          also prefixes the plot title.
#
# Returns NULL, invisibly.
plot_tput_foo <- function(tp, foo, foo_str, outfile="")
{
    if (outfile != "") {
        png(outfile, width=g_width, height=g_height)
        # Close the PNG device even if one of the calls below fails.
        on.exit(invisible(dev.off()), add=TRUE)
    }

    # decent looking margins for the right Y axis
    old_par <- par(mar = c(5, 5, 3, 5))
    # When drawing on the caller's device, hand the margins back afterwards.
    # (With an outfile the device is closed anyway, and touching par() after
    # that would open a fresh device.)
    if (outfile == "")
        on.exit(par(old_par), add=TRUE)

    # Scaling ylim to make room for FOO, which tends to be up top
    plot(tp, type="l", main=paste(outfile, "Lock Throughput and", foo_str),
         ylim=c(0, max(tp)*1.2),
         xlab="Time (msec)", ylab="Throughput",
         col="blue", lty=1)

    # Overlay the second data set on the same plot region, with its own
    # axis scaling, and label that scale on the right-hand side.
    par(new = TRUE)
    plot(foo, type="p", xaxt = "n", yaxt = "n", ylab = "", xlab = "",
         col="red", lty=2)
    axis(side = 4)
    mtext(foo_str, side = 4)

    legend("topleft", c("tput", foo_str), col = c("blue", "red"),
           lty = c(1, 2))

    invisible(NULL)
}
| |
# Throughput and queue length of one data set, overlaid on the same graph.
#
# data:    table accepted by both get_tput and get_qlen.
# outfile: passed through to plot_tput_foo unchanged.
plot_tput_qlen <- function(data, outfile="")
{
    throughput <- get_tput(data)
    queue_len <- get_qlen(data)

    plot_tput_foo(tp=throughput, foo=queue_len, foo_str="Qlen",
                  outfile=outfile)
}
| |
# Throughput plot of a single data set (plot_tputs with a one-element list).
#
# if you know how many msec there are, this is like doing:
#     hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
plot_tput <- function(data, outfile="")
{
    plot_tputs(data_l=list(data), outfile=outfile)
}
| |
# Lists which core (V1) shows up at which time, in time order.  it's a really
# busy thing to graph.
#
# You can see some gaps in non-MCS lock holders when a core is starved or not
# participating.  By comparing that gap to the qlen, you can see if it was
# starvation or just not participating.  Also, you can easily spot a preempted
# lockholder (if they are gone for enough msec).
#
# NOTE(review): the original description says the table is sorted on the
# middle timestamp (the acquire-by-lockholder, V4), but the code has always
# sorted on and reported V3 (pre).  That is kept as-is here - confirm which
# one is intended.
#
# data: table with columns V1 and V3.
#
# Returns a data.frame with one row per sample, ordered by V3: the time in
# msec since the earliest V3, and the core id.
# Stops if the global TSC frequency has not been set.
get_core_acqs <- function(data)
{
    # Same guard as get_tput / get_qlen; a frequency of 0 would turn every
    # time into Inf or NaN below.
    if (g_tsc_frequency == 0)
        stop("WARNING: global TSC freq not set!")

    # Order the two columns directly.  Indexing the whole table with a
    # single vector only reorders rows for a data.table; on a plain
    # data.frame it would select columns instead.
    ord <- order(data$V3)
    times <- data$V3[ord] - min(data$V3)
    cores <- data$V1[ord]

    # times is in units of TSC.  convert to msec, for easier comparison
    # with throughput
    times <- times / (g_tsc_frequency / 1e3)

    return(data.frame(setNames(list(times, cores),
                               c("Time (msec)", "Holder ID"))))
}
| |
# Throughput and the per-core timeline from get_core_acqs, overlaid on the
# same graph.
#
# data:    table accepted by both get_tput and get_core_acqs.
# outfile: passed through to plot_tput_foo unchanged.
plot_tput_core_acqs <- function(data, outfile="")
{
    throughput <- get_tput(data)
    core_acqs <- get_core_acqs(data)

    plot_tput_foo(tp=throughput, foo=core_acqs, foo_str="Holder ID",
                  outfile=outfile)
}
| |
# Reads the TSC frequency from the header of a data file.
#
# Looks at the first 50 lines for one containing "tsc_frequency" and returns
# the first number that follows that keyword, e.g. 2300002733 for a line like
#     # tsc_frequency 2300002733
#
# filename: data file to read.
#
# Returns the frequency as a numeric.  Stops with a descriptive message if no
# such line exists or no number follows the keyword.
get_tsc_freq <- function(filename)
{
    header <- readLines(filename, n=50)
    line <- grep("tsc_frequency", header, value=TRUE)
    if (length(line) == 0)
        stop("no tsc_frequency line in the first 50 lines of ", filename)

    # Take the number after the keyword, whatever separates the two, rather
    # than relying on it being the third single-space-separated field.
    m <- regmatches(line[1],
                    regexec("tsc_frequency[^0-9]*([0-9.eE+]+)", line[1]))[[1]]
    if (length(m) < 2)
        stop("no value on the tsc_frequency line of ", filename, ": ",
             line[1])

    freq <- suppressWarnings(as.numeric(m[2]))
    if (is.na(freq))
        stop("bad tsc_frequency value in ", filename, ": ", m[2])
    return(freq)
}
| |
| |
# Loads a lock_test data file, plain or gzipped (name ending in .gz).
#
# pretty ugly.  reads in the data, but also sets the global TSC freq
# (g_tsc_frequency), which is particular to a given machine.  so if you use
# this, be sure you do any analyses before loading another.  Sorry.
#
# filename: data file to load.
#
# Returns whatever fread returns for the file with the '#' lines removed.
# Stops if the TSC frequency from the header is missing or 0.
load_test_data <- function(filename)
{
    tsc_freq <- get_tsc_freq(filename)
    if (is.na(tsc_freq) || tsc_freq == 0)
        stop("WARNING: could not set global TSC freq!")
    assign("g_tsc_frequency", tsc_freq, envir = .GlobalEnv)

    # using grep for a hokey comment.char="#".  it's fast enough for now.
    # The file name ends up in a shell command line, so quote it (spaces,
    # quotes, globs, ...) and put it after "--" so a leading dash is not
    # taken for an option.
    quoted <- shQuote(filename)
    if (grepl("\\.gz$", filename)) {
        cmd <- paste("gunzip -c --", quoted, "| grep -v '^#'")
    } else {
        cmd <- paste("grep -v '^#' --", quoted)
    }

    # integer64="numeric", o/w all sorts of things break, like density()
    # and round_outlier().  Even with sprinkling as.numeric() around, it's
    # still a mess.  This actually worked, though presumably there's a
    # reason to use int64.
    return(fread(cmd=cmd, integer64="numeric"))
}
| |
| ###################################### |
| ### Main |
| ###################################### |
# Command line options understood by main().  optparse adds "-h" and "--help"
# to the list on its own.
option_list <- list(
    make_option(opt_str=c("-c", "--cmd"), type="integer", default=-1,
                help="
1: acq_lat & tput_qlen for one file
2: tput comparison for multiple files
"),
    make_option(opt_str=c("-o", "--output"), type="character", default="",
                help="Output file, if applicable")
)
| |
# CMD 1: analyzes a single data file.
#
# Writes, next to the input file (its name minus a trailing .gz and .dat):
#     <base>-stats.txt           the header comments and latency statistics
#     <base>-acq.png             acquire latency density
#     <base>-tput_qlen.png       throughput and queue length
#     <base>-tput_core_acqs.png  throughput and core ids
#
# args: parsed command line; args$args must hold exactly one data file.
single_analysis <- function(args)
{
    data_file <- args$args

    if (length(data_file) != 1)
        stop("Need exactly one input file")
    if (file.access(data_file) == -1)
        stop("cannot access input file")

    data <- load_test_data(data_file)

    # Strip the extensions.  The dots are escaped so only a literal ".gz" /
    # ".dat" suffix is removed, not any character followed by "gz" / "dat".
    out_base <- sub("\\.gz$", "", data_file)
    out_base <- sub("\\.dat$", "", out_base)

    sink(paste0(out_base, "-stats.txt"))
    # Whatever happens while writing the stats, give the console back;
    # otherwise a failure would leave all later output going to the file.
    tryCatch({
        cat("Filename:", data_file, "\n")
        cat("\n")
        print(grep("^#", readLines(data_file, n=50), value=TRUE))
        cat("\n")
        print_stats(data)
    }, finally = sink())

    # For now, create all files.  Can make options later
    plot_density(round_outlier(acq_latency(data)),
                 outfile=paste0(out_base, "-acq.png"))
    plot_tput_qlen(data,
                   outfile=paste0(out_base, "-tput_qlen.png"))
    plot_tput_core_acqs(data,
                        outfile=paste0(out_base, "-tput_core_acqs.png"))
}
| |
# CMD 2: plots the throughput of several data files as lines on one graph,
# labelled with the file names.
#
# args: parsed command line; args$options$output names the file to write and
#       args$args lists the data files (at least one).
#
# Returns whatever plot_lines returns.
tput_comparison <- function(args)
{
    outfile <- args$options$output

    if (outfile == "")
        stop("Need an outfile (-o)")

    files <- args$args
    if (length(files) == 0)
        stop("Need at least one input file")

    # One throughput vector per file.  The data is loaded into a local
    # first: load_test_data() sets the TSC frequency that get_tput() checks
    # right away, so the load must have finished before get_tput() starts.
    # Each table goes away when its iteration ends, so only one data set is
    # held at a time.
    tputs <- lapply(files, function(f) {
        data <- load_test_data(f)
        get_tput(data)
    })

    plot_lines(unname(tputs), as.character(files), outfile=outfile,
               title="Lock Throughput",
               xlab="Time (msec)",
               ylab="Locks per msec")
}
| |
# Script entry point: parses the command line and runs the analysis picked
# with -c (1: single_analysis, 2: tput_comparison).  Stops if -c does not
# select one of them.
main <- function()
{
    # ugly as hell, no error messages, etc.
    args <- parse_args(OptionParser(usage="%prog [options] data.dat",
                                    option_list=option_list),
                       positional_arguments = TRUE)

    # switch() on a number picks the handler by position and yields NULL
    # when the number is out of range.
    handler <- switch(args$options$cmd, single_analysis, tput_comparison)
    if (is.null(handler)) {
        stop("Need a valid (-c) command (-h for options)")
    }
    handler(args)
}
| |
# Only run main() from the top level (no function call on the stack); don't
# run when sourced.
if (identical(sys.nframe(), 0L))
    main()
| |
| # Dirty Hacks Storage |
| |
| # source("scripts/lock_test.R") |
| |
| # this is slow and shitty: |
| # raw = read.table(filename, comment.char="#") |
| |
# to print stuff to a file, wrap the plotting command in pdf()/dev.off(), as
# below.  pdf can be really slow for large files, so try png instead
| #pdf("foo.pdf") |
| #command that creates a graphic, like hist or plot |
| #dev.off() |
| |
| # hack, so i can re-source this and not have to reload TSC freq |
| #g_tsc_frequency <- get_tsc_freq("raw1.dat") |