|  | # brho: 2014-10-13 | 
|  | # | 
|  | # this is partly fleshed out.  to use, i've just been sourcing the script in R, | 
|  | # then overriding the tsc overhead and freq.  then just running various | 
|  | # functions directly, like print_stats, plot_densities, plot_tput, etc.  don't | 
|  | # expect any command line options to work. | 
|  |  | 
|  | # library that includes the pwelch function | 
|  | suppressPackageStartupMessages(library(oce)) | 
|  | # library for command line option parsing | 
|  | suppressPackageStartupMessages(library(optparse)) | 
|  |  | 
|  | # file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead | 
|  |  | 
# Fallback TSC calibration values.  Both start at 0, which means "not set";
# override them after sourcing this script.
#
# g_tsc_frequency: plot_tput divides tick counts by (g_tsc_frequency / 1e9),
#                  and refuses to run while this is still 0.
# g_tsc_overhead:  subtracted by acq_latency/hld_latency when the first
#                  tsc_overhead value (column V6) in the data is 0.
g_tsc_frequency <- 0
g_tsc_overhead <- 0
|  |  | 
|  | ###################################### | 
|  | ### Functions | 
|  | ###################################### | 
|  |  | 
# Clamps large outliers so they do not stretch a graph.  The cap is
#     q99 + 2 * (q99 - median(vec))
# where q99 is the 99th percentile of vec.  Every value above the cap is
# replaced by the cap; everything else is returned untouched.  The cap is
# pretty arbitrary: useful for not having ridiculously large graphs, but it is
# still lousy for various datasets.
#
# vec: numeric vector.
# Returns a numeric vector of the same length as vec.  An empty vec is
# returned as-is.
round_outlier <- function(vec)
{
  if (length(vec) == 0)
    return(vec)

  # names = FALSE keeps the "99%" label from leaking into the result
  vec99 <- quantile(vec, .99, names = FALSE)
  lim <- vec99 + 2 * (vec99 - median(vec))

  # pmin is the vectorized form of min(x, lim) applied to every element
  pmin(vec, lim)
}
|  |  | 
# Computes the acquire latency of every sample: V4 - V3 - tsc_overhead
# (per the file format above: V3 = pre, V4 = acquire, V6 = tsc_overhead).
#
# The overhead normally comes from column V6.  If the first V6 value is 0, the
# file is treated as carrying no overhead and the global g_tsc_overhead is
# used for every sample instead.
#
# data: data frame with at least the columns V3, V4 and V6.
# Returns a numeric vector with one latency per row (numeric(0) for a data
# frame with no rows).
acq_latency <- function(data)
{
  tsc_overhead <- data$V6
  if (is.null(tsc_overhead))
    stop("acq_latency: data has no V6 (tsc_overhead) column")

  # only the first sample is inspected.  isTRUE() keeps an empty column (where
  # the comparison yields NA) from blowing up the if().
  if (isTRUE(tsc_overhead[1] == 0))
    tsc_overhead <- rep(g_tsc_overhead, length(tsc_overhead))

  data$V4 - data$V3 - tsc_overhead
}
|  |  | 
# Computes the hold latency of every sample: V5 - V4 - tsc_overhead
# (per the file format above: V4 = acquire, V5 = unlock, V6 = tsc_overhead).
#
# The overhead normally comes from column V6.  If the first V6 value is 0, the
# file is treated as carrying no overhead and the global g_tsc_overhead is
# used for every sample instead.
#
# data: data frame with at least the columns V4, V5 and V6.
# Returns a numeric vector with one latency per row (numeric(0) for a data
# frame with no rows).
hld_latency <- function(data)
{
  tsc_overhead <- data$V6
  if (is.null(tsc_overhead))
    stop("hld_latency: data has no V6 (tsc_overhead) column")

  # only the first sample is inspected.  isTRUE() keeps an empty column (where
  # the comparison yields NA) from blowing up the if().
  if (isTRUE(tsc_overhead[1] == 0))
    tsc_overhead <- rep(g_tsc_overhead, length(tsc_overhead))

  data$V5 - data$V4 - tsc_overhead
}
|  |  | 
# Histogram whose bins are the percentiles of vec, with the x axis limited to
# the 1st..99th percentile.
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell.  since all bins hold (roughly)
# the same amount of data points, taller cells show a denser concentration in
# a skinnier bin.
#
# Repeated values in vec make neighbouring percentiles identical.  Used as-is
# those give zero-width bins with a 0/0 density, which is what triggered the
# 'need finite ylim' error.  Duplicate breaks are therefore dropped; the
# points still land in the bin that ends at the repeated value.
#
# i don't actually like this much.  using a round_outlier with 20-bin hist or
# a density plot look nicer.
#
# vec: numeric vector.
# Prints the percentile table, draws on the current device and returns the
# object produced by hist().
quant_hist <- function(vec)
{
  vec_quant <- quantile(vec, probs = seq(0, 1, .01))
  print(vec_quant)

  # 1st and 99th percentile: second and second-to-last entry, whatever the
  # step of the probs sequence above is
  nr_quant <- length(vec_quant)
  x_range <- c(vec_quant[2], vec_quant[nr_quant - 1])

  breaks <- unique(vec_quant)
  # a single number would be read by hist() as a bin count, not as break
  # points, so let hist() pick its own bins when all percentiles coincide
  if (length(breaks) < 2)
    return(hist(vec))

  hist(vec, breaks = breaks, xlim = x_range)
}
|  |  | 
# Draws the kernel density estimate of every vector in vecs into one plot,
# one line type per vector.
#
# vecs:    list of numeric vectors; must not be empty.
# names:   optional legend labels, one per vector.  When given, the y axis is
#          stretched so the legend in the top right corner does not cover the
#          curves.
# outfile: when not "", the plot goes to this PDF file instead of the current
#          device.
# title:   main title of the plot.
# xlab:    x axis label.
#
# Returns NULL invisibly.
plot_densities <- function(vecs, names=NULL, outfile="",
                           title="Lock Acquisition Latency",
                           xlab="TSC Ticks")
{
  nr_vecs <- length(vecs)
  if (nr_vecs == 0)
    stop("plot_densities: 'vecs' must contain at least one vector")

  # closing a device makes the *next* open device current, which is not
  # necessarily the one the caller was using.  remember it so it can be
  # re-selected after every dev.off().
  prev_dev <- dev.cur()
  restore_dev <- function()
  {
    if (prev_dev > 1 && prev_dev %in% dev.list())
      dev.set(prev_dev)
    invisible(NULL)
  }

  densities <- lapply(vecs, density)
  # the 0 / Inf seeds mean the x range always reaches up to at least 0
  max_y <- max(0, vapply(densities, function(d) max(d$y), numeric(1)))
  max_x <- max(0, vapply(densities, function(d) max(d$x), numeric(1)))
  min_x <- min(Inf, vapply(densities, function(d) min(d$x), numeric(1)))

  # http://www.statmethods.net/graphs/line.html
  linetype <- seq_len(nr_vecs)

  # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
  # can manually move it if we don't want to waste space
  if (!is.null(names)) {
    # the legend can only be measured on an open plot.  do that on a
    # throwaway pdf(NULL) device, which writes no file, instead of drawing on
    # (and then closing) whatever device the caller has open.
    pdf(NULL)
    legend_sz <- tryCatch({
      plot(c(min_x, max_x), c(0, max_y), type="n", xaxt="n", yaxt="n")
      legend("topright", legend=names, lty=linetype, plot=FALSE)
    }, finally = {
      dev.off()
      restore_dev()
    })
    max_y <- 1.04 * (max_y + legend_sz$rect$h)
  }

  if (outfile != "") {
    pdf(outfile)
    # close the file even if one of the drawing calls below fails
    on.exit({
      dev.off()
      restore_dev()
    }, add=TRUE)
  }

  plot(c(min_x, max_x), c(0, max_y), type="n", xlab=xlab, main=title,
       ylab="Density")

  # too many points for plot characters, so plain "l" lines, told apart by
  # line type only
  for (i in seq_len(nr_vecs))
    lines(densities[[i]], type="l", lty=linetype[i], lwd=1.5)

  if (!is.null(names))
    legend("topright", legend=names, lty=linetype)

  invisible(NULL)
}
|  |  | 
|  |  | 
# Convenience wrapper for a single vector: hands vec to plot_densities as a
# one-element list, without legend names.  outfile, title and xlab are passed
# through unchanged.
plot_density <- function(vec, outfile="",
                         title="Lock Acquisition Latency",
                         xlab="TSC Ticks")
{
  plot_densities(list(vec), outfile=outfile, title=title, xlab=xlab)
}
|  |  | 
|  |  | 
# Strip chart of lock acquisition timestamps, one strip per thread.
# Timestamps (column V4) are shifted so the earliest acquisition is at 0.
#
# data:    data frame with the columns V1 (thread id) and V4 (acquire
#          timestamp).
# outfile: when not "", the chart goes to this PDF file instead of the current
#          device.
#
# Returns NULL invisibly.
plot_acq_times <- function(data, outfile="")
{
  if (outfile != "") {
    pdf(outfile)
    # close the file even if something below fails
    on.exit(invisible(dev.off()), add=TRUE)
  }

  # all acquire times, timestamps starting at 0.  only V4 is shifted, so
  # other (possibly non-numeric) columns in data are left alone.
  rel_acq <- data$V4 - min(data$V4)

  # threads in order of first appearance
  threadid <- unique(data$V1)

  # which() skips rows whose thread id is NA
  acq_n <- lapply(threadid, function(id) rel_acq[which(data$V1 == id)])
  names <- paste("Thread ", threadid)

  # can adjust ylim, default are from 1..nr_items
  stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
             main="Lock Acquisition Timestamps")

  invisible(NULL)
}
|  |  | 
# Prints a short summary of vec, one print() per line: a separator, the mean
# and standard deviation (rounded to 4 places), the 50/75/90/99/99.9
# percentiles (rounded to whole numbers) and the min/max.
print_vec <- function(vec)
{
  print("---------------")
  print(paste("Average: ", round(mean(vec), 4)))
  print(paste("Stddev: ", round(sd(vec), 4)))

  pct <- unname(round(quantile(vec, c(.5, .75, .9, .99, .999))))
  # same as paste(label, pct[1], ..., pct[5]), without spelling out each index
  print(do.call(paste, c(list("50/75/90/99/99.9: "), as.list(pct))))

  print(paste("Min: ", min(vec), " Max: ", max(vec)))
}
|  |  | 
# Prints the print_vec summary of the acquire latencies, then of the hold
# latencies, of data.  Both are computed before anything is printed.
#
# using something like the tables package to output latex booktab's would be
# much nicer
print_stats <- function(data)
{
  lat <- list(acquire = acq_latency(data), hold = hld_latency(data))

  print("Acquire Latency")
  print_vec(lat$acquire)
  print("")
  print("Hold Latency")
  print_vec(lat$hold)
}
|  |  | 
# Plots lock acquisition throughput: the number of acquisitions (column V4)
# in each millisecond since the first one.
#
# if you know how many msec there are, this is like doing:
#     hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
#
# data:    data frame with the column V4 (acquire timestamp, in TSC ticks).
# title:   main title of the plot.
# outfile: when not "", the plot goes to this PDF file instead of the current
#          device.
#
# Needs the global g_tsc_frequency to be set; stops otherwise.  Returns NULL
# invisibly.
plot_tput <- function(data, title="Lock Acquisition Throughput", outfile="")
{
  # check this before opening the output device, so a failure does not leave
  # an open device and a junk file behind
  if (g_tsc_frequency == 0)
    stop("WARNING: global TSC freq not set!")

  if (outfile != "") {
    pdf(outfile)
    # close the file even if something below fails
    on.exit(invisible(dev.off()), add=TRUE)
  }

  # ticks since the first acquisition, divided by (g_tsc_frequency / 1e9).
  # that is nsec if g_tsc_frequency is in ticks per second.
  total_acq <- (data$V4 - min(data$V4)) / (g_tsc_frequency / 1e9)

  # rounds down all times to the nearest msec, will collect into a table,
  # which counts the freq of each bucket, as per:
  # http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
  msec_times <- trunc(total_acq / 1e6)

  # if we just table directly, we'll lose the absent values (msec where no
  # timestamp happened).  not sure if factor is the best way, the help
  # says it should be a small range.
  # http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
  msec_times <- factor(msec_times, 0:max(msec_times))

  # without the c(), it'll be a bunch of bars at each msec
  tab <- c(table(msec_times))
  plot(tab, type="o", main=title, xlab="Time (msec)",
       ylab="Locks per msec")

  invisible(NULL)
}
|  |  | 
|  |  | 
# Extracts useful information from a raw data file.
#
# filename: whitespace-separated table; lines starting with '#' are skipped.
#           Column V1 is read as the timestamps and V2 as the work amount.
#
# Returns a list with
#   work_amt:         column V2 as read
#   time_steps_ns:    differences between consecutive V1 values (empty when
#                     there are fewer than two rows)
#   N_entries:        number of rows
#   avg_time_step_ns: mean of time_steps_ns (NaN when there are no steps)
# NOTE(review): the _ns names say the timestamps are nanoseconds; nothing here
# converts units, so confirm against whatever produces the file.
extract_data <- function(filename) {
  mydata <- read.table(filename, comment.char="#")

  work_amt <- mydata$V2

  # as.character first, so a column that was read as a factor yields its
  # labels rather than its level codes
  times <- as.numeric(as.character(mydata$V1))
  N_entries <- length(times)

  # diff() gives an empty result for a single entry, where hand-built 2:N and
  # 1:(N-1) indices would run backwards
  time_steps_ns <- diff(times)
  avg_time_step_ns <- mean(time_steps_ns)

  list(work_amt=work_amt, time_steps_ns=time_steps_ns,
       N_entries=N_entries, avg_time_step_ns=avg_time_step_ns)
}
|  |  | 
|  |  | 
|  | ###################################### | 
|  | ### Main | 
|  | ###################################### | 
|  |  | 
### collect command line arguments
# Optional arguments for the command line parser (the parse_args call further
# down is currently commented out).  "-h" and "--help" are automatically in
# the list.
option_list <- list(
  make_option(c("-i", "--input"), type="character",
              default="welch_input.dat",
              help="Input data file"),
  make_option(c("-o", "--output"), type="character",
              default="welch_plot.pdf",
              help="Output file for plotting"),
  make_option("--xmin", type="double", default=0,
              help=paste0("Minimum frequency (horizontal axis) ",
                          "in output plot [default %default]")),
  make_option("--xmax", type="double", default=40,
              help=paste0("Maximum frequency (horizontal axis) ",
                          "in output plot [default %default]")),
  make_option("--ymin", type="double", default=-1,
              help=paste0("Minimum spectrum (vertical axis) ",
                          "in output plot [default adaptive]")),
  make_option("--ymax", type="double", default=-1,
              help=paste0("Maximum spectrum (vertical axis) ",
                          "in output plot [default adaptive]"))
)
|  |  | 
|  | ## read command line | 
|  | #opt <- parse_args(OptionParser(option_list=option_list)) | 
|  | # | 
|  | ##max_freq = as.numeric(as.character(args[3])) | 
|  | # | 
|  | #### read in data | 
|  | #mydata = extract_data(opt$input) | 
|  |  | 
|  | #round_outlier <- function(vec) | 
|  | #acq_latency <- function(data) | 
|  | #hld_latency <- function(data) | 
|  | #plot_densities <- function(vecs, names=NULL, outfile="", | 
|  | #plot_density <- function(vec, outfile="", | 
|  | #plot_acq_times <- function(data, outfile="") | 
|  | #print_vec <- function(vec) | 
|  | #print_stats <- function(data) | 
|  | #plot_tput <- function(data) | 
|  | #mydata = read.table(filename, comment.char="#") | 
|  |  |