| # brho: 2014-10-13 |
| # |
| # this is partly fleshed out. to use, i've just been sourcing the script in R, |
| # then overriding the tsc overhead and freq. then just running various |
| # functions directly, like print_stats, plot_densities, plot_tput, etc. don't |
| # expect any command line options to work. |
| |
| # library that includes the pwelch function |
| suppressPackageStartupMessages(library(oce)) |
| # library for command line option parsing |
| suppressPackageStartupMessages(library(optparse)) |
| |
| # file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead |
| |
# tsc frequency used by plot_tput to turn ticks into time; presumably ticks
# per second (TODO confirm).  plot_tput refuses to run while this is still 0,
# so override it after sourcing the script.
g_tsc_frequency <- 0
# fallback cost of reading the tsc, in ticks.  the latency helpers subtract
# this when the first overhead entry in the data (column 6) is 0.
g_tsc_overhead <- 0
| |
| ###################################### |
| ### Functions |
| ###################################### |
| |
# caps the upper tail of vec so a handful of huge samples don't stretch a
# graph.  the cap is the 99th percentile plus twice the distance between the
# median and the 99th percentile; anything above it is rounded down to the
# cap.  the limit is pretty arbitrary.  useful for not having ridiculously
# large graphs, but still is lousy for various datasets.
#
# vec: numeric vector.  NAs are skipped when working out the cap and are
#      passed through unchanged.
# returns a numeric vector as long as vec.  an empty vec is returned as-is.
round_outlier <- function(vec)
{
  # quantile() of nothing is NA; there is nothing to cap anyway
  if (length(vec) == 0)
    return(vec)

  vec99 <- quantile(vec, .99, na.rm = TRUE, names = FALSE)
  lim <- vec99 + 2 * (vec99 - median(vec, na.rm = TRUE))
  # pmin is the vectorized "min(x, lim) for every x"
  return(pmin(vec, lim))
}
| |
# computes the acquire latency of every sample: the acquire timestamp (V4)
# minus the pre-acquire timestamp (V3), minus the tsc overhead.
#
# the overhead is taken from the data (V6) unless that column is missing,
# empty, or starts with 0; in those cases the global g_tsc_overhead is
# subtracted from every row instead.
#
# data: data frame in the file format (columns V3, V4 and optionally V6).
# returns a numeric vector with one latency per row.
acq_latency <- function(data)
{
  tsc_overhead <- data$V6
  # length() is 0 for a missing column (NULL) as well as for zero rows, so
  # the [1] lookup only happens when there is something to look at
  if (length(tsc_overhead) == 0 || tsc_overhead[1] == 0)
    tsc_overhead <- g_tsc_overhead
  return(data$V4 - data$V3 - tsc_overhead)
}
| |
# computes the hold latency of every sample: the unlock timestamp (V5) minus
# the acquire timestamp (V4), minus the tsc overhead.
#
# the overhead is taken from the data (V6) unless that column is missing,
# empty, or starts with 0; in those cases the global g_tsc_overhead is
# subtracted from every row instead.
#
# data: data frame in the file format (columns V4, V5 and optionally V6).
# returns a numeric vector with one latency per row.
hld_latency <- function(data)
{
  tsc_overhead <- data$V6
  # length() is 0 for a missing column (NULL) as well as for zero rows, so
  # the [1] lookup only happens when there is something to look at
  if (length(tsc_overhead) == 0 || tsc_overhead[1] == 0)
    tsc_overhead <- g_tsc_overhead
  return(data$V5 - data$V4 - tsc_overhead)
}
| |
# histogram, bins based on percentiles, with limits of the graph based on the
# outermost bins (the 1st and 99th percentile).
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell.  bins between distinct
# percentiles hold the same amount of data points, so taller cells show a
# denser concentration in a skinnier bin.
#
# data with many repeated samples puts several percentiles on the same value.
# those are merged into a single break: a zero-width bin has an infinite
# density, which is what used to end in 'need finite ylim'.
#
# i don't actually like this much.  using a round_outlier with 20-bin hist or
# a density plot look nicer.
#
# vec: numeric vector with at least two distinct values.
# prints the percentile table, draws on the current device and returns
# hist()'s result invisibly.
quant_hist <- function(vec)
{
  vec_quant <- quantile(vec, probs = seq(0, 1, .01))
  print(vec_quant)

  breaks <- unique(unname(vec_quant))
  if (length(breaks) < 2)
    stop("quant_hist needs at least two distinct values")

  # second and second-to-last entries, so this stays right if the step of
  # the probs sequence above is ever changed
  nr_quant <- length(vec_quant)
  xlim <- c(vec_quant[[2]], vec_quant[[nr_quant - 1]])
  hist(vec, breaks = breaks, xlim = xlim)
}
| |
# overlays the kernel density estimate of every vector in vecs on one plot,
# one line type per vector.
#
# vecs: list of numeric vectors (at least one).
# names: optional character vector of legend labels, one per vector.  when
#        given, extra headroom is added above the curves so the legend in the
#        top right corner does not sit on top of them.
# outfile: path of a pdf to write.  "" draws on the current device.
# title, xlab: main title and x axis label.
# returns NULL, invisibly.
plot_densities <- function(vecs, names=NULL, outfile="",
                           title="Lock Acquisition Latency",
                           xlab="TSC Ticks")
{
  nr_vecs <- length(vecs)
  if (nr_vecs == 0)
    stop("plot_densities needs at least one vector")

  densities <- lapply(vecs, density)
  # the x limits come from the densities alone, so data that is entirely
  # negative is framed properly.  y keeps 0 as its floor.
  min_x <- min(vapply(densities, function(d) min(d$x), numeric(1)))
  max_x <- max(vapply(densities, function(d) max(d$x), numeric(1)))
  max_y <- max(0, vapply(densities, function(d) max(d$y), numeric(1)))

  linetype <- seq_len(nr_vecs)

  # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
  # can manually move it if we don't want to waste space
  if (!is.null(names)) {
    # measure the legend on a throwaway device: pdf(NULL) writes no file
    # and leaves whatever device the caller has open untouched
    prev_dev <- dev.cur()
    pdf(NULL)
    legend_sz <- tryCatch({
      plot(c(min_x, max_x), c(0, max_y), type="n", xaxt="n", yaxt="n")
      legend("topright", legend=names, lty=linetype, plot=FALSE)
    }, finally = {
      invisible(dev.off())
      # device 1 is the null device, i.e. nothing was open before
      if (prev_dev > 1)
        invisible(dev.set(prev_dev))
    })
    max_y <- 1.04 * (max_y + legend_sz$rect$h)
  }

  if (outfile != "") {
    pdf(outfile)
    # close the file even if plotting fails part way through
    on.exit(invisible(dev.off()), add = TRUE)
  }

  plot(c(min_x, max_x), c(0, max_y), type="n", xlab=xlab, main=title,
       ylab="Density")

  # too many points for type="b" with plot characters, so plain lines that
  # only differ in line type
  for (i in seq_len(nr_vecs))
    lines(densities[[i]], type="l", lty=linetype[i], lwd=1.5)

  if (!is.null(names))
    legend("topright", legend=names, lty=linetype)

  invisible(NULL)
}
| |
| |
# convenience wrapper: density plot of a single vector, without a legend.
# outfile, title and xlab are handed to plot_densities unchanged.
plot_density <- function(vec, outfile="",
                         title="Lock Acquisition Latency",
                         xlab="TSC Ticks")
{
  plot_densities(list(vec), outfile=outfile, title=title, xlab=xlab)
}
| |
| |
# strip chart of when each thread acquired the lock: one row per thread id
# (V1), one dot per acquire timestamp (V4), with time starting at 0 at the
# earliest acquisition.
#
# data: data frame in the file format; only V1 and V4 are used.
# outfile: path of a pdf to write.  "" draws on the current device.
# returns NULL, invisibly.
plot_acq_times <- function(data, outfile="")
{
  if (length(data$V4) == 0)
    stop("no acquisition timestamps to plot")

  # all acquire times, timestamps starting at 0
  acq <- data$V4 - min(data$V4)

  threadid <- unique(data$V1)
  if (is.factor(threadid))
    threadid <- as.character(threadid)

  # pick each thread's timestamps straight out of the V4 vector.  doing the
  # arithmetic on whole data frame rows breaks on non-numeric columns.
  acq_n <- lapply(threadid, function(id) acq[which(data$V1 == id)])
  labels <- paste("Thread ", threadid)

  if (outfile != "") {
    pdf(outfile)
    # close the file even if plotting fails part way through
    on.exit(invisible(dev.off()), add = TRUE)
  }

  # can adjust ylim, default are from 1..nr_items
  stripchart(acq_n, group.names=labels, pch='.', xlab="Time (TSC Ticks)",
             main="Lock Acquisition Timestamps")

  invisible(NULL)
}
| |
# prints a short summary of vec, one print() per line: a separator, the mean
# and standard deviation (4 decimals), the 50/75/90/99/99.9th percentiles
# (rounded to whole numbers) and the min and max.
# the last printed line is also the (invisible) return value.
print_vec <- function(vec)
{
  # every line goes out as print(paste(...)), pieces evaluated just in time
  say <- function(...) print(paste(...))

  print("---------------")
  say("Average: ", round(mean(vec), 4))
  say("Stddev: ", round(sd(vec), 4))
  pcts <- round(quantile(vec, c(.5, .75, .9, .99, .999)))
  say("50/75/90/99/99.9: ", paste(pcts, collapse=" "))
  say("Min: ", min(vec), " Max: ", max(vec))
}
| |
# prints the print_vec summary of the acquire latencies, then of the hold
# latencies, each under its own heading.
#
# using something like the tables package to output latex booktab's would be
# much nicer
print_stats <- function(data)
{
  # both latency vectors are worked out before anything is printed
  lats <- list(acq_latency(data), hld_latency(data))

  print("Acquire Latency")
  print_vec(lats[[1]])
  print("")
  print("Hold Latency")
  print_vec(lats[[2]])
}
| |
# plots lock acquisitions per msec over the run.
#
# if you know how many msec there are, this is like doing:
# hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
#
# data: data frame in the file format; only the acquire timestamps (V4) are
#       used.
# title: main title of the plot.
# outfile: path of a pdf to write.  "" draws on the current device.
# needs the global g_tsc_frequency to be set (non-zero); stops otherwise.
# returns NULL, invisibly.
plot_tput <- function(data, title="Lock Acquisition Throughput", outfile="")
{
  # check everything that can fail before a pdf device gets opened
  if (g_tsc_frequency == 0)
    stop("WARNING: global TSC freq not set!")
  if (length(data$V4) == 0)
    stop("no acquisition timestamps to plot")

  # ticks since the first acquisition, divided by ticks per nsec
  total_acq <- (data$V4 - min(data$V4)) / (g_tsc_frequency / 1e9)

  # rounds down all times to the nearest msec; bucket 0 is the first msec
  msec_times <- trunc(total_acq / 1e6)
  last_msec <- max(msec_times)

  # tabulate counts every bucket from 0 to last_msec, so the msec where no
  # timestamp happened show up as 0 instead of going missing
  locks_per_msec <- tabulate(msec_times + 1, nbins = last_msec + 1)

  if (outfile != "") {
    pdf(outfile)
    # close the file even if plotting fails part way through
    on.exit(invisible(dev.off()), add = TRUE)
  }

  # explicit x values: the first bucket belongs at 0 msec, not at index 1
  plot(0:last_msec, locks_per_msec, type="o", main=title,
       xlab="Time (msec)", ylab="Locks per msec")

  invisible(NULL)
}
| |
| |
# extract useful information from the raw data file
#
# filename: whitespace separated table; anything after a '#' is ignored.
#           column 1 is read as the timestamp of each entry, column 2 is
#           handed back as work_amt.
# returns a list with:
#   work_amt          column 2, as read
#   time_steps_ns     differences between consecutive timestamps (one fewer
#                     than there are entries; empty for a single entry)
#   N_entries         number of entries
#   avg_time_step_ns  mean of time_steps_ns (NaN when there are no steps)
# the names say ns; nothing here converts units, so that relies on the
# timestamps in the file already being in ns.
extract_data <- function(filename) {
  mydata <- read.table(filename, comment.char="#")

  # going through as.character means a column that came in as a factor is
  # converted by its labels rather than by its level codes
  times <- as.numeric(as.character(mydata$V1))
  N_entries <- length(times)

  # diff() copes with a single entry; spelling it out as 2:N against
  # 1:(N-1) counts backwards there and returns garbage
  time_steps_ns <- diff(times)

  return(list(work_amt=mydata$V2, time_steps_ns=time_steps_ns,
              N_entries=N_entries,
              avg_time_step_ns=mean(time_steps_ns)))
}
| |
| |
| ###################################### |
| ### Main |
| ###################################### |
| |
| ### collect command line arguments |
| # establish optional arguments |
| # "-h" and "--help" are automatically in the list |
option_list <- list(
  # input and output paths
  make_option(c("-i", "--input"), type="character",
              default="welch_input.dat", help="Input data file"),
  make_option(c("-o", "--output"), type="character",
              default="welch_plot.pdf", help="Output file for plotting"),
  # horizontal axis limits
  make_option("--xmin", type="double", default=0,
              help=paste0("Minimum frequency (horizontal axis) ",
                          "in output plot [default %default]")),
  make_option("--xmax", type="double", default=40,
              help=paste0("Maximum frequency (horizontal axis) ",
                          "in output plot [default %default]")),
  # vertical axis limits; the default is -1 and the help text advertises
  # it as adaptive
  make_option("--ymin", type="double", default=-1,
              help=paste0("Minimum spectrum (vertical axis) ",
                          "in output plot [default adaptive]")),
  make_option("--ymax", type="double", default=-1,
              help=paste0("Maximum spectrum (vertical axis) ",
                          "in output plot [default adaptive]"))
)
| |
| ## read command line |
| #opt <- parse_args(OptionParser(option_list=option_list)) |
| # |
| ##max_freq = as.numeric(as.character(args[3])) |
| # |
| #### read in data |
| #mydata = extract_data(opt$input) |
| |
| #round_outlier <- function(vec) |
| #acq_latency <- function(data) |
| #hld_latency <- function(data) |
| #plot_densities <- function(vecs, names=NULL, outfile="", |
| #plot_density <- function(vec, outfile="", |
| #plot_acq_times <- function(data, outfile="") |
| #print_vec <- function(vec) |
| #print_stats <- function(data) |
| #plot_tput <- function(data) |
| #mydata = read.table(filename, comment.char="#") |
| |