# blob: ca3ec114efdc7459058115cfe761667f7b45727d [file] [log] [blame]
# brho: 2014-10-13
#
# this is partly fleshed out. to use, i've just been sourcing the script in R,
# then overriding the tsc overhead and freq. then just running various
# functions directly, like print_stats, plot_densities, plot_tput, etc. don't
# expect any command line options to work.
# library that includes the pwelch function
suppressPackageStartupMessages(library(oce))
# library for command line option parsing
suppressPackageStartupMessages(library(optparse))
# file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead
# (the functions below read these fields as columns V1, V3, V4, V5 and V6 of
# the loaded table; presumably V2 is the attempt number)
#
# fallback tsc overhead: subtracted from every latency by acq_latency and
# hld_latency when the data's tsc_overhead column starts with 0.  meant to be
# overridden after sourcing the script.
g_tsc_overhead <- 0
# tsc frequency used by plot_tput to convert ticks to nsec (presumably in
# ticks per second).  plot_tput stops with an error while this is still 0.
g_tsc_frequency <- 0
######################################
### Functions
######################################
# clamps large outliers so graphs stay readable.  every value above
#     lim = q99 + 2 * (q99 - median)
# is rounded down to lim, where q99 is the 99th percentile of vec.  the limit
# is pretty arbitrary.  useful for not having ridiculously large graphs, but
# still is lousy for various datasets.
#
# vec: numeric vector
# returns a numeric vector of the same length as vec.  an empty vec is
# returned unchanged.
round_outlier <- function(vec)
{
    if (length(vec) == 0)
        return(vec)
    # names = FALSE keeps quantile's "99%" label out of the limit
    vec99 <- quantile(vec, .99, names = FALSE)
    lim <- vec99 + 2 * (vec99 - median(vec))
    # pmin clamps the whole vector in one call, instead of one min() per item
    return(pmin(vec, lim))
}
# computes the acquire latency of every record: acq (V4) - pre (V3) - tsc
# overhead.  the overhead is taken from the data's V6 column.  if that column
# is missing, or its first entry is 0, the global g_tsc_overhead is used
# instead.
#
# data: data frame with columns V3, V4 and (optionally) V6
# returns a numeric vector with one latency per row of data (empty for empty
# data).
acq_latency <- function(data)
{
    tsc_overhead <- data$V6
    # isTRUE: an empty column makes the comparison NA, which must not reach
    # a bare if()
    if (is.null(tsc_overhead) || isTRUE(tsc_overhead[1] == 0))
        tsc_overhead <- g_tsc_overhead
    # a scalar overhead is recycled across all rows
    return(data$V4 - data$V3 - tsc_overhead)
}
# computes the hold latency of every record: un(lock) (V5) - acq (V4) - tsc
# overhead.  the overhead is taken from the data's V6 column.  if that column
# is missing, or its first entry is 0, the global g_tsc_overhead is used
# instead.
#
# data: data frame with columns V4, V5 and (optionally) V6
# returns a numeric vector with one latency per row of data (empty for empty
# data).
hld_latency <- function(data)
{
    tsc_overhead <- data$V6
    # isTRUE: an empty column makes the comparison NA, which must not reach
    # a bare if()
    if (is.null(tsc_overhead) || isTRUE(tsc_overhead[1] == 0))
        tsc_overhead <- g_tsc_overhead
    # a scalar overhead is recycled across all rows
    return(data$V5 - data$V4 - tsc_overhead)
}
# histogram whose bin edges are the percentiles of vec (0%, 1%, ..., 100%).
# the x axis is limited to the span between the 1% and 99% edges.  somewhat
# works.  can get a 'need finite ylim' if the bins are too small, maybe since
# there are no values in them.
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell.  every bin holds the same number
# of data points, so a taller cell means a denser concentration in a skinnier
# bin.
#
# i don't actually like this much.  a round_outlier with a 20-bin hist, or a
# density plot, looks nicer.
#
# prints the percentile table, draws the histogram, and returns whatever
# hist() returns.
quant_hist <- function(vec)
{
    pct_edges <- quantile(vec, probs = seq(0, 1, by = .01))
    print(pct_edges)
    # second and second-to-last edges are the 1% and 99% percentiles
    nr_edges <- length(pct_edges)
    hist(vec, breaks = pct_edges,
         xlim = c(pct_edges[2], pct_edges[nr_edges - 1]))
}
# plots the kernel density estimate of every vector in vecs as one line each
# on a shared graph.
#
# vecs:    list of numeric vectors, one density line per element
# names:   optional labels, one per vector.  if given, a legend is drawn in
#          the top right and the y axis is stretched by the legend's height
# outfile: if not "", the graph is written to this pdf file
# title:   main title of the graph
# xlab:    label of the x axis
#
# called for its side effect.  returns NULL invisibly.
plot_densities <- function(vecs, names=NULL, outfile="",
                           title="Lock Acquisition Latency",
                           xlab="TSC Ticks")
{
    nr_vecs <- length(vecs)
    if (nr_vecs == 0)
        stop("plot_densities: vecs is empty, nothing to plot")
    densities <- lapply(vecs, density)
    # the axes must cover every curve.  the x limits come from the curves
    # only, so all-negative data is not padded out to 0.
    max_y <- max(0, vapply(densities, function(d) max(d$y), numeric(1)))
    min_x <- min(vapply(densities, function(d) min(d$x), numeric(1)))
    max_x <- max(vapply(densities, function(d) max(d$x), numeric(1)))
    # http://www.statmethods.net/graphs/line.html
    linetype <- seq_len(nr_vecs)
    # http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
    # can manually move it if we don't want to waste space
    if (!is.null(names)) {
        # throwaway plot, only there to measure the legend.  the y axis then
        # grows by the legend's height.  note this draws on, and then closes,
        # the current graphics device.
        plot(c(min_x,max_x), c(0, max_y), type="n", xaxt="n", yaxt="n")
        legend_sz <- legend("topright", legend=names, lty=linetype,
                            plot=FALSE)
        max_y <- 1.04 * (max_y + legend_sz$rect$h)
        invisible(dev.off())
    }
    if (outfile != "") {
        pdf(outfile)
        # closes the pdf even if plotting fails part way through
        on.exit(dev.off(), add = TRUE)
    }
    plot(c(min_x,max_x), c(0, max_y), type="n", xlab=xlab, main=title,
         ylab="Density")
    for (i in seq_len(nr_vecs)) {
        # too many points, so using "l" and no plotchar.
        lines(densities[[i]], type="l", lty=linetype[i], lwd=1.5)
    }
    if (!is.null(names))
        legend("topright", legend=names, lty=linetype)
    invisible(NULL)
}
# convenience wrapper for a single vector: vec is wrapped in a one-element
# list and handed to plot_densities together with outfile, title and xlab.
# no legend names are passed.
plot_density <- function(vec, outfile="",
                         title="Lock Acquisition Latency",
                         xlab="TSC Ticks")
{
    plot_densities(list(vec), outfile=outfile, title=title, xlab=xlab)
}
# strip chart of the lock acquisition timestamps, one row of dots per thread.
# timestamps are shifted so the earliest acquisition is at time 0.
#
# data:    data frame with the thread id in V1 and the acquire timestamp in V4
# outfile: if not "", the chart is written to this pdf file
#
# called for its side effect.  returns NULL invisibly.
plot_acq_times <- function(data, outfile="")
{
    # all acquire times, timestamps starting at 0
    time0 <- min(data$V4)
    threadid <- unique(data$V1)
    # only V4 is plotted, so only V4 is shifted.  other columns are left
    # alone and may be of any type.
    acq_n <- lapply(threadid,
                    function(i) data$V4[which(data$V1 == i)] - time0)
    names <- paste("Thread ", threadid)
    if (outfile != "") {
        pdf(outfile)
        # closes the pdf even if plotting fails part way through
        on.exit(dev.off(), add = TRUE)
    }
    # can adjust ylim, default are from 1..nr_items
    stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
               main="Lock Acquisition Timestamps")
    invisible(NULL)
}
# prints a short summary of vec, one print() line each: a separator, the mean
# and standard deviation (rounded to 4 places), the 50/75/90/99/99.9
# percentiles (rounded to whole numbers), and the min and max.
# returns the min/max line, invisibly, as the value of the last print().
print_vec <- function(vec)
{
    pct <- round(quantile(vec, c(.5, .75, .9, .99, .999)))
    print("---------------")
    print(paste("Average: ", round(mean(vec), 4)))
    print(paste("Stddev: ", round(sd(vec), 4)))
    # the five percentiles, space separated, after the label
    print(paste("50/75/90/99/99.9: ", paste(pct, collapse = " ")))
    print(paste("Min: ", min(vec), " Max: ", max(vec)))
}
# prints the print_vec summary of the acquire latencies, then of the hold
# latencies, of data.  both latency vectors are computed before anything is
# printed.
# using something like the tables package to output latex booktab's would be
# much nicer
print_stats <- function(data)
{
    lat <- list(acq = acq_latency(data), hld = hld_latency(data))
    print("Acquire Latency")
    print_vec(lat$acq)
    print("")
    print("Hold Latency")
    print_vec(lat$hld)
}
# plots lock acquisition throughput: the number of acquisitions in each msec
# of the run, drawn as a line with points.
#
# if you know how many msec there are, this is like doing:
# hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
#
# data:    data frame with the acquire timestamp (in tsc ticks) in V4
# title:   main title of the graph
# outfile: if not "", the graph is written to this pdf file
#
# needs the global g_tsc_frequency to be set; stops with an error if it is
# still 0.  returns NULL invisibly.
plot_tput <- function(data, title="Lock Acquisition Throughput", outfile="")
{
    # checked before any device is opened, so failing here can't leave a pdf
    # device open
    if (g_tsc_frequency == 0)
        stop("WARNING: global TSC freq not set!")
    total_acq <- sort(data$V4 - min(data$V4))
    # g_tsc_frequency / 1e9 is ticks per nsec, so this converts ticks to nsec
    total_acq <- total_acq / (g_tsc_frequency / 1e9)
    # rounds down all times to the nearest msec, will collect into a table,
    # which counts the freq of each bucket, as per:
    # http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
    msec_times <- trunc(total_acq/1e6)
    # if we just table directly, we'll lose the absent values (msec where no
    # timestamp happened).  not sure if factor is the best way, the help
    # says it should be a small range.
    # http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
    msec_times <- factor(msec_times, 0:max(msec_times))
    # without the c(), it'll be a bunch of bars at each msec
    tab <- c(table(msec_times))
    if (outfile != "") {
        pdf(outfile)
        # closes the pdf even if plotting fails part way through
        on.exit(dev.off(), add = TRUE)
    }
    # NOTE: tab is plotted against its index, so the bucket for msec k is
    # drawn at x = k + 1
    plot(tab, type="o", main=title, xlab="Time (msec)",
         ylab="Locks per msec")
    invisible(NULL)
}
# extract useful information from the raw data file
#
# filename: path of a table readable by read.table.  anything after a '#' is
#           treated as a comment.  column 1 holds the timestamps, column 2
#           the work amount.
#
# returns a list with:
#   work_amt          column 2 of the file
#   time_steps_ns     differences between consecutive timestamps
#   N_entries         number of rows read
#   avg_time_step_ns  mean of time_steps_ns (NaN if there are fewer than 2
#                     rows)
# the _ns names presumably mean the timestamps are in nanoseconds.
extract_data <- function(filename) {
    mydata <- read.table(filename, comment.char="#")
    work_amt <- mydata$V2
    # as.character first, so a column read as a factor still yields its
    # printed values rather than the factor codes
    times <- as.numeric(as.character(mydata$V1))
    N_entries <- length(times)
    # calculate time steps and mean time step.  diff() gives an empty vector
    # for a single row, where index arithmetic would produce bogus entries.
    time_steps_ns <- diff(times)
    avg_time_step_ns <- mean(time_steps_ns)
    return(list(work_amt=work_amt, time_steps_ns=time_steps_ns,
                N_entries=N_entries, avg_time_step_ns=avg_time_step_ns))
}
######################################
### Main
######################################
### command line options
# optional arguments understood by the script: input and output file names
# and the x/y limits of the output plot.
# "-h" and "--help" are automatically in the list
option_list <- list(
    make_option(c("-i", "--input"), type="character",
                default="welch_input.dat", help="Input data file"),
    make_option(c("-o", "--output"), type="character",
                default="welch_plot.pdf", help="Output file for plotting"),
    make_option("--xmin", type="double", default=0,
                help=paste0("Minimum frequency (horizontal axis) ",
                            "in output plot [default %default]")),
    make_option("--xmax", type="double", default=40,
                help=paste0("Maximum frequency (horizontal axis) ",
                            "in output plot [default %default]")),
    make_option("--ymin", type="double", default=-1,
                help=paste0("Minimum spectrum (vertical axis) ",
                            "in output plot [default adaptive]")),
    make_option("--ymax", type="double", default=-1,
                help=paste0("Maximum spectrum (vertical axis) ",
                            "in output plot [default adaptive]"))
)
## read command line
#opt <- parse_args(OptionParser(option_list=option_list))
#
##max_freq = as.numeric(as.character(args[3]))
#
#### read in data
#mydata = extract_data(opt$input)
#round_outlier <- function(vec)
#acq_latency <- function(data)
#hld_latency <- function(data)
#plot_densities <- function(vecs, names=NULL, outfile="",
#plot_density <- function(vec, outfile="",
#plot_acq_times <- function(data, outfile="")
#print_vec <- function(vec)
#print_stats <- function(data)
#plot_tput <- function(data)
#mydata = read.table(filename, comment.char="#")