Overhaul lock_test.R

More functionality and works from the command line.  See you in another
5-6 years.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/scripts/lock_test.R b/scripts/lock_test.R
old mode 100644
new mode 100755
index ca3ec11..0af3081
--- a/scripts/lock_test.R
+++ b/scripts/lock_test.R
@@ -1,20 +1,54 @@
-# brho: 2014-10-13
+#!/usr/bin/env Rscript
 #
-# this is partly fleshed out.  to use, i've just been sourcing the script in R,
-# then overriding the tsc overhead and freq.  then just running various
-# functions directly, like print_stats, plot_densities, plot_tput, etc.  don't
-# expect any command line options to work.
+# brho: 2014-10-13, 2020-06-25
+#
+# to install helper libraries, run R, install manually:
+#
+# 	install.packages('oce')
+# 	install.packages('optparse')
+# 	install.packages('data.table')
+# didn't need to library()-load this, but data.table wanted it at one point
+# 	install.packages('bit64')
+#
+# To run it, you can do stuff like this:
+#
+#  for i in *.dat.gz; do
+#          echo Processing $i...
+#          $AKA_DIR/scripts/lock_test.R -c 1 $i
+#  done
+#
+#  echo "Creating ${DIR%/}-tput.pdf"
+#  $AKA_DIR/scripts/lock_test.R -c 2 *.dat.gz -o ${DIR%/}-tput.pdf
+#
+#  echo "Creating ${DIR%/}-kernel-tput.pdf"
+#  $AKA_DIR/scripts/lock_test.R -c 2 *kernel*.dat.gz -o ${DIR%/}-kernel-tput.pdf
+#
+# TODO:
+# - analyze when the lock jumps between locality regions (hyperthread,
+# core, CCX, numa, whatever).  It's a bit of a pain with Linux, since core 1 is
+# on another numa node than core 0.  Whatever.
+# - For lock_test, have each additional core/thread be allocated close to the
+# existing core (locality), and then look at the lock scalability.  (classic,
+# X-axis is nr_cpus, Y axis is time per lock (avg acq lat)).
+# - Figure out if there was something special about disable/enable IRQs
 
 # library that includes the pwelch function
 suppressPackageStartupMessages(library(oce))
 # library for command line option parsing
 suppressPackageStartupMessages(library(optparse))
+# read.table is brutally slow.  this brings in fread
+suppressPackageStartupMessages(library(data.table))
 
 # file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead
+# 0 0 4748619790389387 4748619790429260 4748619790429323 0
 
 g_tsc_overhead <- 0
 g_tsc_frequency <- 0
 
+# For PNG output...
+g_width <- 1920
+g_height <- 1200
+
 ######################################
 ### Functions
 ######################################
@@ -67,9 +101,63 @@
 	hist(vec, breaks=vec_quant, xlim=c(vec_quant[2], vec_quant[100]))
 }
 
-plot_densities <- function(vecs, names=NULL, outfile="",
-                           title="Lock Acquisition Latency",
-                           xlab="TSC Ticks")
+# line_data is a list of N data sets that can be plotted as lines.  each set
+# should just be an array of values; the index will be the X value.
+#
+# names is a c() of N strings corresponding to the line_data members
+# Set the desired x and y min/max.
+plot_lines <- function(line_data, names=NULL, outfile="", title="",
+		       xlab="X", ylab="Y", min_x=0, max_x=0, min_y=0, max_y=0)
+{
+	nr_lines <- length(line_data)
+
+	# not guaranteed to work, but might save some pain
+	if (max_x == 0) {
+		for (i in 1:nr_lines)
+			max_x <- max(max_x, length(line_data[[i]]))
+	}
+	if (max_y == 0) {
+		for (i in 1:nr_lines)
+			max_y <- max(max_y, max(line_data[[i]]))
+	}
+
+	# http://www.statmethods.net/graphs/line.html
+	colors <- rainbow(nr_lines) # not a huge fan.  color #2 is light blue.
+	linetype <- c(1:nr_lines)
+	plotchar <- seq(18, 18 + nr_lines, 1)
+
+	# http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
+	# can manually move it if we don't want to waste space
+	if (!is.null(names)) {
+		plot(c(min_x, max_x), c(min_y, max_y), type="n", xaxt="n", yaxt="n")
+		legend_sz = legend("topright", legend=names, lty=linetype,
+		                   plot=FALSE)
+		max_y = 1.04 * (max_y + legend_sz$rect$h)
+	}
+
+	if (outfile != "")
+		png(outfile, width=g_width, height=g_height)
+
+	plot(c(min_x, max_x), c(min_y, max_y), type="n", main=title, xlab=xlab,
+		ylab=ylab)
+
+	for (i in 1:nr_lines) {
+		# too many points, so using "l" and no plotchar.
+		#lines(densities[[i]], type="b", lty=linetype[i], col=colors[i],
+		#      pch=plotchar[i], lwd=1.5)
+		lines(line_data[[i]], type="l", lty=linetype[i], col=colors[i],
+			lwd=1.5)
+	}
+
+	#legend(x=min_x, y=max_y, legend=names, lty=linetype, col=colors)
+	if (!is.null(names))
+		legend("topright", legend=names, lty=linetype, col=colors)
+
+	if (outfile != "")
+		invisible(dev.off())
+}
+
+plot_densities <- function(vecs, names=NULL, outfile="")
 {
 	nr_vecs = length(vecs)
 	densities = list()
@@ -85,59 +173,24 @@
 		max_x = max(max_x, dense_i$x)
 		min_x = min(min_x, dense_i$x)
 	}
-
-	# http://www.statmethods.net/graphs/line.html
-	colors <- rainbow(nr_vecs) # not a huge fan.  color #2 is light blue.
-	linetype <- c(1:nr_vecs)
-	plotchar <- seq(18, 18 + nr_vecs, 1)
-
-	# http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
-	# can manually move it if we don't want to waste space
-	if (!is.null(names)) {
-		plot(c(min_x,max_x), c(0, max_y), type="n", xaxt="n", yaxt="n")
-		legend_sz = legend("topright", legend=names, lty=linetype,
-		                   plot=FALSE)
-		max_y = 1.04 * (max_y + legend_sz$rect$h)
-		invisible(dev.off())
-	}
-
-	if (outfile != "")
-		pdf(outfile)
-
-	plot(c(min_x,max_x), c(0, max_y), type="n", xlab=xlab, main=title,
-	     ylab="Density")
-
-	for (i in 1:nr_vecs) {
-		# too many points, so using "l" and no plotchar.
-		#lines(densities[[i]], type="b", lty=linetype[i], col=colors[i],
-		#      pch=plotchar[i], lwd=1.5)
-		lines(densities[[i]], type="l", lty=linetype[i], lwd=1.5)
-	}
-
-	#legend(x=min_x, y=max_y, legend=names, lty=linetype, col=colors)
-	if (!is.null(names))
-		legend("topright", legend=names, lty=linetype)
-
-	if (outfile != "")
-		invisible(dev.off())
+	plot_lines(densities, names=names, outfile=outfile,
+	           title=paste(outfile, "Lock Acquisition Latency"),
+		   xlab="TSC Ticks", ylab="PDF",
+		   min_x=min_x, max_x=max_x, max_y=max_y)
 }
 
-
-plot_density <- function(vec, outfile="",
-                         title="Lock Acquisition Latency",
-                         xlab="TSC Ticks")
+plot_density <- function(vec, outfile="")
 {
 	vecs = list(vec)
-	plot_densities(vecs=vecs, outfile=outfile, title=title, xlab=xlab)
+	plot_densities(vecs=vecs, outfile=outfile)
 }
 
-
 plot_acq_times <- function(data, outfile="")
 {
 	if (outfile != "")
-		pdf(outfile)
+		png(outfile, width=g_width, height=g_height)
 
-    # all acquire times, timestamps starting at 0
+	# all acquire times, timestamps starting at 0
 	time0 = min(data$V4)
 	total_acq <- data$V4 - time0
 
@@ -158,133 +211,380 @@
 		invisible(dev.off())
 }
 
-print_vec <- function(vec)
+print_vec <- function(vec, name)
 {
-	# this whole str, paste dance is nasty
-	print("---------------")
-	str = paste("Average: ", round(mean(vec), 4))
-	print(str)
-	str = paste("Stddev: ", round(sd(vec), 4))
-	print(str)
+	# goddamn.  cat doesn't like integer64 or something, so you need to
+	# pre-paste for the calculations.  print will spit out [1] for row
+	# names, which sucks.
+	cat(name, "\n")
+	cat("---------------\n")
+	cat(paste("Average: ", round(mean(vec), 4), "\n"))
+	cat(paste("Stddev: ", round(sd(vec), 4), "\n"))
 	quants = round(quantile(vec, c(.5, .75, .9, .99, .999)))
-	str = paste("50/75/90/99/99.9: ", quants[[1]], quants[[2]], quants[[3]],
-	            quants[[4]], quants[[5]])
-	print(str)
-	str = paste("Min: ", min(vec), " Max: ", max(vec))
-	print(str)
+	cat(paste("50/75/90/99/99.9: ", quants[[1]], quants[[2]], quants[[3]],
+	      quants[[4]], quants[[5]], "\n"))
+	cat(paste("Min: ", min(vec), " Max: ", max(vec), "\n"))
+	cat("\n")
 }
 
 # using something like the tables package to output latex booktab's would be
 # much nicer
 print_stats <- function(data)
 {
-	acq_lat = acq_latency(data)
-	hld_lat = hld_latency(data)
-
-	print("Acquire Latency")
-	print_vec(acq_lat)
-	print("")
-	print("Hold Latency")
-	print_vec(hld_lat)
+	print_vec(acq_latency(data), "Acquire Latency")
+	print_vec(hld_latency(data), "Hold Latency")
 }
 
-# if you know how many msec there are, this is like doing:
-#     hist(total_acq/1000000, breaks=50)
-# except it gives you a line, with points being the top of the hist bars
-plot_tput <- function(data, title="Lock Acquisition Throughput", outfile="")
+# the 'queue' system includes waiting for the lock and holding the lock.
+# Returns a data.frame, with X = TSC, Y = qlen
+get_qlen <- function(data)
 {
-	if (outfile != "")
-		pdf(outfile)
-
-	total_acq = sort(data$V4 - min(data$V4))
-
 	if (g_tsc_frequency == 0)
 		stop("WARNING: global TSC freq not set!")
-	# convert to nsec? XXX
-	total_acq = total_acq / (g_tsc_frequency / 1e9)
 
-	# rounds down all times to the nearest msec, will collect into a table,
-	# which counts the freq of each bucket, as per:
-	# http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
-	msec_times = trunc(total_acq/1e6)
+	# timestamps for all threads, sorted.
+	# a 'pre' is someone entering the q.  'unl' is leaving.
+	# both are normalized to the TSC time for pres
+	pres <- sort(data$V3 - min(data$V3))
+	unls <- sort(data$V5 - min(data$V3))
 
-	# if we just table directly, we'll lose the absent values (msec where no
-	# timestamp happened).  not sure if factor is the best way, the help
-	# says it should be a small range.
-	# http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
-	msec_times = factor(msec_times, 0:max(msec_times))
+	# not sure the best way to create the data to print.  easiest seems to
+	# be two vectors, X = times, Y = qlens, which we'll pass to plot
+	# but you can't append to them, that's way to slow.  we do know the
+	# overall length of both: one entry for each of pres and unls.
+	# so we preallocate.
+	samples <- length(pres) + length(unls)
 
-	# without the c(), it'll be a bunch of bars at each msec
-	tab = c(table(msec_times))
-	plot(tab, type="o", main=title, xlab="Time (msec)",
-	     ylab="Locks per msec")
+	times <- rep(0, samples)
+	qlens <- rep(0, samples)
+
+	qlen <- 0
+	# R uses 1 indexing
+	p_i <- 1
+	u_i <- 1
+
+	for (i in 1:samples) {
+		now <- 0
+
+		# a bit painful.
+		if (p_i <= length(pres))
+			pre <- pres[[p_i]]
+		else
+			pre <- Inf
+		if (u_i <= length(unls))
+			unl <- unls[[u_i]]
+		else
+			unl <- Inf
+
+		if (pre <= unl) {
+			if (pre == Inf) {
+				print("both pre and unl were Inf!")
+				break
+			}
+
+			qlen <- qlen + 1
+			now <- pre
+			p_i <- p_i + 1
+		} else {
+			if (unl == Inf) {
+				print("both unl and pre were Inf!")
+				break
+			}
+
+			qlen <- qlen - 1
+			now <- unl
+			u_i <- u_i + 1
+		}
+		times[i] = now
+		qlens[i] = qlen
+	}
+	# times is in units of TSC.  convert to msec, for easier comparison
+	# with throughput.  though note that when plotting with e.g. tput, the
+	# x-axis is set by tput, and whatever we print is scaled to fit in that
+	# space.  so if you didn't do this, you wouldn't notice.  (I was using
+	# nsec for a while and didn't notice).
+	times <- times / (g_tsc_frequency / 1e3)
+
+	return(data.frame(setNames(list(times, qlens),
+	                           c("Time (msec)", "Queue Length"))))
+}
+
+plot_qlen <- function(data, outfile="")
+{
+	df <- get_qlen(data)
+	if (outfile != "")
+		png(outfile, width=g_width, height=g_height)
+
+	# df isn't a 'line', so we need to plot it directly
+	plot(df, type="p", main="Spinlock Queue Length")
 
 	if (outfile != "")
 		invisible(dev.off())
 }
 
+get_tput <- function(data)
+{
+	if (g_tsc_frequency == 0)
+		stop("WARNING: global TSC freq not set!")
 
-# extract useful information from the raw data file
-extract_data <- function(filename) {
-	mydata = read.table(filename, comment.char="#")
+	# acq times, sorted, normalized to the lowest, still in ticks
+	total_acq <- sort(data$V4 - min(data$V4))
 
-	work_amt = mydata$V2
+	# convert to nsec
+	total_acq <- total_acq / (g_tsc_frequency / 1e9)
 
-	# calculate time steps and mean time step (all in ns)
-	times = as.numeric(as.character(mydata$V1))
-	N_entries = length(times)
-	time_steps_ns = times[2:N_entries] - times[1:(N_entries-1)]
-	avg_time_step_ns = mean(time_steps_ns)
+	# rounds down all times to the nearest msec, will collect into a table,
+	# which counts the freq of each bucket, as per:
+	# http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
+	msec_times <- trunc(total_acq/1e6)
 
-	return(list(work_amt=work_amt, time_steps_ns=time_steps_ns,
-		N_entries=N_entries, avg_time_step_ns=avg_time_step_ns))
+	# if we just table directly, we'll lose the absent values (msec where no
+	# timestamp happened).  not sure if factor is the best way, the help
+	# says it should be a small range.
+	# http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
+	msec_times <- factor(msec_times, 0:max(msec_times))
+
+	# without the c(), it'll be a bunch of bars at each msec
+	tab <- c(table(msec_times))
+	return(tab)
 }
 
+# this is a little rough.  You need the data loaded, but calculating tput
+# requires the right TSC freq set.  So don't use this with data from different
+# machines (which can be a tough comparison, regardless).
+plot_tputs <- function(data_l, names=NULL, outfile="")
+{
+	nr_lines <- length(data_l)
+	tputs <- list()
+
+	for (i in 1:nr_lines) {
+		# [[ ]] chooses the actual element.  [] just subsets
+		tput_i <- get_tput(data_l[[i]])
+		tputs <- c(tputs, list(tput_i))
+	}
+
+	plot_lines(tputs, names, outfile=outfile,
+	           title=paste(outfile, "Lock Throughput"),
+		   xlab = "Time (msec)", ylab = "Locks per msec")
+}
+
+
+# helper, plots throughput (tp) and whatever else you give it.  e.g. qlen is a
+# value at a timestamp (msec)
+plot_tput_foo <- function(tp, foo, foo_str, outfile="")
+{
+	if (outfile != "")
+		png(outfile, width=g_width, height=g_height)
+
+	# decent looking margins for the right Y axis
+	par(mar = c(5, 5, 3, 5))
+
+	# Scaling ylim to make room for FOO, which tends to be up top
+	plot(tp, type="l", main=paste(outfile, "Lock Throughput and", foo_str),
+	     ylim=c(0, max(tp)*1.2),
+	     xlab="Time (msec)", ylab="Throughput",
+	     col="blue", lty=1)
+
+	par(new = TRUE)
+	plot(foo, type="p", xaxt = "n", yaxt = "n", ylab = "", xlab = "",
+	     col="red", lty=2)
+	axis(side = 4)
+	mtext(foo_str, side = 4)
+
+	legend("topleft", c("tput", foo_str), col = c("blue", "red"),
+	       lty = c(1, 2))
+
+	if (outfile != "")
+		invisible(dev.off())
+}
+
+# plot throughput and queue length on the same graph
+plot_tput_qlen <- function(data, outfile="")
+{
+	tp <- get_tput(data)
+	ql <- get_qlen(data)
+
+	plot_tput_foo(tp, ql, "Qlen", outfile)
+}
+
+# if you know how many msec there are, this is like doing:
+#     hist(total_acq/1000000, breaks=50)
+# except it gives you a line, with points being the top of the hist bars
+plot_tput <- function(data, outfile="")
+{
+	plot_tputs(list(data), outfile=outfile)
+}
+
+# the middle timestamp is the acquire-by-lockholder.  sorting on that, we can
+# see the coreid.  it's a really busy thing to graph.
+#
+# You can see some gaps in non-MCS lock holders when a core is starved or not
+# partipating.  By comparing that gap to the qlen, you can see if it was
+# starvation or just not participating.  Also, you can easily spot a preempted
+# lockholder (if they are gone for enough msec).
+get_core_acqs <- function(data)
+{
+	earliest <- min(data$V3)
+	sorted <- data[order(data$V3)]
+
+	times <- sorted$V3 - earliest
+	cores <- sorted$V1
+
+	# times is in units of TSC.  convert to msec, for easier comparison
+	# with throughput
+	times <- times / (g_tsc_frequency / 1e3)
+
+	return(data.frame(setNames(list(times, cores),
+	                           c("Time (msec)", "Holder ID"))))
+}
+
+plot_tput_core_acqs <- function(data, outfile="")
+{
+	tp <- get_tput(data)
+	ca <- get_core_acqs(data)
+
+	plot_tput_foo(tp, ca, "Holder ID", outfile)
+}
+
+get_tsc_freq <- function(filename) {
+	# format looks like this:
+	# tsc_frequency 2300002733
+	header <- readLines(filename, n=50)
+	line <- grep("tsc_frequency", header, value=TRUE)
+	x <- strsplit(line, split=" ")
+	return(as.numeric(x[[1]][3]))
+}
+
+
+# pretty ugly.  reads in the data, but also sets the TSC freq, which is
+# particular to a given machine.  so if you use this, be sure you do any
+# analyses before loading another.  Sorry.
+load_test_data <- function(filename)
+{
+	g_tsc_frequency <- get_tsc_freq(filename)
+	if (g_tsc_frequency == 0)
+		stop("WARNING: could not set global TSC freq!")
+	assign("g_tsc_frequency", g_tsc_frequency, envir = .GlobalEnv)
+
+	# using grep for a hokey comment.char="#".  it's fast enough for now.
+
+	# integer64="numeric", o/w all sorts of things fuck up.  like density()
+	# and round_outlier().  Even with sprinkling as.numeric() around, it's
+	# still a mess.  This actually worked, though presumably there's a
+	# reason to use int64.
+
+	if (grepl("\\.gz$", filename)) {
+		return(fread(cmd=paste("gunzip -c", filename, "| grep -v '^#'"),
+		             integer64="numeric"))
+	} else {
+		return(fread(cmd=paste("grep -v '^#'", filename),
+		             integer64="numeric"))
+	}
+}
 
 ######################################
 ### Main
 ######################################
-
-### collect command line arguments
 # establish optional arguments
 # "-h" and "--help" are automatically in the list
 option_list <- list(
-  make_option(c("-i", "--input"), type="character",
-    default="welch_input.dat",
-    help="Input data file"),
-  make_option(c("-o", "--output"), type="character",
-    default="welch_plot.pdf",
-    help="Output file for plotting"),
-  make_option("--xmin", type="double", default=0,
-    help=paste("Minimum frequency (horizontal axis) ",
-      "in output plot [default %default]",sep="")),
-  make_option("--xmax", type="double", default=40,
-    help=paste("Maximum frequency (horizontal axis) ",
-      "in output plot [default %default]",sep="")),
-  make_option("--ymin", type="double", default=-1,
-    help=paste("Minimum spectrum (vertical axis) ",
-      "in output plot [default adaptive]",sep="")),
-  make_option("--ymax", type="double", default=-1,
-    help=paste("Maximum spectrum (vertical axis) ",
-      "in output plot [default adaptive]",sep=""))
+	make_option(c("-c", "--cmd"), type="integer", default=-1,
+	            help="
+	1: acq_lat & tput_qlen for one file
+	2: tput comparison for multiple files
+	"),
+	make_option(c("-o", "--output"), type="character",
+	            default="", help="Output file, if applicable")
 )
 
-## read command line
-#opt <- parse_args(OptionParser(option_list=option_list))
-#  
-##max_freq = as.numeric(as.character(args[3]))
-#
-#### read in data
-#mydata = extract_data(opt$input)
+# CMD 1
+single_analysis <- function(args)
+{
+	data_file <- args$args
 
-#round_outlier <- function(vec)
-#acq_latency <- function(data)
-#hld_latency <- function(data)
-#plot_densities <- function(vecs, names=NULL, outfile="",
-#plot_density <- function(vec, outfile="",
-#plot_acq_times <- function(data, outfile="")
-#print_vec <- function(vec)
-#print_stats <- function(data)
-#plot_tput <- function(data)
-#mydata = read.table(filename, comment.char="#")
+	if (file.access(data_file) == -1)
+		stop("cannot access input file")
 
+	data <- load_test_data(data_file)
+
+	basename <- data_file
+	basename <- gsub(".gz$", "", basename)
+	basename <- gsub(".dat$", "", basename)
+
+	sink(paste(basename, "-stats.txt", sep=""))
+	cat("Filename:", data_file, "\n")
+	cat("\n")
+	print(grep("^#", readLines(data_file, n=50), value=TRUE))
+	cat("\n")
+	print_stats(data)
+	sink()
+
+	# For now, create all files.  Can make options later
+	plot_density(round_outlier(acq_latency(data)),
+	             outfile=paste(basename, "-acq.png", sep=""))
+	plot_tput_qlen(data,
+	               outfile=paste(basename, "-tput_qlen.png", sep=""))
+	plot_tput_core_acqs(data,
+	               outfile=paste(basename, "-tput_core_acqs.png", sep=""))
+}
+
+# CMD 2
+tput_comparison <- function(args)
+{
+	outfile <- args$options$output
+
+	if (outfile == "")
+		stop("Need an outfile (-o)")
+
+	tputs <- list()
+	names <- list()
+
+	for (i in args$args) {
+		data <- load_test_data(i)
+		# I wanted to do this:
+		#tputs <- append(tputs, get_tput(data))
+		# but it's a collection of lists of TPUT tables.  or something
+		tputs <- c(tputs, list(get_tput(data)))
+		names <- append(names, i)
+		rm(data)
+	}
+	plot_lines(tputs, names, outfile=outfile,
+	           title="Lock Throughput",
+		   xlab = "Time (msec)",
+		   ylab = "Locks per msec")
+}
+
+main <- function()
+{
+	parser <- OptionParser(usage="%prog [options] data.dat",
+	                       option_list=option_list)
+	# ugly as hell, no error messages, etc.
+	args <- parse_args(parser, positional_arguments = TRUE)
+	opt <- args$options
+
+	f <- switch(opt$cmd, single_analysis, tput_comparison)
+	if (is.null(f))
+		stop("Need a valid (-c) command (-h for options)")
+	f(args)
+}
+
+# don't run when sourced
+if (sys.nframe() == 0L) {
+	main()
+}
+
+# Dirty Hacks Storage
+
+# source("scripts/lock_test.R")
+
+# this is slow and shitty:
+# raw = read.table(filename, comment.char="#")
+
+# to print stuff to a file, you can do
+# that can be really slow for large files, so try png instead
+#pdf("foo.pdf")
+#command that creates a graphic, like hist or plot
+#dev.off()
+
+# hack, so i can re-source this and not have to reload TSC freq
+#g_tsc_frequency <- get_tsc_freq("raw1.dat")
diff --git a/scripts/run_lock_test.sh b/scripts/run_lock_test.sh
new file mode 100755
index 0000000..83169f2
--- /dev/null
+++ b/scripts/run_lock_test.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+AKA_DIR=~/akaros/ros-kernel/
+RESULTS_TOP_DIR=~/data/lock_data
+
+set -e
+
+[ $# -lt 2 ] && echo Need HOST LOOPS && exit -1
+
+HOST=$1
+LOOPS=$2
+TESTS="
+	mcs
+	mcscas
+	spin
+	mcs-kernel
+	queue-kernel
+	spin-kernel
+	"
+
+echo Running lock_test on $HOST for $LOOPS loops, ctl-C to abort
+sleep 5
+
+RESULTS_DIR=$RESULTS_TOP_DIR/$HOST-$LOOPS-$(date +%F-%H%M%S)/
+mkdir $RESULTS_DIR
+
+HDIR=/some/data/brho/
+ssh root@$HOST mkdir -p $HDIR
+scp $AKA_DIR/tests/linux/modules/mcs.ko $AKA_DIR/tests/linux/obj/lock_test root@$HOST:$HDIR
+set +e
+ssh root@$HOST rmmod mcs
+set -e
+ssh root@$HOST insmod $HDIR/mcs.ko
+
+for i in $TESTS; do
+	echo Trying lock_test -l $LOOPS -t $i
+	ssh root@$HOST $HDIR/lock_test -l $LOOPS -t $i -o $HDIR/$HOST-$i-$LOOPS.dat
+	ssh root@HOST gzip $HDIR/$HOST-$i-$LOOPS.dat
+	scp root@$HOST:$HDIR/$HOST-$i-$LOOPS.dat.gz $RESULTS_DIR
+done
+
+echo Done... $RESULTS_DIR