#!/usr/bin/env Rscript
#
# brho: 2014-10-13, 2020-06-25
#
# to install the helper libraries, run R and install them manually:
#
# install.packages('oce')
# install.packages('optparse')
# install.packages('data.table')
# didn't need to library()-load this, but data.table wanted it at one point
# install.packages('bit64')
#
# To run it, you can do stuff like this:
#
# for i in *.dat.gz; do
#     echo Processing $i...
#     $AKA_DIR/scripts/lock_test.R -c 1 $i
# done
#
# echo "Creating ${DIR%/}-tput.pdf"
# $AKA_DIR/scripts/lock_test.R -c 2 *.dat.gz -o ${DIR%/}-tput.pdf
#
# echo "Creating ${DIR%/}-kernel-tput.pdf"
# $AKA_DIR/scripts/lock_test.R -c 2 *kernel*.dat.gz -o ${DIR%/}-kernel-tput.pdf
#
# TODO:
# - analyze when the lock jumps between locality regions (hyperthread,
# core, CCX, NUMA, whatever). It's a bit of a pain on Linux, since core 1 is
# on a different NUMA node than core 0. Whatever.
# - For lock_test, have each additional core/thread be allocated close to the
# existing core (locality), and then look at the lock scalability. (classic,
# X-axis is nr_cpus, Y axis is time per lock (avg acq lat)).
# - Figure out if there was something special about disable/enable IRQs

# library that includes the pwelch function
suppressPackageStartupMessages(library(oce))
# library for command line option parsing
suppressPackageStartupMessages(library(optparse))
# read.table is brutally slow. this brings in fread
suppressPackageStartupMessages(library(data.table))
# file format: thread_id attempt pre acq(uire) un(lock) tsc_overhead
# 0 0 4748619790389387 4748619790429260 4748619790429323 0
g_tsc_overhead <- 0
g_tsc_frequency <- 0
# For PNG output...
g_width <- 1920
g_height <- 1200
######################################
### Functions
######################################
# caps any outlier beyond the 99th percentile plus twice the median-to-99th
# spread, rounding it down to that limit. the limit is pretty arbitrary.
# useful for not having ridiculously large graphs, but still lousy for
# various datasets.
round_outlier <- function(vec)
{
vec99 = quantile(vec, .99)
lim = vec99 + 2 * (vec99 - median(vec))
return(pmin(vec, lim))
}
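# a quick sketch of the math, on a made-up skewed vector:
#   v <- c(rep(10, 99), 1000)
#   quantile(v, .99) is 19.9, median(v) is 10, so lim = 19.9 + 2 * 9.9 = 39.7
#   round_outlier(v) leaves the 10s alone and rounds the 1000 down to 39.7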
# computes acquire latency, using the global TSC overhead if there isn't one
# in the data
acq_latency <- function(data)
{
tsc_overhead = data$V6
if (tsc_overhead[1] == 0)
tsc_overhead = rep(g_tsc_overhead, length(tsc_overhead))
return (data$V4 - data$V3 - tsc_overhead)
}
# computes hold latency, using the global TSC overhead if there isn't one in
# the data
hld_latency <- function(data)
{
tsc_overhead = data$V6
if (tsc_overhead[1] == 0)
tsc_overhead = rep(g_tsc_overhead, length(tsc_overhead))
return (data$V5 - data$V4 - tsc_overhead)
}
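# worked example, using the sample row from the file format comment up top
# (tsc_overhead is 0 there, so nothing extra is subtracted):
#   acq latency: 4748619790429260 - 4748619790389387 = 39873 ticks (acq - pre)
#   hld latency: 4748619790429323 - 4748619790429260 = 63 ticks (un - acq)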
# histogram with bins based on percentiles and graph limits based on the
# outermost bins. somewhat works. can throw a 'need finite ylim' error if
# the bins are too small, probably because some bins end up empty.
#
# with density and percentiles for bins, keep in mind the area of a rectangle
# is the fraction of data points in the cell. since all bins have the same
# amount of data points, taller cells show a denser concentration in a skinnier
# bin
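# concretely: with 100 percentile bins, each rectangle has area 0.01, so a
# bin spanning 50 ticks is drawn at height 0.01 / 50 = 2e-4, while a 5-tick
# bin gets 2e-3: ten times taller for the same share of the data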
#
# i don't actually like this much. using round_outlier with a 20-bin hist
# or a density plot looks nicer.
quant_hist <- function(vec)
{
vec_quant = c(quantile(vec, probs=seq(0, 1, .01)))
print(vec_quant)
# keep the 100 in sync with the 0.01 above
hist(vec, breaks=vec_quant, xlim=c(vec_quant[2], vec_quant[100]))
}
# line_data is a list of N data sets that can be plotted as lines. each set
# should just be an array of values; the index will be the X value.
#
# names is a c() of N strings corresponding to the line_data members
# Set the desired x and y min/max.
plot_lines <- function(line_data, names=NULL, outfile="", title="",
xlab="X", ylab="Y", min_x=0, max_x=0, min_y=0, max_y=0)
{
nr_lines <- length(line_data)
# not guaranteed to work, but might save some pain
if (max_x == 0) {
for (i in 1:nr_lines)
max_x <- max(max_x, length(line_data[[i]]))
}
if (max_y == 0) {
for (i in 1:nr_lines)
max_y <- max(max_y, max(line_data[[i]]))
}
# http://www.statmethods.net/graphs/line.html
colors <- rainbow(nr_lines) # not a huge fan. color #2 is light blue.
linetype <- c(1:nr_lines)
plotchar <- seq(18, 18 + nr_lines, 1)
# http://stackoverflow.com/questions/8929663/r-legend-placement-in-a-plot
# can manually move it if we don't want to waste space
if (!is.null(names)) {
plot(c(min_x, max_x), c(min_y, max_y), type="n", xaxt="n", yaxt="n")
legend_sz = legend("topright", legend=names, lty=linetype,
plot=FALSE)
max_y = 1.04 * (max_y + legend_sz$rect$h)
}
if (outfile != "")
png(outfile, width=g_width, height=g_height)
plot(c(min_x, max_x), c(min_y, max_y), type="n", main=title, xlab=xlab,
ylab=ylab)
for (i in 1:nr_lines) {
# too many points, so using "l" and no plotchar.
#lines(densities[[i]], type="b", lty=linetype[i], col=colors[i],
# pch=plotchar[i], lwd=1.5)
lines(line_data[[i]], type="l", lty=linetype[i], col=colors[i],
lwd=1.5)
}
#legend(x=min_x, y=max_y, legend=names, lty=linetype, col=colors)
if (!is.null(names))
legend("topright", legend=names, lty=linetype, col=colors)
if (outfile != "")
invisible(dev.off())
}
plot_densities <- function(vecs, names=NULL, outfile="")
{
nr_vecs = length(vecs)
densities = list()
max_y = 0
min_x = Inf
max_x = 0
for (i in 1:nr_vecs) {
# [[ ]] chooses the actual element. [] just subsets
dense_i = density(vecs[[i]])
densities = c(densities, list(dense_i))
max_y = max(max_y, dense_i$y)
max_x = max(max_x, dense_i$x)
min_x = min(min_x, dense_i$x)
}
plot_lines(densities, names=names, outfile=outfile,
title=paste(outfile, "Lock Acquisition Latency"),
xlab="TSC Ticks", ylab="PDF",
min_x=min_x, max_x=max_x, max_y=max_y)
}
plot_density <- function(vec, outfile="")
{
vecs = list(vec)
plot_densities(vecs=vecs, outfile=outfile)
}
plot_acq_times <- function(data, outfile="")
{
if (outfile != "")
png(outfile, width=g_width, height=g_height)
# per-thread acquire times, normalized so timestamps start at 0
time0 = min(data$V4)
threadid <- unique(data$V1)
acq_n <- list()
names <- c()
for (i in threadid) {
thread_acqs <- subset(data, data$V1 == i)$V4 - time0
acq_n <- c(acq_n, list(thread_acqs))
names <- c(names, paste("Thread ", i))
}
# can adjust ylim, default are from 1..nr_items
stripchart(acq_n, group.names=names, pch='.', xlab="Time (TSC Ticks)",
main="Lock Acquisition Timestamps")
if (outfile != "")
invisible(dev.off())
}
print_vec <- function(vec, name)
{
# goddamn. cat doesn't like integer64 or something, so you need to
# pre-paste for the calculations. print will spit out [1] for row
# names, which sucks.
cat(name, "\n")
cat("---------------\n")
cat(paste("Average: ", round(mean(vec), 4), "\n"))
cat(paste("Stddev: ", round(sd(vec), 4), "\n"))
quants = round(quantile(vec, c(.5, .75, .9, .99, .999)))
cat(paste("50/75/90/99/99.9: ", quants[[1]], quants[[2]], quants[[3]],
quants[[4]], quants[[5]], "\n"))
cat(paste("Min: ", min(vec), " Max: ", max(vec), "\n"))
cat("\n")
}
# using something like the tables package to output LaTeX booktabs would be
# much nicer
print_stats <- function(data)
{
print_vec(acq_latency(data), "Acquire Latency")
print_vec(hld_latency(data), "Hold Latency")
}
# the 'queue' system includes waiting for the lock and holding the lock.
# Returns a data.frame, with X = time (msec), Y = qlen
get_qlen <- function(data)
{
if (g_tsc_frequency == 0)
stop("WARNING: global TSC freq not set!")
# timestamps for all threads, sorted.
# a 'pre' is someone entering the q. 'unl' is leaving.
# both are normalized to the earliest 'pre' timestamp
pres <- sort(data$V3 - min(data$V3))
unls <- sort(data$V5 - min(data$V3))
# not sure of the best way to create the data to plot. easiest seems to
# be two vectors, X = times, Y = qlens, which we'll pass to plot.
# you can't append to them, though; that's way too slow. we do know the
# overall length of both: one entry for each of pres and unls.
# so we preallocate.
samples <- length(pres) + length(unls)
times <- rep(0, samples)
qlens <- rep(0, samples)
qlen <- 0
# R uses 1 indexing
p_i <- 1
u_i <- 1
for (i in 1:samples) {
now <- 0
# a bit painful.
if (p_i <= length(pres))
pre <- pres[[p_i]]
else
pre <- Inf
if (u_i <= length(unls))
unl <- unls[[u_i]]
else
unl <- Inf
if (pre <= unl) {
if (pre == Inf) {
print("both pre and unl were Inf!")
break
}
qlen <- qlen + 1
now <- pre
p_i <- p_i + 1
} else {
if (unl == Inf) {
print("both unl and pre were Inf!")
break
}
qlen <- qlen - 1
now <- unl
u_i <- u_i + 1
}
times[i] = now
qlens[i] = qlen
}
# times is in units of TSC. convert to msec, for easier comparison
# with throughput. though note that when plotting with e.g. tput, the
# x-axis is set by tput, and whatever we print is scaled to fit in that
# space. so if you didn't do this, you wouldn't notice. (I was using
# nsec for a while and didn't notice).
times <- times / (g_tsc_frequency / 1e3)
return(data.frame(setNames(list(times, qlens),
c("Time (msec)", "Queue Length"))))
}
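# a hand-traced sketch of the merge above, with made-up normalized times:
# pres = (0, 5, 20), unls = (10, 15, 30). ties would go to pre.
#   t=0  pre -> qlen 1
#   t=5  pre -> qlen 2
#   t=10 unl -> qlen 1
#   t=15 unl -> qlen 0
#   t=20 pre -> qlen 1
#   t=30 unl -> qlen 0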
plot_qlen <- function(data, outfile="")
{
df <- get_qlen(data)
if (outfile != "")
png(outfile, width=g_width, height=g_height)
# df isn't a 'line', so we need to plot it directly
plot(df, type="p", main="Spinlock Queue Length")
if (outfile != "")
invisible(dev.off())
}
get_tput <- function(data)
{
if (g_tsc_frequency == 0)
stop("WARNING: global TSC freq not set!")
# acq times, sorted, normalized to the lowest, still in ticks
total_acq <- sort(data$V4 - min(data$V4))
# convert to nsec
total_acq <- total_acq / (g_tsc_frequency / 1e9)
# round all times down to the nearest msec, then collect into a table
# that counts the freq of each bucket, as per:
# http://stackoverflow.com/questions/5034513/how-to-graph-requests-per-second-from-web-log-file-using-r
msec_times <- trunc(total_acq/1e6)
# if we just table directly, we'll lose the absent values (msec where no
# timestamp happened). not sure if factor is the best way; the help says
# the levels should be a small range.
# http://stackoverflow.com/questions/1617061/including-absent-values-in-table-results-in-r
msec_times <- factor(msec_times, 0:max(msec_times))
# without the c(), it'll be a bunch of bars at each msec
tab <- c(table(msec_times))
return(tab)
}
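# a minimal sketch of the factor trick, with made-up msec buckets:
#   table(c(0, 0, 3))              # drops the empty msecs 1 and 2 entirely
#   table(factor(c(0, 0, 3), 0:3)) # keeps them as zero-count buckets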
# this is a little rough. You need the data loaded, but calculating tput
# requires the right TSC freq set. So don't use this with data from different
# machines (which can be a tough comparison, regardless).
plot_tputs <- function(data_l, names=NULL, outfile="")
{
nr_lines <- length(data_l)
tputs <- list()
for (i in 1:nr_lines) {
# [[ ]] chooses the actual element. [] just subsets
tput_i <- get_tput(data_l[[i]])
tputs <- c(tputs, list(tput_i))
}
plot_lines(tputs, names, outfile=outfile,
title=paste(outfile, "Lock Throughput"),
xlab = "Time (msec)", ylab = "Locks per msec")
}
# helper, plots throughput (tp) and whatever else you give it. e.g. qlen is a
# value at a timestamp (msec)
plot_tput_foo <- function(tp, foo, foo_str, outfile="")
{
if (outfile != "")
png(outfile, width=g_width, height=g_height)
# decent looking margins for the right Y axis
par(mar = c(5, 5, 3, 5))
# Scaling ylim to make room for FOO, which tends to be up top
plot(tp, type="l", main=paste(outfile, "Lock Throughput and", foo_str),
ylim=c(0, max(tp)*1.2),
xlab="Time (msec)", ylab="Throughput",
col="blue", lty=1)
par(new = TRUE)
plot(foo, type="p", xaxt = "n", yaxt = "n", ylab = "", xlab = "",
col="red", lty=2)
axis(side = 4)
mtext(foo_str, side = 4)
legend("topleft", c("tput", foo_str), col = c("blue", "red"),
lty = c(1, 2))
if (outfile != "")
invisible(dev.off())
}
# plot throughput and queue length on the same graph
plot_tput_qlen <- function(data, outfile="")
{
tp <- get_tput(data)
ql <- get_qlen(data)
plot_tput_foo(tp, ql, "Qlen", outfile)
}
# if you know how many msec there are, this is like doing:
# hist(total_acq/1000000, breaks=50)
# except it gives you a line, with points being the top of the hist bars
plot_tput <- function(data, outfile="")
{
plot_tputs(list(data), outfile=outfile)
}
# the middle timestamp is the acquire-by-lockholder. sorting on that, we can
# see the coreid. it's a really busy thing to graph.
#
# You can see some gaps in non-MCS lock holders when a core is starved or not
# participating. By comparing that gap to the qlen, you can see if it was
# starvation or just not participating. Also, you can easily spot a preempted
# lockholder (if they are gone for enough msec).
get_core_acqs <- function(data)
{
earliest <- min(data$V4)
sorted <- data[order(data$V4)]
times <- sorted$V4 - earliest
cores <- sorted$V1
# times is in units of TSC. convert to msec, for easier comparison
# with throughput
times <- times / (g_tsc_frequency / 1e3)
return(data.frame(setNames(list(times, cores),
c("Time (msec)", "Holder ID"))))
}
plot_tput_core_acqs <- function(data, outfile="")
{
tp <- get_tput(data)
ca <- get_core_acqs(data)
plot_tput_foo(tp, ca, "Holder ID", outfile)
}
get_tsc_freq <- function(filename) {
# the data file's '#' header contains a line that looks like this:
# # tsc_frequency 2300002733
header <- readLines(filename, n=50)
line <- grep("tsc_frequency", header, value=TRUE)
x <- strsplit(line, split=" ")
return(as.numeric(x[[1]][3]))
}
# pretty ugly. reads in the data, but also sets the TSC freq, which is
# particular to a given machine. so if you use this, be sure to finish any
# analyses before loading another file. Sorry.
load_test_data <- function(filename)
{
g_tsc_frequency <- get_tsc_freq(filename)
if (g_tsc_frequency == 0)
stop("WARNING: could not set global TSC freq!")
assign("g_tsc_frequency", g_tsc_frequency, envir = .GlobalEnv)
# using grep for a hokey comment.char="#". it's fast enough for now.
# integer64="numeric", o/w all sorts of things fuck up. like density()
# and round_outlier(). Even with sprinkling as.numeric() around, it's
# still a mess. This actually worked, though presumably there's a
# reason to use int64.
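# for reference, the cmd below expands to a shell pipeline roughly like
# this (hypothetical filename):
#   gunzip -c some_run.dat.gz | grep -v '^#'
# i.e. decompress if needed and strip the '#' header before fread parses it.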
if (grepl("\\.gz$", filename)) {
return(fread(cmd=paste("gunzip -c", filename, "| grep -v '^#'"),
integer64="numeric"))
} else {
return(fread(cmd=paste("grep -v '^#'", filename),
integer64="numeric"))
}
}
######################################
### Main
######################################
# establish optional arguments
# "-h" and "--help" are automatically in the list
option_list <- list(
make_option(c("-c", "--cmd"), type="integer", default=-1,
help="
1: full analysis of one file (stats, acq lat, tput/qlen, tput/core acqs)
2: tput comparison for multiple files
"),
make_option(c("-o", "--output"), type="character",
default="", help="Output file, if applicable")
)
# CMD 1
single_analysis <- function(args)
{
data_file <- args$args
if (file.access(data_file) == -1)
stop("cannot access input file")
data <- load_test_data(data_file)
basename <- data_file
basename <- gsub(".gz$", "", basename)
basename <- gsub(".dat$", "", basename)
sink(paste(basename, "-stats.txt", sep=""))
cat("Filename:", data_file, "\n")
cat("\n")
print(grep("^#", readLines(data_file, n=50), value=TRUE))
cat("\n")
print_stats(data)
sink()
# For now, create all files. Can make options later
plot_density(round_outlier(acq_latency(data)),
outfile=paste(basename, "-acq.png", sep=""))
plot_tput_qlen(data,
outfile=paste(basename, "-tput_qlen.png", sep=""))
plot_tput_core_acqs(data,
outfile=paste(basename, "-tput_core_acqs.png", sep=""))
}
# CMD 2
tput_comparison <- function(args)
{
outfile <- args$options$output
if (outfile == "")
stop("Need an outfile (-o)")
tputs <- list()
names <- list()
for (i in args$args) {
data <- load_test_data(i)
# I wanted to do this:
#tputs <- append(tputs, get_tput(data))
# but it's a collection of lists of TPUT tables. or something
tputs <- c(tputs, list(get_tput(data)))
names <- append(names, i)
rm(data)
}
plot_lines(tputs, names, outfile=outfile,
title="Lock Throughput",
xlab = "Time (msec)",
ylab = "Locks per msec")
}
main <- function()
{
parser <- OptionParser(usage="%prog [options] data.dat",
option_list=option_list)
# ugly as hell, no error messages, etc.
args <- parse_args(parser, positional_arguments = TRUE)
opt <- args$options
f <- switch(opt$cmd, single_analysis, tput_comparison)
if (is.null(f))
stop("Need a valid (-c) command (-h for options)")
f(args)
}
# don't run when sourced
if (sys.nframe() == 0L) {
main()
}
# Dirty Hacks Storage
# source("scripts/lock_test.R")
# this is slow and shitty:
# raw = read.table(filename, comment.char="#")
# to send graphics to a file, you can do the following. pdf can be really
# slow for large files, so try png instead:
#pdf("foo.pdf")
#command that creates a graphic, like hist or plot
#dev.off()
# hack, so i can re-source this and not have to reload TSC freq
#g_tsc_frequency <- get_tsc_freq("raw1.dat")