tests/lock_test: overhaul lock_test
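In preparation for handling another lock type, this commit cleans up
lock_test. Better lock types: the lock types now live in a table that also
drives the -t option and its help output. Better output options: the worker
and loop counts have defaults, and the output file records the test name,
core/worker counts, loop count, and OS. Easier to follow code: main() is
split into run_test(), analyze(), and save_results().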
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
diff --git a/tests/lock_test.c b/tests/lock_test.c
index a02d7fb..8afc161 100644
--- a/tests/lock_test.c
+++ b/tests/lock_test.c
@@ -20,8 +20,11 @@
#include <assert.h>
#include <string.h>
+#define handle_error(msg) \
+ do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
/* OS dependent #incs */
-#ifdef __ros__
+#ifdef __akaros__
#include <parlib/parlib.h>
#include <parlib/stdio.h>
@@ -35,6 +38,11 @@
#include <parlib/tsc-compat.h>
#include <benchutil/measure.h>
+static const char *os_name(void)
+{
+ return "Akaros";
+}
+
#else
#include "../user/parlib/include/parlib/tsc-compat.h"
@@ -44,14 +52,14 @@
static void os_prep_work(pthread_t *worker_threads, int nr_threads)
{
- if (nr_threads > num_vcores())
+ if (nr_threads > max_vcores())
printf("WARNING: %d threads requested, but only %d cores available\n",
- nr_threads, num_vcores());
+ nr_threads, max_vcores());
}
static void os_post_work(pthread_t *worker_threads, int nr_threads)
{
- if (nr_threads > num_vcores())
+ if (nr_threads > max_vcores())
return;
/* assuming we're taking cores 0..nr_threads, and we never move. */
for (int i = 0; i < nr_threads; i++) {
@@ -63,6 +71,11 @@
}
}
+static const char *os_name(void)
+{
+ return "Linux";
+}
+
#define print_preempt_trace(args...) {}
__thread int __vcore_context = 0;
@@ -449,8 +462,8 @@
*
*/
-const char *argp_program_version = "lock_test v0.1475263";
-const char *argp_program_bug_address = "<akaros+subscribe@googlegroups.com>";
+const char *argp_program_version = "lock_test v0.1475264";
+const char *argp_program_bug_address = "<akaros@googlegroups.com>";
static char doc[] = "lock_test -- spinlock benchmarking";
static char args_doc[] = "-w NUM -l NUM -t LOCK";
@@ -459,19 +472,11 @@
#define OPT_ADJ_WORKERS 2
static struct argp_option options[] = {
- {"workers", 'w', "NUM", OPTION_NO_USAGE, "Number of threads/cores"},
+ {"workers", 'w', "NUM", OPTION_NO_USAGE, "Number of threads/cores (max possible)"},
{0, 0, 0, 0, ""},
- {"loops", 'l', "NUM", OPTION_NO_USAGE, "Number of loops per worker"},
+ {"loops", 'l', "NUM", OPTION_NO_USAGE, "Number of loops per worker (10000)"},
{0, 0, 0, 0, ""},
- {"type", 't', "LOCK",OPTION_NO_USAGE, "Type of lock to use. "
- "Options:\n"
- "\tmcs\n"
- "\tmcscas\n"
- "\tmcspdr\n"
- "\tmcspdro\n"
- "\t__mcspdro\n"
- "\tspin\n"
- "\tspinpdr"},
+ {"type", 't', "LOCK",OPTION_NO_USAGE, "Type of lock to use" },
{0, 0, 0, 0, "Other options (not mandatory):"},
{"adj_workers", OPT_ADJ_WORKERS, 0, 0,
"Adjust workers such that the "
@@ -486,27 +491,32 @@
{ 0 }
};
+struct lock_test {
+ const char *name;
+ void *(*func)(void *arg);
+};
+
struct prog_args {
- int nr_threads;
- int nr_loops;
- int hold_time;
- int delay_time;
- int nr_print_rows;
+ unsigned int nr_threads;
+ unsigned int nr_loops;
+ unsigned int hold_time;
+ unsigned int delay_time;
+ unsigned int nr_print_rows;
bool fake_vc_ctx;
bool adj_workers;
char *outfile_path;
- void *(*lock_type)(void *arg);
+ struct lock_test *test;
};
struct prog_args pargs = {0};
/* Globals */
-struct time_stamp {
+struct lock_sample {
uint64_t pre;
uint64_t acq;
uint64_t un;
bool valid;
};
-struct time_stamp **times;
+struct lock_sample **times;
bool run_locktest = TRUE;
pthread_barrier_t start_test;
@@ -523,7 +533,7 @@
bool fake_vc_ctx = ACCESS_ONCE(pargs.fake_vc_ctx); \
bool adj_workers = ACCESS_ONCE(pargs.adj_workers); \
uint64_t pre_lock, acq_lock, un_lock; \
- struct time_stamp *this_time; \
+ struct lock_sample *this_time; \
struct mcs_lock_qnode mcs_qnode = MCS_QNODE_INIT; \
struct mcs_pdro_qnode pdro_qnode = MCSPDRO_QNODE_INIT; \
int i; \
@@ -588,13 +598,6 @@
return (void*)(long)i; \
}
-#define fake_lock_func(lock_name, x1, x2) \
-void *lock_name##_thread(void *arg) \
-{ \
- printf("Lock " #lock_name " not supported!\n"); \
- exit(-1); \
-}
-
spinlock_t spin_lock = SPINLOCK_INITIALIZER;
struct mcs_lock mcs_lock = MCS_LOCK_INIT;
@@ -609,7 +612,7 @@
spinlock_lock(&spin_lock);,
spinlock_unlock(&spin_lock);)
-#ifdef __ros__
+#ifdef __akaros__
struct spin_pdr_lock spdr_lock = SPINPDR_INITIALIZER;
struct mcs_pdr_lock mcspdr_lock;
struct mcs_pdro_lock mcspdro_lock = MCSPDRO_LOCK_INIT;
@@ -626,18 +629,32 @@
lock_func(spinpdr,
spin_pdr_lock(&spdr_lock);,
spin_pdr_unlock(&spdr_lock);)
+
+static struct lock_test tests[] = {	/* lock types selectable with -t */
+	{"mcs", mcs_thread},
+	{"mcscas", mcscas_thread},
+	{"mcspdr", mcspdr_thread},
+	{"mcspdro", mcspdro_thread},
+	{"__mcspdro", __mcspdro_thread},
+	{"spin", spin_thread},
+	{"spinpdr", spinpdr_thread},
+	{ 0 }	/* sentinel: a NULL name ends the table ("{}" is not ISO C < C23) */
+};
+
#else
-fake_lock_func(mcspdr, 0, 0);
-fake_lock_func(mcspdro, 0, 0);
-fake_lock_func(__mcspdro, 0, 0);
-fake_lock_func(spinpdr, 0, 0);
+static struct lock_test tests[] = {	/* lock types selectable with -t */
+	{"mcs", mcs_thread},
+	{"mcscas", mcscas_thread},
+	{"spin", spin_thread},
+	{ 0 }	/* sentinel: a NULL name ends the table ("{}" is not ISO C < C23) */
+};
#endif
static int get_acq_latency(void **data, int i, int j, uint64_t *sample)
{
- struct time_stamp **times = (struct time_stamp**)data;
+ struct lock_sample **times = (struct lock_sample**)data;
/* 0 for initial time means we didn't measure */
if (times[i][j].pre == 0)
return -1;
@@ -653,7 +670,7 @@
static int get_hld_latency(void **data, int i, int j, uint64_t *sample)
{
- struct time_stamp **times = (struct time_stamp**)data;
+ struct lock_sample **times = (struct lock_sample**)data;
/* 0 for initial time means we didn't measure */
if (times[i][j].pre == 0)
return -1;
@@ -663,7 +680,7 @@
static int get_acq_timestamp(void **data, int i, int j, uint64_t *sample)
{
- struct time_stamp **times = (struct time_stamp**)data;
+ struct lock_sample **times = (struct lock_sample**)data;
/* 0 for initial time means we didn't measure */
if (times[i][j].pre == 0)
return -1;
@@ -671,7 +688,7 @@
return 0;
}
-#ifdef __ros__
+#ifdef __akaros__
/* Lousy event intercept. build something similar in the event library? */
#define MAX_NR_EVENT_TRACES 1000
@@ -766,6 +783,14 @@
#endif
+static void print_lock_types(void)
+{
+ printf("Available lock types:\n");
+ for (struct lock_test *t = tests; t->name; t++)
+ printf("\t%s\n", t->name);
+ printf("\n");
+}
+
/* Argument parsing */
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
@@ -817,53 +842,25 @@
}
break;
case 't':
- if (!strcmp("mcs", arg)) {
- pargs->lock_type = mcs_thread;
- break;
+ for (struct lock_test *t = tests; t->name; t++) {
+ if (!strcmp(t->name, arg)) {
+ pargs->test = t;
+ break;
+ }
}
- if (!strcmp("mcscas", arg)) {
- pargs->lock_type = mcscas_thread;
- break;
+ if (!pargs->test) {
+ printf("Unknown locktype %s\n\n", arg);
+ print_lock_types();
+ argp_usage(state);
}
- if (!strcmp("mcspdr", arg)) {
- pargs->lock_type = mcspdr_thread;
- break;
- }
- if (!strcmp("mcspdro", arg)) {
- pargs->lock_type = mcspdro_thread;
- break;
- }
- if (!strcmp("__mcspdro", arg)) {
- pargs->lock_type = __mcspdro_thread;
- break;
- }
- if (!strcmp("spin", arg)) {
- pargs->lock_type = spin_thread;
- break;
- }
- if (!strcmp("spinpdr", arg)) {
- pargs->lock_type = spinpdr_thread;
- break;
- }
- printf("Unknown locktype %s\n\n", arg);
- argp_usage(state);
break;
case ARGP_KEY_ARG:
printf("Warning, extra argument %s ignored\n\n", arg);
break;
case ARGP_KEY_END:
- if (!pargs->nr_threads) {
- printf("Must select a number of threads.\n\n");
- argp_usage(state);
- break;
- }
- if (!pargs->nr_loops) {
- printf("Must select a number of loops.\n\n");
- argp_usage(state);
- break;
- }
- if (!pargs->lock_type) {
+ if (!pargs->test) {
printf("Must select a type of lock.\n\n");
+ print_lock_types();
argp_usage(state);
break;
}
@@ -876,115 +873,146 @@
static struct argp argp = {options, parse_opt, args_doc, doc};
-int main(int argc, char** argv)
-{
- pthread_t *worker_threads;
+struct results {
void **loops_done;
- struct timeval start_tv = {0};
- struct timeval end_tv = {0};
- long usec_diff, total_loops = 0;
- uint64_t starttsc;
- int nr_threads, nr_loops;
- FILE *outfile;
+ struct lock_sample **thread_samples;
+};
+
+static struct results run_test(void)
+{
+	/* Runs pargs.test->func on pargs.nr_threads workers; returns their data. */
+	struct results results;
+	void **loops_done;
+	pthread_t *worker_threads;
+
+	mcs_pdr_init(&mcspdr_lock);
+	worker_threads = malloc(sizeof(pthread_t) * pargs.nr_threads);
+	if (!worker_threads)
+		handle_error("pthread_t malloc failed:");
+	loops_done = malloc(sizeof(void*) * pargs.nr_threads);
+	if (!loops_done)
+		handle_error("loops_done malloc failed");
+	pthread_barrier_init(&start_test, NULL, pargs.nr_threads);
+
+	times = malloc(sizeof(struct lock_sample *) * pargs.nr_threads);
+	if (!times)
+		handle_error("Record keeping malloc");
+	for (unsigned int i = 0; i < pargs.nr_threads; i++) {
+		times[i] = malloc(sizeof(**times) * pargs.nr_loops);
+		if (!times[i])
+			handle_error("Record keeping malloc");
+		memset(times[i], 0, sizeof(**times) * pargs.nr_loops);
+	}
+	printd("Record tracking takes %zu bytes of memory\n",
+	       (size_t)pargs.nr_threads * pargs.nr_loops * sizeof(**times));
+
+	/* ensure we have enough VCs */
+	os_prep_work(worker_threads, pargs.nr_threads);
+
+	for (long i = 0; i < pargs.nr_threads; i++) {
+		if (pthread_create(&worker_threads[i], NULL, pargs.test->func,
+				   (void*)i))
+			handle_error("pth_create failed");
+	}
+
+	/* TODO: this affinity shuffling happens after the threads were created,
+	 * but the test might have started already. Ultimately, we need an
+	 * atomic {kick-barrier, sleep}. */
+	os_post_work(worker_threads, pargs.nr_threads);
+
+	for (unsigned int i = 0; i < pargs.nr_threads; i++)
+		pthread_join(worker_threads[i], &loops_done[i]);
+	free(worker_threads);	/* every worker has been joined */
+
+	results.loops_done = loops_done;
+	results.thread_samples = times;
+	return results;
+}
+
+static void analyze(struct results *results)
+{
+ void **loops_done = results->loops_done;
+ struct lock_sample **thread_samples = results->thread_samples;
+
+ uint64_t max_tsc = 0;
+ uint64_t min_tsc = UINT64_MAX;
+ unsigned long total_loops = 0;
struct sample_stats acq_stats, hld_stats;
- argp_parse(&argp, argc, argv, 0, 0, &pargs);
- nr_threads = pargs.nr_threads;
- nr_loops = pargs.nr_loops;
- mcs_pdr_init(&mcspdr_lock);
+ printf("Acquire times (TSC Ticks)\n---------------------------\n");
+ acq_stats.get_sample = get_acq_latency;
+ compute_stats((void**)thread_samples, pargs.nr_threads, pargs.nr_loops,
+ &acq_stats);
+
+ printf("Held times (from acq til rel done) (TSC Ticks)\n------\n");
+ hld_stats.get_sample = get_hld_latency;
+ compute_stats((void**)thread_samples, pargs.nr_threads, pargs.nr_loops,
+ &hld_stats);
+
+ /* compute start and end based off the data set */
+ for (int i = 0; i < pargs.nr_threads; i++) {
+ for (int j = 0; j < pargs.nr_loops; j++) {
+ if (thread_samples[i][j].pre == 0)
+ continue;
+ printd("T %d L %03d p %lu a %lu u %lu v %u\n", i, j,
+ thread_samples[i][j].pre,
+ thread_samples[i][j].acq,
+ thread_samples[i][j].un,
+ thread_samples[i][j].valid);
+ min_tsc = MIN(min_tsc, thread_samples[i][j].pre);
+ max_tsc = MAX(max_tsc, thread_samples[i][j].un);
+ }
+ }
+ printf("Time to run: %ld usec\n", tsc2usec(max_tsc - min_tsc));
+
+ /* throughput for the entire duration (in ms), 1ms steps. print as many
+ * steps as they ask for (up to the end of the run). */
+ printf("\nLock throughput:\n-----------------\n");
+ print_throughput((void**)thread_samples,
+ tsc2msec(max_tsc - min_tsc),
+ msec2tsc(1),
+ pargs.nr_print_rows,
+ min_tsc,
+ pargs.nr_threads, pargs.nr_loops, get_acq_timestamp);
+
+ print_preempt_trace(min_tsc, pargs.nr_print_rows);
+
+ for (int i = 0; i < pargs.nr_threads; i++) {
+ total_loops += (unsigned long)loops_done[i];
+ if (!loops_done[i])
+ printf("WARNING: thread %d performed 0 loops!\n", i);
+ }
+ printd("Average number of loops done, per thread: %ld\n",
+ total_loops / pargs.nr_threads);
+ for (int i = 0; i < pargs.nr_threads; i++)
+ printd("\tThread %d performed %lu loops\n",
+ i, (long)loops_done[i]);
+}
+
+static void save_results(struct results *results, int argc, char **argv)
+{
+ struct lock_sample **thread_samples = results->thread_samples;
+ FILE *outfile;
if (pargs.outfile_path) {
/* RDWR, CREAT, TRUNC, O666 */
outfile = fopen(pargs.outfile_path, "w+");
- if (!outfile) {
- perror("outfile");
- exit(-1);
- }
- }
- worker_threads = malloc(sizeof(pthread_t) * nr_threads);
- if (!worker_threads) {
- perror("pthread_t malloc failed:");
- exit(-1);
- }
- loops_done = malloc(sizeof(void*) * nr_threads);
- if (!loops_done) {
- perror("loops_done malloc failed");
- exit(-1);
- }
- printf("Making %d workers, %d loops each, %sadapting workers to vcores, and %sfaking vcore context\n",
- nr_threads, nr_loops,
- pargs.adj_workers ? "" : "not ",
- pargs.fake_vc_ctx ? "" : "not ");
- pthread_barrier_init(&start_test, NULL, nr_threads);
+ if (!outfile)
+ handle_error("outfile");
- times = malloc(sizeof(struct time_stamp *) * nr_threads);
- assert(times);
- for (int i = 0; i < nr_threads; i++) {
- times[i] = malloc(sizeof(struct time_stamp) * nr_loops);
- if (!times[i]) {
- perror("Record keeping malloc");
- exit(-1);
- }
- memset(times[i], 0, sizeof(struct time_stamp) * nr_loops);
- }
- printf("Record tracking takes %ld bytes of memory\n",
- nr_threads * nr_loops * sizeof(struct time_stamp));
- os_prep_work(worker_threads, nr_threads);/* ensure we have enough VCs */
- /* Doing this in MCP ctx, so we might have been getting a few preempts
- * already. Want to read start before the threads pass their barrier */
- starttsc = read_tsc();
- /* create and join on yield */
- for (long i = 0; i < nr_threads; i++) {
- if (pthread_create(&worker_threads[i], NULL, pargs.lock_type,
- (void*)i))
- perror("pth_create failed");
- }
- os_post_work(worker_threads, nr_threads);
- if (gettimeofday(&start_tv, 0))
- perror("Start time error...");
- for (int i = 0; i < nr_threads; i++) {
- pthread_join(worker_threads[i], &loops_done[i]);
- }
- if (gettimeofday(&end_tv, 0))
- perror("End time error...");
-
- printf("Acquire times (TSC Ticks)\n---------------------------\n");
- acq_stats.get_sample = get_acq_latency;
- compute_stats((void**)times, nr_threads, nr_loops, &acq_stats);
-
- printf("Held times (from acq til rel done) (TSC Ticks)\n------\n");
- hld_stats.get_sample = get_hld_latency;
- compute_stats((void**)times, nr_threads, nr_loops, &hld_stats);
-
- usec_diff = (end_tv.tv_sec - start_tv.tv_sec) * 1000000 +
- (end_tv.tv_usec - start_tv.tv_usec);
- printf("Time to run: %ld usec\n", usec_diff);
-
- printf("\nLock throughput:\n-----------------\n");
- /* throughput for the entire duration (in ms), 1ms steps. print as many
- * steps as they ask for (up to the end of the run). */
- print_throughput((void**)times, usec_diff / 1000 + 1, msec2tsc(1),
- pargs.nr_print_rows,
- starttsc, nr_threads,
- nr_loops, get_acq_timestamp);
- print_preempt_trace(starttsc, pargs.nr_print_rows);
-
- for (int i = 0; i < nr_threads; i++) {
- total_loops += (long)loops_done[i];
- if (!loops_done[i])
- printf("WARNING: thread %d performed 0 loops!\n", i);
- }
- printf("Average number of loops done, per thread: %ld\n",
- total_loops / nr_threads);
- for (int i = 0; i < nr_threads; i++)
- printf("\tThread %d performed %lu loops\n",
- i, (long)loops_done[i]);
-
- if (pargs.outfile_path) {
fprintf(outfile, "#");
for (char **arg = argv; *arg; arg++)
fprintf(outfile, " %s", *arg);
fprintf(outfile, "\n");
+
+ fprintf(outfile, "# test '%s', %u host cores, %u workers\n",
+ pargs.test->name, max_vcores(), pargs.nr_threads);
+ fprintf(outfile, "# %lu loops, %sadaptive, %sfaking vc ctx\n",
+ pargs.nr_loops,
+ pargs.adj_workers ? "" : "not ",
+ pargs.fake_vc_ctx ? "" : "not ");
+ fprintf(outfile, "# %s\n", os_name());
+
fprintf(outfile, "# thread_id attempt pre acq(uire) un(lock) "
"tsc_overhead\n");
fprintf(outfile,
@@ -993,9 +1021,9 @@
fprintf(outfile, "# tsc_frequency %llu\n", get_tsc_freq());
fprintf(outfile,
"# tsc_overhead is 0 on linux, hard code it with a value from akaros\n");
- for (int i = 0; i < nr_threads; i++) {
- for (int j = 0; j < nr_loops; j++) {
- struct time_stamp *ts = ×[i][j];
+ for (int i = 0; i < pargs.nr_threads; i++) {
+ for (int j = 0; j < pargs.nr_loops; j++) {
+ struct lock_sample *ts = &thread_samples[i][j];
if (!ts->pre)
break; /* empty record */
fprintf(outfile, "%d %d %llu %llu %llu %llu\n",
@@ -1005,5 +1033,29 @@
}
fclose(outfile);
}
- printf("Done, exiting\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct results results;
+	/* Defaults for the -w and -l options; a lock type (-t) is mandatory. */
+	pargs.nr_threads = max_vcores();
+	pargs.nr_loops = 10000;
+	pargs.nr_print_rows = 10;
+	argp_parse(&argp, argc, argv, 0, 0, &pargs);
+	/* nr_threads and nr_loops are unsigned int, so they are printed with %u */
+	printf("Detected %u cores, running test '%s' on %u cores\n",
+	       max_vcores(), pargs.test->name, pargs.nr_threads);
+	printf("%u loops, %sadapting workers to vcores, %sfaking vcore context\n",
+	       pargs.nr_loops,
+	       pargs.adj_workers ? "" : "not ",
+	       pargs.fake_vc_ctx ? "" : "not ");
+	printf("All times in TSC ticks, freq: %llu\n", get_tsc_freq());
+	printf("\n\n");
+
+	results = run_test();
+	analyze(&results);
+	save_results(&results, argc, argv);
+
+	return 0;
}