// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2013, 2014 The Regents of the University of California
 * Copyright (c) 2020 Google Inc
 *
 * Barret Rhoden <brho@cs.berkeley.edu>
 *
 * Sorry, but you'll need to patch your kernel source to export this
 * function:
 *
 *	EXPORT_SYMBOL_GPL(kthread_create_on_cpu);
 */
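
/*
 * Interface: this module creates a binary sysfs file at /sys/kernel/mcs.
 * Write "nr_threads nr_loops hold_ns delay_ns" to configure a run; a thread
 * count of 0 means one thread per online CPU.  A read starting at offset 0
 * runs the test and returns the results: an array of nr_threads return
 * values (void *), followed by nr_threads arrays of nr_loops struct
 * lock_sample entries.  The file's size only updates on the next open, so
 * write, close, reopen, then read.
 */
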
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <asm/msr.h>
#include <asm/qspinlock.h>	/* queued_spin_lock()/queued_spin_unlock() */

/*
 * One sample per lock acquisition: TSC timestamps taken immediately before
 * attempting the lock, right after acquiring it, and right after releasing
 * it.
 */
struct lock_sample {
	u64 pre;	/* just before queued_spin_lock() */
	u64 acq;	/* just after acquiring the lock */
	u64 un;		/* just after releasing the lock */
	bool valid;
};

/* mtx protects all variables and the test run */
static struct mutex mtx;

static DECLARE_COMPLETION(test_done);

static unsigned int nr_threads;
static unsigned int nr_loops;
static unsigned int hold_time;
static unsigned int delay_time;

/* array[nr_threads] of pointers to lock_sample[nr_loops] */
static struct lock_sample **times;
/* array[nr_threads] of task* */
static struct task_struct **threads;
/* array[nr_threads] of void* */
static void **retvals;

static void *results;
static size_t results_sz;

static bool run_locktest __cacheline_aligned_in_smp;
static atomic_t horses __cacheline_aligned_in_smp;

static struct qspinlock l = __ARCH_SPIN_LOCK_UNLOCKED;

static int __mcs_thread_lock_test(void *arg)
{
	long thread_id = (long)arg;
	u64 pre_lock, acq_lock, un_lock;
	struct lock_sample *this_time;
	int i;

	/* Start barrier: wait for every thread to arrive, then race. */
	atomic_dec(&horses);
	while (atomic_read(&horses))
		cpu_relax();

	for (i = 0; i < nr_loops; i++) {
		/*
		 * We might be able to replace this with post-processing: let
		 * the test run, and discard all entries after the first
		 * finisher.
		 */
		if (!READ_ONCE(run_locktest))
			break;

		local_irq_disable();
		pre_lock = rdtsc_ordered();
		queued_spin_lock(&l);
		acq_lock = rdtsc_ordered();
		if (hold_time)
			ndelay(hold_time);
		queued_spin_unlock(&l);
		un_lock = rdtsc_ordered();
		local_irq_enable();

		this_time = &times[thread_id][i];
		this_time->pre = pre_lock;
		this_time->acq = acq_lock;
		this_time->un = un_lock;
		/* Can turn these on/off to control which samples we gather */
		this_time->valid = true;

		if (delay_time)
			ndelay(delay_time);
		/*
		 * This can throw off your delay_time.  Think of delay_time as
		 * the least amount of time we'll wait between reacquiring the
		 * lock.  After all, IRQs are enabled, so all bets are off.
		 */
		cond_resched();
	}

	/* First thread to finish stops the test */
	WRITE_ONCE(run_locktest, false);
	/*
	 * Wakes the controller thread.  The others will be done soon, to
	 * complete the hokey thread join.
	 */
	complete(&test_done);

	WRITE_ONCE(retvals[thread_id], (void *)(long)i);
	return 0;
}

/*
 * This consolidates the results in a format we will export to userspace.  We
 * could have just used this format for the test itself, but then the times
 * arrays wouldn't be NUMA local.
 */
static int mcs_build_output(struct lock_sample **times, void **retvals)
{
	int i;
	size_t sz_rets = nr_threads * sizeof(void *);
	size_t sz_times_per = nr_loops * sizeof(struct lock_sample);

	results_sz = sz_rets + nr_threads * sz_times_per;
	kvfree(results);
	results = kvzalloc(results_sz, GFP_KERNEL);
	if (!results) {
		pr_err("failed to allocate results buffer (line %d)\n",
		       __LINE__);
		return -ENOMEM;
	}
	memcpy(results, retvals, sz_rets);
	for (i = 0; i < nr_threads; i++) {
		memcpy(results + sz_rets + i * sz_times_per,
		       times[i], sz_times_per);
	}
	return 0;
}
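
/*
 * For illustration, a userspace reader could unpack the blob along these
 * lines (a sketch with hypothetical names; it assumes the reader is built
 * for the same 64-bit ABI as the kernel, so the struct layout matches):
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *	#include <stdbool.h>
 *
 *	struct lock_sample {
 *		uint64_t pre, acq, un;
 *		bool valid;
 *	};
 *
 *	static void dump_acq_times(void *buf, unsigned int nr_threads,
 *				   unsigned int nr_loops)
 *	{
 *		void **rets = buf;
 *		struct lock_sample *s = (struct lock_sample *)&rets[nr_threads];
 *		unsigned int t, l;
 *
 *		for (t = 0; t < nr_threads; t++)
 *			for (l = 0; l < nr_loops; l++)
 *				printf("%llu\n", (unsigned long long)
 *				       (s[t * nr_loops + l].acq -
 *					s[t * nr_loops + l].pre));
 *	}
 */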

static int mcs_lock_test(void)
{
	int i;
	int ret = -ENOMEM;
	size_t amt;

	atomic_set(&horses, nr_threads);
	WRITE_ONCE(run_locktest, true);

	times = kcalloc(nr_threads, sizeof(struct lock_sample *), GFP_KERNEL);
	if (!times) {
		pr_err("failed to allocate times array (line %d)\n", __LINE__);
		return ret;
	}
	if (check_mul_overflow((size_t)nr_loops, sizeof(struct lock_sample),
			       &amt)) {
		pr_err("nr_loops * sizeof(struct lock_sample) overflows (line %d)\n",
		       __LINE__);
		goto out_times;
	}
	for (i = 0; i < nr_threads; i++) {
		times[i] = kvzalloc_node(amt, GFP_KERNEL, cpu_to_node(i));
		if (!times[i]) {
			/* we clean up the times[i]s below */
			pr_err("failed to allocate times[%d] (line %d)\n", i,
			       __LINE__);
			goto out_times;
		}
	}
	retvals = kcalloc(nr_threads, sizeof(void *), GFP_KERNEL);
	if (!retvals) {
		pr_err("failed to allocate retvals (line %d)\n", __LINE__);
		goto out_times;
	}
	for (i = 0; i < nr_threads; i++)
		retvals[i] = (void *)-1;

	threads = kcalloc(nr_threads, sizeof(struct task_struct *),
			  GFP_KERNEL);
	if (!threads) {
		pr_err("failed to allocate threads array (line %d)\n",
		       __LINE__);
		goto out_retvals;
	}
	for (i = 0; i < nr_threads; i++) {
		threads[i] = kthread_create_on_cpu(__mcs_thread_lock_test,
						   (void *)(long)i, i,
						   "mcs-%u");
		if (IS_ERR(threads[i])) {
			ret = PTR_ERR(threads[i]);
			/*
			 * Clean up the threads we already created.  They were
			 * never woken, and kthread_stop() on an unwoken
			 * kthread makes it exit without running its threadfn.
			 * (This path has never triggered in practice.)
			 */
			while (--i >= 0)
				kthread_stop(threads[i]);
			pr_err("kthread_create_on_cpu failed (line %d)\n",
			       __LINE__);
			goto out_threads;
		}
	}
	for (i = 0; i < nr_threads; i++) {
		/*
		 * Note the refcounting: kthread_create_on_cpu() hands back
		 * what looks like an uncounted reference (create->result =
		 * current), so once we start the threads, we can't touch
		 * threads[i] again.
		 */
		wake_up_process(threads[i]);
	}

	/* Hokey join.  We know when the test is done but wait for the others */
	wait_for_completion(&test_done);
	for (i = 0; i < nr_threads; i++) {
		while (READ_ONCE(retvals[i]) == (void *)-1)
			cond_resched();
	}

	ret = mcs_build_output(times, retvals);

out_threads:
	kfree(threads);
out_retvals:
	kfree(retvals);
out_times:
	for (i = 0; i < nr_threads; i++)
		kvfree(times[i]);
	kfree(times);
	return ret;
}

static ssize_t mcs_read(struct file *filp, struct kobject *kobj,
			struct bin_attribute *bin_attr,
			char *buf, loff_t off, size_t count)
{
	int ret;

	mutex_lock(&mtx);
	if (!off) {
		ret = mcs_lock_test();
		if (ret) {
			mutex_unlock(&mtx);
			return ret;
		}
	}
	if (!results) {
		pr_err("no results buffer (line %d)\n", __LINE__);
		mutex_unlock(&mtx);
		return -ENODATA;
	}
	/* sysfs caps reads at the file size, but don't trust off or count */
	if ((size_t)off >= results_sz) {
		mutex_unlock(&mtx);
		return 0;
	}
	if (count > results_sz - off) {
		pr_err("read ran off the end: off %lld count %zu sz %zu\n",
		       off, count, results_sz);
		count = results_sz - off;
	}
	memcpy(buf, results + off, count);
	mutex_unlock(&mtx);
	return count;
}

static loff_t __mcs_get_results_size(void)
{
	return nr_threads *
	       (sizeof(void *) + nr_loops * sizeof(struct lock_sample));
}
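
/*
 * For instance, on a 64-bit build struct lock_sample pads out to 32 bytes,
 * so 4 threads at 10000 loops yields 4 * (8 + 10000 * 32) = 1280032 bytes.
 */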

/*
 * Unfortunately, this doesn't update the file live.  It'll only take effect
 * the next time you open it.  So users need to write, close, open, read.
 */
static void __mcs_update_size(void)
{
	struct kernfs_node *kn = kernfs_find_and_get(kernel_kobj->sd, "mcs");

	if (!kn) {
		pr_err("could not find sysfs node (line %d)\n", __LINE__);
		return;
	}
	kn->attr.size = __mcs_get_results_size();
	kernfs_put(kn);
}
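
/*
 * e.g. write "0 10000 0 0" to /sys/kernel/mcs, close it, then read the file
 * through a fresh open (say, cat /sys/kernel/mcs > results.bin); the new
 * open sees the size set above.
 */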

static ssize_t mcs_write(struct file *filp, struct kobject *kobj,
			 struct bin_attribute *bin_attr,
			 char *buf, loff_t off, size_t count)
{
	unsigned int threads, loops, hold, delay;
	ssize_t ret;

	/* TODO: check_mul_overflow and whatnot, esp for the result_sz buffer */
	ret = sscanf(buf, "%u %u %u %u", &threads, &loops, &hold, &delay);
	if (ret != 4)
		return -EINVAL;
	if (threads > num_online_cpus())
		return -ENODEV;
	if (threads == 0)
		threads = num_online_cpus();
	mutex_lock(&mtx);
	nr_threads = threads;
	nr_loops = loops;
	hold_time = hold;
	delay_time = delay;
	__mcs_update_size();
	mutex_unlock(&mtx);
	return count;
}

static struct bin_attribute mcs_attr = {
	.attr = {
		.name = "mcs",
		.mode = 0666,
	},
	.size = 0,
	.private = NULL,
	.read = mcs_read,
	.write = mcs_write,
};

static int __init mcs_init(void)
{
	int ret;

	mutex_init(&mtx);
	/*
	 * The user needs to set these, but start with sensible defaults in
	 * case they read without writing.
	 */
	nr_threads = num_online_cpus();
	nr_loops = 10000;
	mcs_attr.size = __mcs_get_results_size();
	ret = sysfs_create_bin_file(kernel_kobj, &mcs_attr);
	if (ret) {
		pr_err("failed to create sysfs file (line %d)\n", __LINE__);
		return ret;
	}
	return 0;
}

static void __exit mcs_exit(void)
{
	sysfs_remove_bin_file(kernel_kobj, &mcs_attr);
	kvfree(results);
}

module_init(mcs_init);
module_exit(mcs_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Barret Rhoden <brho@google.com>");
MODULE_DESCRIPTION("MCS lock test");