| // SPDX-License-Identifier: GPL-2.0-or-later | 
 | /* | 
 |  * Copyright (c) 2013, 2014 The Regents of the University of California | 
 |  * Copyright (c) 2020 Google Inc | 
 |  * | 
 |  * Barret Rhoden <brho@cs.berkeley.edu> | 
 |  * | 
 |  * Sorry, but you'll need to change your linux source to expose this function: | 
 |  | 
 |  EXPORT_SYMBOL_GPL(kthread_create_on_cpu); | 
 |  | 
 |  * | 
 |  */ | 
 |  | 
 | #include <linux/module.h> | 
 | #include <linux/moduleparam.h> | 
 | #include <linux/kobject.h> | 
 | #include <linux/sysfs.h> | 
 | #include <linux/slab.h> | 
 |  | 
 | #include <linux/sched/task.h> | 
 | #include <linux/sched/mm.h> | 
 | #include <linux/delay.h> | 
 | #include <linux/kthread.h> | 
 | #include <linux/completion.h> | 
 | #include <asm/msr.h> | 
 |  | 
 | struct lock_sample { | 
 | 	u64 pre; | 
 | 	u64 acq; | 
 | 	u64 un; | 
 | 	bool valid; | 
 | }; | 
 |  | 
 | /* mtx protects all variables and the test run */ | 
 | static struct mutex mtx; | 
 |  | 
 | static DECLARE_COMPLETION(test_done); | 
 |  | 
 | static unsigned int nr_threads; | 
 | static unsigned int nr_loops; | 
 | static unsigned int hold_time; | 
 | static unsigned int delay_time; | 
 |  | 
 | /* array[nr_thread] of pointers of lock_sample[nr_loops] */ | 
 | static struct lock_sample **times; | 
 | /* array[nr_thread] of task* */ | 
 | static struct task_struct **threads; | 
 | /* array[nr_thread] of void* */ | 
 | static void **retvals; | 
 | static void *results; | 
 | static size_t results_sz; | 
 |  | 
 | static bool run_locktest __cacheline_aligned_in_smp; | 
 | static atomic_t horses __cacheline_aligned_in_smp; | 
 |  | 
 | static struct qspinlock l = __ARCH_SPIN_LOCK_UNLOCKED; | 
 |  | 
 | static int __mcs_thread_lock_test(void *arg) | 
 | { | 
 | 	long thread_id = (long)arg; | 
 | 	u64 pre_lock, acq_lock, un_lock; | 
 | 	struct lock_sample *this_time; | 
 | 	int i; | 
 |  | 
 | 	atomic_dec(&horses); | 
 | 	while (atomic_read(&horses)) | 
 | 		cpu_relax(); | 
 | 	for (i = 0; i < nr_loops; i++) { | 
 | 		/* | 
 | 		 * might be able to replace this with post-processing.  let the | 
 | 		 * test run, and discard all entries after the first finisher | 
 | 		 */ | 
 | 		if (!READ_ONCE(run_locktest)) | 
 | 			break; | 
 |  | 
 | 		local_irq_disable(); | 
 | 		pre_lock = rdtsc_ordered(); | 
 |  | 
 | 		queued_spin_lock(&l); | 
 |  | 
 | 		acq_lock = rdtsc_ordered(); | 
 |  | 
 | 		if (hold_time) | 
 | 			ndelay(hold_time); | 
 |  | 
 | 		queued_spin_unlock(&l); | 
 |  | 
 | 		un_lock = rdtsc_ordered(); | 
 |  | 
 | 		local_irq_enable(); | 
 |  | 
 | 		this_time = ×[thread_id][i]; | 
 | 		this_time->pre = pre_lock; | 
 | 		this_time->acq = acq_lock; | 
 | 		this_time->un = un_lock; | 
 | 		/* Can turn these on/off to control which samples we gather */ | 
 | 		this_time->valid = true; | 
 | 		if (delay_time) | 
 | 			ndelay(delay_time); | 
 | 		/* | 
 | 		 * This can throw off your delay_time.  Think of delay_time as | 
 | 		 * the least amount of time we'll wait between reacquiring the | 
 | 		 * lock.  After all, IRQs are enabled, so all bets are off. | 
 | 		 */ | 
 | 		cond_resched(); | 
 | 	} | 
 | 	/* First thread to finish stops the test */ | 
 | 	WRITE_ONCE(run_locktest, false); | 
 | 	/* | 
 | 	 * Wakes the controller thread.  The others will be done soon, to | 
 | 	 * complete the hokey thread join. | 
 | 	 */ | 
 | 	complete(&test_done); | 
 |  | 
 | 	WRITE_ONCE(retvals[thread_id], (void*)(long)i); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* | 
 |  * This consolidates the results in a format we will export to userspace.  We | 
 |  * could have just used this format for the test itself, but then the times | 
 |  * arrays wouldn't be NUMA local. | 
 |  */ | 
 | static int mcs_build_output(struct lock_sample **times, void **retvals) | 
 | { | 
 | 	int i; | 
 | 	size_t sz_rets = nr_threads * sizeof(void*); | 
 | 	size_t sz_times_per = nr_loops * sizeof(struct lock_sample); | 
 |  | 
 | 	results_sz = sz_rets + nr_threads * sz_times_per; | 
 |  | 
 | 	kvfree(results); | 
 |  | 
 | 	results = kvzalloc(results_sz, GFP_KERNEL); | 
 | 	if (!results) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		return -1; | 
 | 	} | 
 |  | 
 | 	memcpy(results, retvals, sz_rets); | 
 | 	for (i = 0; i < nr_threads; i++) { | 
 | 		memcpy(results + sz_rets + i * sz_times_per, | 
 | 		       times[i], sz_times_per); | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static int mcs_lock_test(void) | 
 | { | 
 | 	int i; | 
 | 	int ret = -1; | 
 | 	size_t amt; | 
 |  | 
 | 	atomic_set(&horses, nr_threads); | 
 | 	WRITE_ONCE(run_locktest, true); | 
 |  | 
 | 	times = kcalloc(nr_threads, sizeof(struct lock_sample *), GFP_KERNEL); | 
 | 	if (!times) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		return ret; | 
 | 	} | 
 |  | 
 | 	if (check_mul_overflow((size_t)nr_loops, sizeof(struct lock_sample), | 
 | 			       &amt)) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		goto out_times; | 
 | 	} | 
 | 	for (i = 0; i < nr_threads; i++) { | 
 | 		times[i] = kvzalloc_node(amt, GFP_KERNEL, cpu_to_node(i)); | 
 | 		if (!times[i]) { | 
 | 			/* we clean up the times[i]s below */ | 
 | 			pr_err("fucked %d", __LINE__); | 
 | 			goto out_times; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	retvals = kcalloc(nr_threads, sizeof(void *), GFP_KERNEL); | 
 | 	if (!retvals) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		goto out_times; | 
 | 	} | 
 | 	for (i = 0; i < nr_threads; i++) | 
 | 		retvals[i] = (void*)-1; | 
 |  | 
 | 	threads = kcalloc(nr_threads, sizeof(struct task_struct *), | 
 | 			  GFP_KERNEL); | 
 | 	if (!threads) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		goto out_retvals; | 
 | 	} | 
 |  | 
 | 	for (i = 0; i < nr_threads; i++) { | 
 | 		threads[i] = kthread_create_on_cpu(__mcs_thread_lock_test, | 
 | 						   (void*)(long)i, i, "mcs-%u"); | 
 | 		if (IS_ERR(threads[i])) { | 
 | 			while (--i >= 0) { | 
 | 				/* | 
 | 				 * We could recover, perhaps with something like | 
 | 				 * kthread_stop(threads[i]), but we'd need those | 
 | 				 * threads to check kthread_should_stop(), | 
 | 				 * perhaps in their hokey barrier.  I've never | 
 | 				 * had this fail, so I haven't tested it. | 
 | 				 */ | 
 | 			} | 
 | 			pr_err("fucked %d", __LINE__); | 
 | 			goto out_threads; | 
 | 		} | 
 | 	} | 
 | 	for (i = 0; i < nr_threads; i++) { | 
 | 		/* what's the deal with refcnting here?  it looks like an | 
 | 		 * uncounted ref: create->result = current.  so once we start | 
 | 		 * them, we probably can't touch this again. */ | 
 | 		wake_up_process(threads[i]); | 
 | 	} | 
 |  | 
 | 	/* Hokey join.  We know when the test is done but wait for the others */ | 
 | 	wait_for_completion(&test_done); | 
 | 	for (i = 0; i < nr_threads; i++) { | 
 | 		while (READ_ONCE(retvals[i]) == (void*)-1) | 
 | 			cond_resched(); | 
 | 	} | 
 |  | 
 | 	ret = mcs_build_output(times, retvals); | 
 |  | 
 | out_threads: | 
 | 	kfree(threads); | 
 | out_retvals: | 
 | 	kfree(retvals); | 
 | out_times: | 
 | 	for (i = 0; i < nr_threads; i++) | 
 | 		kvfree(times[i]); | 
 | 	kfree(times); | 
 | 	return ret; | 
 | } | 
 |  | 
 | static ssize_t mcs_read(struct file *filp, struct kobject *kobj, | 
 | 			struct bin_attribute *bin_attr, | 
 | 			char *buf, loff_t off, size_t count) | 
 | { | 
 | 	mutex_lock(&mtx); | 
 |  | 
 | 	if (!off) { | 
 | 		if (mcs_lock_test()) { | 
 | 			mutex_unlock(&mtx); | 
 | 			return -1; | 
 | 		} | 
 | 	} | 
 | 	if (!results) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		mutex_unlock(&mtx); | 
 | 		return -1; | 
 | 	} | 
 | 	/* mildly concerned about addition overflow.  caller's job? */ | 
 | 	if (count + off > results_sz) { | 
 | 		pr_err("fucked off %lld count %lu sz %lu\n", off, count, | 
 | 		       results_sz); | 
 | 		count = results_sz - off; | 
 | 	} | 
 | 	memcpy(buf, results + off, count); | 
 |  | 
 | 	mutex_unlock(&mtx); | 
 |  | 
 | 	return count; | 
 | } | 
 |  | 
 | static loff_t __mcs_get_results_size(void) | 
 | { | 
 | 	return nr_threads * | 
 | 		(sizeof(void*) + nr_loops * sizeof(struct lock_sample)); | 
 | } | 
 |  | 
 | /* | 
 |  * Unfortunately, this doesn't update the file live.  It'll only take effect the | 
 |  * next time you open it.  So users need to write, close, open, read. | 
 |  */ | 
 | static void __mcs_update_size(void) | 
 | { | 
 | 	struct kernfs_node *kn = kernfs_find_and_get(kernel_kobj->sd, "mcs"); | 
 |  | 
 | 	if (!kn) { | 
 | 		pr_err("fucked %d", __LINE__); | 
 | 		return; | 
 | 	} | 
 | 	kn->attr.size = __mcs_get_results_size(); | 
 | } | 
 |  | 
 | static ssize_t mcs_write(struct file *filp, struct kobject *kobj, | 
 | 			 struct bin_attribute *bin_attr, | 
 | 			 char *buf, loff_t off, size_t count) | 
 | { | 
 | 	unsigned int threads, loops, hold, delay; | 
 | 	ssize_t ret; | 
 |  | 
 | 	/* TODO: check_mul_overflow and whatnot, esp for the result_sz buffer */ | 
 | 	ret = sscanf(buf, "%u %u %u %u", &threads, &loops, &hold, | 
 | 		     &delay); | 
 | 	if (ret != 4) | 
 | 		return -EINVAL; | 
 | 	if (threads > num_online_cpus()) | 
 | 		return -ENODEV; | 
 | 	if (threads == 0) | 
 | 		threads = num_online_cpus(); | 
 | 	mutex_lock(&mtx); | 
 | 	nr_threads = threads; | 
 | 	nr_loops = loops; | 
 | 	hold_time = hold; | 
 | 	delay_time = delay; | 
 | 	__mcs_update_size(); | 
 | 	mutex_unlock(&mtx); | 
 | 	return count; | 
 | } | 
 |  | 
 | struct bin_attribute mcs_attr = { | 
 | 	.attr = { | 
 | 		.name = "mcs", | 
 | 		.mode = 0666, | 
 | 	}, | 
 | 	.size = 0, | 
 | 	.private = NULL, | 
 | 	.read = mcs_read, | 
 | 	.write = mcs_write, | 
 | }; | 
 |  | 
 | static int __init mcs_init(void) | 
 | { | 
 | 	mutex_init(&mtx); | 
 |  | 
 | 	/* | 
 | 	 * The user needs to set these, but start with sensible defaults in case | 
 | 	 * they read without writing. | 
 | 	 */ | 
 | 	nr_threads = num_online_cpus(); | 
 | 	nr_loops = 10000; | 
 | 	mcs_attr.size = __mcs_get_results_size(); | 
 |  | 
 | 	if (sysfs_create_bin_file(kernel_kobj, &mcs_attr)) { | 
 | 		pr_err("\n\nfucked %d !!!\n\n\n", __LINE__); | 
 | 		return -1; | 
 | 	} | 
 | 	return 0; | 
 | } | 
 |  | 
 | static void __exit mcs_exit(void) | 
 | { | 
 | 	sysfs_remove_bin_file(kernel_kobj, &mcs_attr); | 
 | } | 
 |  | 
 | module_init(mcs_init); | 
 | module_exit(mcs_exit); | 
 |  | 
 | MODULE_LICENSE("GPL"); | 
 | MODULE_AUTHOR("Barret Rhoden <brho@google.com>"); | 
 | MODULE_DESCRIPTION("MCS lock test"); |