[PATCH RFC v1 2/2] rcuperf: Add kfree_rcu performance Tests

From: Joel Fernandes (Google)
Date: Tue Aug 06 2019 - 17:21:02 EST


This test calls kfree_rcu() in a loop in order to measure the performance of
the new batched kfree_rcu(), with and without the patch applied.

To see the improvement, run with the following boot parameters:
rcuperf.kfree_loops=2000 rcuperf.kfree_alloc_num=100 rcuperf.perf_type=kfree

Without the patch, the test runs in 6.9 seconds.
With the patch, the test runs in 6.1 seconds, roughly a 13% improvement
(6.9 / 6.1 is about 1.13).

To run the test with the traditional (non-batched) kfree_rcu(), for example
to compare results, pass the rcuperf.kfree_no_batch=1 boot parameter as well.
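
For example, assuming the in-tree rcutorture scripts (the kvm.sh invocation
below is only a sketch for illustration, not something this patch adds or
depends on), the non-batched variant can be run with:

tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuperf \
    --bootargs "rcuperf.perf_type=kfree rcuperf.kfree_loops=2000 \
    rcuperf.kfree_alloc_num=100 rcuperf.kfree_no_batch=1"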

Cc: max.byungchul.park@xxxxxxxxx
Cc: byungchul.park@xxxxxxx
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
kernel/rcu/rcuperf.c | 169 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 168 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 7a6890b23c5f..34658760da5e 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -89,7 +89,7 @@ torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable

static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
-MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, kfree, ...)");

static int nrealreaders;
static int nrealwriters;
@@ -592,6 +592,170 @@ rcu_perf_shutdown(void *arg)
return -EINVAL;
}

+/*
+ * kfree_rcu() performance tests: Start a kfree_rcu() loop on all CPUs, run it
+ * for a given number of iterations, and measure the total time taken for all
+ * iterations to complete.
+ */
+
+torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()");
+torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration");
+torture_param(int, kfree_alloc_size, 16, "Size of each allocation");
+torture_param(int, kfree_loops, 10, "Number of loop iterations per thread");
+torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu()");
+
+static struct task_struct **kfree_reader_tasks;
+static int kfree_nrealthreads;
+static atomic_t n_kfree_perf_thread_started;
+static atomic_t n_kfree_perf_thread_ended;
+
+#define KFREE_OBJ_BYTES 8
+
+struct kfree_obj {
+ char kfree_obj[KFREE_OBJ_BYTES];
+ struct rcu_head rh;
+};
+
+void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
+
+static int
+kfree_perf_thread(void *arg)
+{
+ int i, l = 0;
+ long me = (long)arg;
+ struct kfree_obj **alloc_ptrs;
+ u64 start_time, end_time;
+
+ VERBOSE_PERFOUT_STRING("kfree_perf_thread task started");
+ set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+ set_user_nice(current, MAX_NICE);
+ atomic_inc(&n_kfree_perf_thread_started);
+
+ alloc_ptrs = kmalloc(sizeof(struct kfree_obj *) * kfree_alloc_num,
+ GFP_KERNEL);
+ if (!alloc_ptrs)
+ return -ENOMEM;
+
+ start_time = ktime_get_mono_fast_ns();
+ do {
+ for (i = 0; i < kfree_alloc_num; i++) {
+ alloc_ptrs[i] = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ if (!alloc_ptrs[i])
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < kfree_alloc_num; i++) {
+ if (!kfree_no_batch) {
+ kfree_rcu(alloc_ptrs[i], rh);
+ } else {
+ rcu_callback_t cb;
+
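+ /*
+  * Mimic what kfree_rcu() does internally: encode the
+  * offset of the rcu_head within struct kfree_obj as
+  * the "callback", so the non-batched path can locate
+  * and kfree() the enclosing object.
+  */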
+ cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
+ kfree_call_rcu_nobatch(&(alloc_ptrs[i]->rh), cb);
+ }
+ }
+
+ schedule_timeout_uninterruptible(2);
+ } while (!torture_must_stop() && ++l < kfree_loops);
+
+ kfree(alloc_ptrs);
+
+ if (atomic_inc_return(&n_kfree_perf_thread_ended) >= kfree_nrealthreads) {
+ end_time = ktime_get_mono_fast_ns();
+ pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d\n",
+ (unsigned long long)(end_time - start_time), kfree_loops);
+ if (shutdown) {
+ smp_mb(); /* Assign before wake. */
+ wake_up(&shutdown_wq);
+ }
+ }
+
+ torture_kthread_stopping("kfree_perf_thread");
+ return 0;
+}
+
+static void
+kfree_perf_cleanup(void)
+{
+ int i;
+
+ if (torture_cleanup_begin())
+ return;
+
+ if (kfree_reader_tasks) {
+ for (i = 0; i < kfree_nrealthreads; i++)
+ torture_stop_kthread(kfree_perf_thread,
+ kfree_reader_tasks[i]);
+ kfree(kfree_reader_tasks);
+ }
+
+ torture_cleanup_end();
+}
+
+/*
+ * shutdown kthread. Just waits to be awakened, then shuts down system.
+ */
+static int
+kfree_perf_shutdown(void *arg)
+{
+ do {
+ wait_event(shutdown_wq,
+ atomic_read(&n_kfree_perf_thread_ended) >=
+ kfree_nrealthreads);
+ } while (atomic_read(&n_kfree_perf_thread_ended) < kfree_nrealthreads);
+
+ smp_mb(); /* Wake before output. */
+
+ kfree_perf_cleanup();
+ kernel_power_off();
+ return -EINVAL;
+}
+
+static int __init
+kfree_perf_init(void)
+{
+ long i;
+ int firsterr = 0;
+
+ if (!torture_init_begin("kfree_perf", verbose))
+ return -EBUSY;
+
+ kfree_nrealthreads = compute_real(kfree_nthreads);
+ /* Start up the kthreads. */
+ if (shutdown) {
+ init_waitqueue_head(&shutdown_wq);
+ firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
+ shutdown_task);
+ if (firsterr)
+ goto unwind;
+ schedule_timeout_uninterruptible(1);
+ }
+
+ kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
+ GFP_KERNEL);
+ if (kfree_reader_tasks == NULL) {
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+
+ for (i = 0; i < kfree_nrealthreads; i++) {
+ firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
+ kfree_reader_tasks[i]);
+ if (firsterr)
+ goto unwind;
+ }
+
+ while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
+ schedule_timeout_uninterruptible(1);
+
+ torture_init_end();
+ return 0;
+
+unwind:
+ torture_init_end();
+ kfree_perf_cleanup();
+ return firsterr;
+}
+
static int __init
rcu_perf_init(void)
{
@@ -601,6 +765,9 @@ rcu_perf_init(void)
&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
};

+ if (strcmp(perf_type, "kfree") == 0)
+ return kfree_perf_init();
+
if (!torture_init_begin(perf_type, verbose))
return -EBUSY;

--
2.22.0.770.g0f2c4a37fd-goog