[PATCH v25 02/12] LRNG - allocate one SDRNG instance per NUMA node

From: Stephan Müller
Date: Sat Nov 16 2019 - 04:42:35 EST


In order to improve NUMA-locality when serving getrandom(2) requests,
allocate one DRNG instance per node.

The SDRNG instance that is present right from the start of the kernel is
reused as the first per-NUMA-node SDRNG. For all remaining online NUMA
nodes a new SDRNG instance is allocated.

During boot time, the multiple SDRNG instances are seeded sequentially.
With this, the first SDRNG instance (referenced as the initial SDRNG
in the code) is completely seeded with 256 bits of entropy before the
next SDRNG instance is completely seeded.

When random numbers are requested, the NUMA-node-local SDRNG is checked
whether it has been already fully seeded. If this is not the case, the
initial SDRNG is used to serve the request.

CC: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
CC: "Alexander E. Patrakov" <patrakov@xxxxxxxxx>
CC: "Ahmed S. Darwish" <darwish.07@xxxxxxxxx>
CC: "Theodore Y. Ts'o" <tytso@xxxxxxx>
CC: Willy Tarreau <w@xxxxxx>
CC: Matthew Garrett <mjg59@xxxxxxxxxxxxx>
CC: Vito Caputo <vcaputo@xxxxxxxxxxx>
CC: Andreas Dilger <adilger.kernel@xxxxxxxxx>
CC: Jan Kara <jack@xxxxxxx>
CC: Ray Strode <rstrode@xxxxxxxxxx>
CC: William Jon McCann <mccann@xxxxxxx>
CC: zhangjs <zachary@xxxxxxxxxxxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxx>
CC: Florian Weimer <fweimer@xxxxxxxxxx>
CC: Lennart Poettering <mzxreary@xxxxxxxxxxx>
CC: Nicolai Stange <nstange@xxxxxxx>
Reviewed-by: Marcelo Henrique Cerri <marcelo.cerri@xxxxxxxxxxxxx>
Reviewed-by: Roman Drahtmueller <draht@xxxxxxxxxxxxxx>
Tested-by: Roman Drahtmüller <draht@xxxxxxxxxxxxxx>
Tested-by: Marcelo Henrique Cerri <marcelo.cerri@xxxxxxxxxxxxx>
Tested-by: Neil Horman <nhorman@xxxxxxxxxx>
Signed-off-by: Stephan Mueller <smueller@xxxxxxxxxx>
---
drivers/char/lrng/Makefile | 2 +
drivers/char/lrng/lrng_internal.h | 5 ++
drivers/char/lrng/lrng_numa.c | 101 ++++++++++++++++++++++++++++++
3 files changed, 108 insertions(+)
create mode 100644 drivers/char/lrng/lrng_numa.c

diff --git a/drivers/char/lrng/Makefile b/drivers/char/lrng/Makefile
index 2761623715d2..a00cddb45773 100644
--- a/drivers/char/lrng/Makefile
+++ b/drivers/char/lrng/Makefile
@@ -7,3 +7,5 @@ obj-y += lrng_pool.o lrng_aux.o \
lrng_sw_noise.o lrng_archrandom.o \
lrng_sdrng.o lrng_chacha20.o \
lrng_interfaces.o \
+
+obj-$(CONFIG_NUMA) += lrng_numa.o
diff --git a/drivers/char/lrng/lrng_internal.h b/drivers/char/lrng/lrng_internal.h
index a3d9e0bce884..e1f9b6b166fd 100644
--- a/drivers/char/lrng/lrng_internal.h
+++ b/drivers/char/lrng/lrng_internal.h
@@ -250,8 +250,13 @@ int lrng_sdrng_get_sleep(u8 *outbuf, u32 outbuflen);
void lrng_sdrng_force_reseed(void);
void lrng_sdrng_seed_work(struct work_struct *dummy);

+#ifdef CONFIG_NUMA
+struct lrng_sdrng **lrng_sdrng_instances(void);
+void lrng_drngs_numa_alloc(void);
+#else /* CONFIG_NUMA */
static inline struct lrng_sdrng **lrng_sdrng_instances(void) { return NULL; }
static inline void lrng_drngs_numa_alloc(void) { return; }
+#endif /* CONFIG_NUMA */

/************************** Health Test linking code **************************/

diff --git a/drivers/char/lrng/lrng_numa.c b/drivers/char/lrng/lrng_numa.c
new file mode 100644
index 000000000000..5401f927919a
--- /dev/null
+++ b/drivers/char/lrng/lrng_numa.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG NUMA support
+ *
+ * Copyright (C) 2016 - 2019, Stephan Mueller <smueller@xxxxxxxxxx>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/lrng.h>
+#include <linux/slab.h>
+
+#include "lrng_internal.h"
+
+static struct lrng_sdrng **lrng_sdrng __read_mostly = NULL;
+
+struct lrng_sdrng **lrng_sdrng_instances(void)
+{
+ return lrng_sdrng;
+}
+
+/* Allocate the data structures for the per-NUMA node DRNGs */
+static void _lrng_drngs_numa_alloc(struct work_struct *work)
+{
+ struct lrng_sdrng **sdrngs;
+ struct lrng_sdrng *lrng_sdrng_init = lrng_sdrng_init_instance();
+ u32 node;
+ bool init_sdrng_used = false;
+
+ mutex_lock(&lrng_crypto_cb_update);
+
+ /* per-NUMA-node DRNGs are already present */
+ if (lrng_sdrng)
+ goto unlock;
+
+ sdrngs = kcalloc(nr_node_ids, sizeof(void *), GFP_KERNEL|__GFP_NOFAIL);
+ for_each_online_node(node) {
+ struct lrng_sdrng *sdrng;
+
+ if (!init_sdrng_used) {
+ sdrngs[node] = lrng_sdrng_init;
+ init_sdrng_used = true;
+ continue;
+ }
+
+ sdrng = kmalloc_node(sizeof(struct lrng_sdrng),
+ GFP_KERNEL|__GFP_NOFAIL, node);
+ memset(sdrng, 0, sizeof(lrng_sdrng));
+
+ sdrng->crypto_cb = lrng_sdrng_init->crypto_cb;
+ sdrng->sdrng = sdrng->crypto_cb->lrng_drng_alloc(
+ LRNG_DRNG_SECURITY_STRENGTH_BYTES);
+ if (IS_ERR(sdrng->sdrng)) {
+ kfree(sdrng);
+ goto err;
+ }
+
+ mutex_init(&sdrng->lock);
+ spin_lock_init(&sdrng->spin_lock);
+
+ /*
+ * No reseeding of NUMA DRNGs from previous DRNGs as this
+ * would complicate the code. Let it simply reseed.
+ */
+ lrng_sdrng_reset(sdrng);
+ sdrngs[node] = sdrng;
+
+ lrng_pool_inc_numa_node();
+ pr_info("secondary DRNG for NUMA node %d allocated\n", node);
+ }
+
+ /* Ensure that all NUMA nodes receive changed memory here. */
+ mb();
+
+ if (!cmpxchg(&lrng_sdrng, NULL, sdrngs))
+ goto unlock;
+
+err:
+ for_each_online_node(node) {
+ struct lrng_sdrng *sdrng = sdrngs[node];
+
+ if (sdrng == lrng_sdrng_init)
+ continue;
+
+ if (sdrng) {
+ sdrng->crypto_cb->lrng_drng_dealloc(sdrng->sdrng);
+ kfree(sdrng);
+ }
+ }
+ kfree(sdrngs);
+
+unlock:
+ mutex_unlock(&lrng_crypto_cb_update);
+}
+
+static DECLARE_WORK(lrng_drngs_numa_alloc_work, _lrng_drngs_numa_alloc);
+
+void lrng_drngs_numa_alloc(void)
+{
+ schedule_work(&lrng_drngs_numa_alloc_work);
+}
--
2.23.0