[PATCH v2] sunrpc: hardcode pool_mode to pernode, remove other modes
From: Jeff Layton
Date: Thu Jun 25 2026 - 11:59:44 EST
The SVC_POOL_AUTO/GLOBAL/PERCPU/PERNODE pool mode selection machinery
was added when NUMA was new and the right default was unclear. Today,
pernode is the right choice everywhere:
- On multi-NUMA hosts, it gives one pool per node with proper thread
affinity and NUMA-local memory allocation.
- On single-node hosts, pernode degenerates to exactly one pool,
identical to the old "global" mode -- svc_pool_for_cpu() short-
circuits when sv_nrpools <= 1, no CPU affinity is set, and memory
is allocated from the single node.
The percpu mode (one pool per CPU) created excessive pools relative to
the number of threads most deployments run, and was only auto-selected
in a narrow case (single node, >2 CPUs).
Remove the SVC_POOL_* enum, mode selection heuristic,
svc_pool_map_init_percpu(), and all mode-based switch statements.
Simplify pool map functions to always use the pernode path. If pool
map allocation fails, svc_pool_map_get() now returns 0 and service
creation fails, rather than silently falling back to a single global
pool.
The module parameter and netlink interfaces are preserved for backward
compatibility:
- Writing any previously-accepted value succeeds silently
- Reading always returns "pernode"
- Writing to the module parameter emits a deprecation notice
Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
This version is essentially the same as v1, but allows the kernel to
accept any previously-accepted setting for pool-mode, which should
alleviate concerns about breakage.
---
Changes in v2:
- Accept any previously-accepted setting for pool_mode
- Link to v1: https://lore.kernel.org/r/20260423-sunrpc-pool-mode-v1-1-b7f20e35749b@xxxxxxxxxx
---
net/sunrpc/svc.c | 238 +++++++++----------------------------------------------
1 file changed, 37 insertions(+), 201 deletions(-)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index dd80a2eaaa74..6e3d509bf95a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -38,19 +38,6 @@
static void svc_unregister(const struct svc_serv *serv, struct net *net);
-#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
-
-/*
- * Mode for mapping cpus to pools.
- */
-enum {
- SVC_POOL_AUTO = -1, /* choose one of the others */
- SVC_POOL_GLOBAL, /* no mapping, just a single global pool
- * (legacy & UP mode) */
- SVC_POOL_PERCPU, /* one pool per cpu */
- SVC_POOL_PERNODE /* one pool per numa node */
-};
-
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
@@ -58,62 +45,23 @@ enum {
struct svc_pool_map {
int count; /* How many svc_servs use us */
- int mode; /* Note: int not enum to avoid
- * warnings about "enumeration value
- * not handled in switch" */
unsigned int npools;
- unsigned int *pool_to; /* maps pool id to cpu or node */
- unsigned int *to_pool; /* maps cpu or node to pool id */
+ unsigned int *pool_to; /* maps pool id to node */
+ unsigned int *to_pool; /* maps node to pool id */
};
-static struct svc_pool_map svc_pool_map = {
- .mode = SVC_POOL_DEFAULT
-};
+static struct svc_pool_map svc_pool_map;
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
-static int
-__param_set_pool_mode(const char *val, struct svc_pool_map *m)
-{
- int err, mode;
-
- mutex_lock(&svc_pool_map_mutex);
-
- err = 0;
- if (!strncmp(val, "auto", 4))
- mode = SVC_POOL_AUTO;
- else if (!strncmp(val, "global", 6))
- mode = SVC_POOL_GLOBAL;
- else if (!strncmp(val, "percpu", 6))
- mode = SVC_POOL_PERCPU;
- else if (!strncmp(val, "pernode", 7))
- mode = SVC_POOL_PERNODE;
- else
- err = -EINVAL;
-
- if (err)
- goto out;
-
- if (m->count == 0)
- m->mode = mode;
- else if (mode != m->mode)
- err = -EBUSY;
-out:
- mutex_unlock(&svc_pool_map_mutex);
- return err;
-}
-
-static int
-param_set_pool_mode(const char *val, const struct kernel_param *kp)
-{
- struct svc_pool_map *m = kp->arg;
-
- return __param_set_pool_mode(val, m);
-}
-
int sunrpc_set_pool_mode(const char *val)
{
- return __param_set_pool_mode(val, &svc_pool_map);
+ if (!strncmp(val, "auto", 4) ||
+ !strncmp(val, "global", 6) ||
+ !strncmp(val, "percpu", 6) ||
+ !strncmp(val, "pernode", 7))
+ return 0;
+ return -EINVAL;
}
EXPORT_SYMBOL(sunrpc_set_pool_mode);
@@ -122,84 +70,32 @@ EXPORT_SYMBOL(sunrpc_set_pool_mode);
* @buf: where to write the current pool_mode
* @size: size of @buf
*
- * Grab the current pool_mode from the svc_pool_map and write
- * the resulting string to @buf. Returns the number of characters
+ * Write the pool_mode string to @buf. Returns the number of characters
* written to @buf (a'la snprintf()).
*/
int
sunrpc_get_pool_mode(char *buf, size_t size)
{
- struct svc_pool_map *m = &svc_pool_map;
-
- switch (m->mode)
- {
- case SVC_POOL_AUTO:
- return snprintf(buf, size, "auto");
- case SVC_POOL_GLOBAL:
- return snprintf(buf, size, "global");
- case SVC_POOL_PERCPU:
- return snprintf(buf, size, "percpu");
- case SVC_POOL_PERNODE:
- return snprintf(buf, size, "pernode");
- default:
- return snprintf(buf, size, "%d", m->mode);
- }
+ return snprintf(buf, size, "pernode");
}
EXPORT_SYMBOL(sunrpc_get_pool_mode);
static int
-param_get_pool_mode(char *buf, const struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
- char str[16];
- int len;
-
- len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str));
-
- /* Ensure we have room for newline and NUL */
- len = min_t(int, len, ARRAY_SIZE(str) - 2);
-
- /* tack on the newline */
- str[len] = '\n';
- str[len + 1] = '\0';
-
- return sysfs_emit(buf, "%s", str);
+ pr_notice("sunrpc: the pool_mode parameter is deprecated and will be removed in a future release.\n");
+ return sunrpc_set_pool_mode(val);
}
-module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
- &svc_pool_map, 0644);
-
-/*
- * Detect best pool mapping mode heuristically,
- * according to the machine's topology.
- */
static int
-svc_pool_map_choose_mode(void)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
- unsigned int node;
-
- if (nr_online_nodes > 1) {
- /*
- * Actually have multiple NUMA nodes,
- * so split pools on NUMA node boundaries
- */
- return SVC_POOL_PERNODE;
- }
-
- node = first_online_node;
- if (nr_cpus_node(node) > 2) {
- /*
- * Non-trivial SMP, or CONFIG_NUMA on
- * non-NUMA hardware, e.g. with a generic
- * x86_64 kernel on Xeons. In this case we
- * want to divide the pools on cpu boundaries.
- */
- return SVC_POOL_PERCPU;
- }
-
- /* default: one global pool */
- return SVC_POOL_GLOBAL;
+ return sysfs_emit(buf, "pernode\n");
}
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ NULL, 0644);
+
/*
* Allocate the to_pool[] and pool_to[] arrays.
* Returns 0 on success or an errno.
@@ -224,35 +120,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
}
/*
- * Initialise the pool map for SVC_POOL_PERCPU mode.
- * Returns number of pools or <0 on error.
- */
-static int
-svc_pool_map_init_percpu(struct svc_pool_map *m)
-{
- unsigned int maxpools = nr_cpu_ids;
- unsigned int pidx = 0;
- unsigned int cpu;
- int err;
-
- err = svc_pool_map_alloc_arrays(m, maxpools);
- if (err)
- return err;
-
- for_each_online_cpu(cpu) {
- BUG_ON(pidx >= maxpools);
- m->to_pool[cpu] = pidx;
- m->pool_to[pidx] = cpu;
- pidx++;
- }
- /* cpus brought online later all get mapped to pool0, sorry */
-
- return pidx;
-};
-
-
-/*
- * Initialise the pool map for SVC_POOL_PERNODE mode.
+ * Initialise the pool map for one pool per NUMA node.
* Returns number of pools or <0 on error.
*/
static int
@@ -284,14 +152,13 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
* Add a reference to the global map of cpus to pools (and
* vice versa) if pools are in use.
* Initialise the map if we're the first user.
- * Returns the number of pools. If this is '1', no reference
- * was taken.
+ * Returns the number of pools, or 0 on failure.
*/
static unsigned int
svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
- int npools = -1;
+ int npools;
mutex_lock(&svc_pool_map_mutex);
if (m->count++) {
@@ -299,22 +166,11 @@ svc_pool_map_get(void)
return m->npools;
}
- if (m->mode == SVC_POOL_AUTO)
- m->mode = svc_pool_map_choose_mode();
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- npools = svc_pool_map_init_percpu(m);
- break;
- case SVC_POOL_PERNODE:
- npools = svc_pool_map_init_pernode(m);
- break;
- }
-
+ npools = svc_pool_map_init_pernode(m);
if (npools <= 0) {
- /* default, or memory allocation failure */
- npools = 1;
- m->mode = SVC_POOL_GLOBAL;
+ m->count--;
+ mutex_unlock(&svc_pool_map_mutex);
+ return 0;
}
m->npools = npools;
mutex_unlock(&svc_pool_map_mutex);
@@ -346,14 +202,11 @@ static int svc_pool_map_get_node(unsigned int pidx)
{
const struct svc_pool_map *m = &svc_pool_map;
- if (m->count) {
- if (m->mode == SVC_POOL_PERCPU)
- return cpu_to_node(m->pool_to[pidx]);
- if (m->mode == SVC_POOL_PERNODE)
- return m->pool_to[pidx];
- }
+ if (m->count)
+ return m->pool_to[pidx];
return numa_mem_id();
}
+
/*
* Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -372,27 +225,15 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
if (m->count == 0)
return;
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- {
- set_cpus_allowed_ptr(task, cpumask_of(node));
- break;
- }
- case SVC_POOL_PERNODE:
- {
- set_cpus_allowed_ptr(task, cpumask_of_node(node));
- break;
- }
- }
+ set_cpus_allowed_ptr(task, cpumask_of_node(node));
}
/**
* svc_pool_for_cpu - Select pool to run a thread on this cpu
* @serv: An RPC service
*
- * Use the active CPU and the svc_pool_map's mode setting to
- * select the svc thread pool to use. Once initialized, the
- * svc_pool_map does not change.
+ * Use the active CPU and the svc_pool_map to select the svc thread
+ * pool to use. Once initialized, the svc_pool_map does not change.
*
* Return value:
* A pointer to an svc_pool
@@ -400,20 +241,12 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
{
struct svc_pool_map *m = &svc_pool_map;
- int cpu = raw_smp_processor_id();
- unsigned int pidx = 0;
+ unsigned int pidx;
if (serv->sv_nrpools <= 1)
return serv->sv_pools;
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
- }
+ pidx = m->to_pool[cpu_to_node(raw_smp_processor_id())];
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -617,6 +450,9 @@ struct svc_serv *svc_create_pooled(struct svc_program *prog,
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
+ if (!npools)
+ return NULL;
+
serv = __svc_create(prog, nprogs, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
---
base-commit: 2bb83225da8ee0383d17783b5c903589696faf90
change-id: 20260423-sunrpc-pool-mode-3e6b56320dc4
Best regards,
--
Jeff Layton <jlayton@xxxxxxxxxx>