[PATCH v3 2/4] sunrpc: hardcode pool_mode to pernode, remove other modes

From: Jeff Layton

Date: Mon Jun 29 2026 - 13:50:54 EST


The SVC_POOL_AUTO/GLOBAL/PERCPU/PERNODE pool mode selection machinery
was added when NUMA was new and the right default was unclear. The
default has always been "global" (a single pool for the whole service);
the other modes were only used when an admin explicitly set the
pool_mode parameter or asked for "auto", which then picked a mode from
the host topology. Today, pernode is the right choice everywhere:

- On multi-NUMA hosts, it gives one pool per node with proper thread
affinity and NUMA-local memory allocation.
- On single-node hosts, pernode degenerates to exactly one pool,
identical to the old "global" mode -- svc_pool_for_cpu() short-
circuits when sv_nrpools <= 1, no CPU affinity is set, and memory
is allocated from the single node.

The percpu mode (one pool per CPU) created excessive pools relative to
the number of threads most deployments run, and was only auto-selected
in a narrow case (single node, >2 CPUs).

Note that this changes the default behaviour on multi-NUMA hosts: a
service that previously ran with a single global pool now gets one pool
per NUMA node by default. This in turn means a host running fewer
threads than it has NUMA nodes can end up with pools that have no
threads. svc_pool_for_cpu() already falls back to a populated pool in
that case, so transports are still serviced.

Remove the SVC_POOL_* enum, the mode selection heuristic,
svc_pool_map_init_percpu(), and all mode-based switch statements.
Simplify the pool map functions to always use the pernode path. If pool
map allocation fails, svc_pool_map_get() now returns 0 and
svc_create_pooled() returns NULL, so service creation fails rather than
silently falling back to a single global pool.

The module parameter and netlink interfaces are preserved for backward
compatibility:
- Writing any of the previously documented values (auto, global,
percpu, pernode) succeeds but has no effect
- Reading always returns "pernode"
- Writing to the module parameter additionally logs a one-time
deprecation notice

Update Documentation/admin-guide/kernel-parameters.txt to mark the
pool_mode parameter deprecated and describe the new behaviour.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
Documentation/admin-guide/kernel-parameters.txt | 20 +-
net/sunrpc/svc.c | 244 ++++--------------------
2 files changed, 49 insertions(+), 215 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b5493a7f8f22..441b78867478 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7441,19 +7441,13 @@ Kernel parameters

sunrpc.pool_mode=
[NFS]
- Control how the NFS server code allocates CPUs to
- service thread pools. Depending on how many NICs
- you have and where their interrupts are bound, this
- option will affect which CPUs will do NFS serving.
- Note: this parameter cannot be changed while the
- NFS server is running.
-
- auto the server chooses an appropriate mode
- automatically using heuristics
- global a single global pool contains all CPUs
- percpu one pool for each CPU
- pernode one pool for each NUMA node (equivalent
- to global on non-NUMA machines)
+ Deprecated. The NFS server now always uses one
+ service thread pool per NUMA node (equivalent to a
+ single global pool on non-NUMA machines). All of
+ the previously accepted values (auto, global,
+ percpu, pernode) are still accepted for backward
+ compatibility but are ignored: the mode is always
+ pernode, and reads always return "pernode".

sunrpc.tcp_slot_table_entries=
sunrpc.udp_slot_table_entries=
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 82fb7faf563f..2f6938fe28b2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -38,19 +38,6 @@

static void svc_unregister(const struct svc_serv *serv, struct net *net);

-#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
-
-/*
- * Mode for mapping cpus to pools.
- */
-enum {
- SVC_POOL_AUTO = -1, /* choose one of the others */
- SVC_POOL_GLOBAL, /* no mapping, just a single global pool
- * (legacy & UP mode) */
- SVC_POOL_PERCPU, /* one pool per cpu */
- SVC_POOL_PERNODE /* one pool per numa node */
-};
-
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
@@ -58,62 +45,29 @@ enum {

struct svc_pool_map {
int count; /* How many svc_servs use us */
- int mode; /* Note: int not enum to avoid
- * warnings about "enumeration value
- * not handled in switch" */
unsigned int npools;
- unsigned int *pool_to; /* maps pool id to cpu or node */
- unsigned int *to_pool; /* maps cpu or node to pool id */
+ unsigned int *pool_to; /* maps pool id to node */
+ unsigned int *to_pool; /* maps node to pool id */
};

-static struct svc_pool_map svc_pool_map = {
- .mode = SVC_POOL_DEFAULT
-};
+static struct svc_pool_map svc_pool_map;

static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */

-static int
-__param_set_pool_mode(const char *val, struct svc_pool_map *m)
-{
- int err, mode;
-
- mutex_lock(&svc_pool_map_mutex);
-
- err = 0;
- if (!strncmp(val, "auto", 4))
- mode = SVC_POOL_AUTO;
- else if (!strncmp(val, "global", 6))
- mode = SVC_POOL_GLOBAL;
- else if (!strncmp(val, "percpu", 6))
- mode = SVC_POOL_PERCPU;
- else if (!strncmp(val, "pernode", 7))
- mode = SVC_POOL_PERNODE;
- else
- err = -EINVAL;
-
- if (err)
- goto out;
-
- if (m->count == 0)
- m->mode = mode;
- else if (mode != m->mode)
- err = -EBUSY;
-out:
- mutex_unlock(&svc_pool_map_mutex);
- return err;
-}
-
-static int
-param_set_pool_mode(const char *val, const struct kernel_param *kp)
-{
- struct svc_pool_map *m = kp->arg;
-
- return __param_set_pool_mode(val, m);
-}
+/*
+ * Pool modes that were historically accepted. They no longer select
+ * anything: the pool mode is always pernode. The names are retained
+ * only so that writing a previously-valid value still succeeds.
+ */
+static const char * const pool_mode_names[] = {
+ "auto", "global", "percpu", "pernode",
+};

int sunrpc_set_pool_mode(const char *val)
{
- return __param_set_pool_mode(val, &svc_pool_map);
+ int idx = sysfs_match_string(pool_mode_names, val);
+
+ return idx < 0 ? idx : 0;
}
EXPORT_SYMBOL(sunrpc_set_pool_mode);

@@ -122,84 +76,32 @@ EXPORT_SYMBOL(sunrpc_set_pool_mode);
* @buf: where to write the current pool_mode
* @size: size of @buf
*
- * Grab the current pool_mode from the svc_pool_map and write
- * the resulting string to @buf. Returns the number of characters
+ * Write the pool_mode string to @buf. Returns the number of characters
* written to @buf (a'la snprintf()).
*/
int
sunrpc_get_pool_mode(char *buf, size_t size)
{
- struct svc_pool_map *m = &svc_pool_map;
-
- switch (m->mode)
- {
- case SVC_POOL_AUTO:
- return snprintf(buf, size, "auto");
- case SVC_POOL_GLOBAL:
- return snprintf(buf, size, "global");
- case SVC_POOL_PERCPU:
- return snprintf(buf, size, "percpu");
- case SVC_POOL_PERNODE:
- return snprintf(buf, size, "pernode");
- default:
- return snprintf(buf, size, "%d", m->mode);
- }
+ return snprintf(buf, size, "pernode");
}
EXPORT_SYMBOL(sunrpc_get_pool_mode);

static int
-param_get_pool_mode(char *buf, const struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
- char str[16];
- int len;
-
- len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str));
-
- /* Ensure we have room for newline and NUL */
- len = min_t(int, len, ARRAY_SIZE(str) - 2);
-
- /* tack on the newline */
- str[len] = '\n';
- str[len + 1] = '\0';
-
- return sysfs_emit(buf, "%s", str);
+ pr_notice_once("sunrpc: the pool_mode module parameter is deprecated and no longer has any effect; the pool mode is always 'pernode'\n");
+ return sunrpc_set_pool_mode(val);
}

-module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
- &svc_pool_map, 0644);
-
-/*
- * Detect best pool mapping mode heuristically,
- * according to the machine's topology.
- */
static int
-svc_pool_map_choose_mode(void)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
- unsigned int node;
-
- if (nr_online_nodes > 1) {
- /*
- * Actually have multiple NUMA nodes,
- * so split pools on NUMA node boundaries
- */
- return SVC_POOL_PERNODE;
- }
-
- node = first_online_node;
- if (nr_cpus_node(node) > 2) {
- /*
- * Non-trivial SMP, or CONFIG_NUMA on
- * non-NUMA hardware, e.g. with a generic
- * x86_64 kernel on Xeons. In this case we
- * want to divide the pools on cpu boundaries.
- */
- return SVC_POOL_PERCPU;
- }
-
- /* default: one global pool */
- return SVC_POOL_GLOBAL;
+ return sysfs_emit(buf, "pernode\n");
}

+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ NULL, 0644);
+
/*
* Allocate the to_pool[] and pool_to[] arrays.
* Returns 0 on success or an errno.
@@ -224,35 +126,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
}

/*
- * Initialise the pool map for SVC_POOL_PERCPU mode.
- * Returns number of pools or <0 on error.
- */
-static int
-svc_pool_map_init_percpu(struct svc_pool_map *m)
-{
- unsigned int maxpools = nr_cpu_ids;
- unsigned int pidx = 0;
- unsigned int cpu;
- int err;
-
- err = svc_pool_map_alloc_arrays(m, maxpools);
- if (err)
- return err;
-
- for_each_online_cpu(cpu) {
- BUG_ON(pidx >= maxpools);
- m->to_pool[cpu] = pidx;
- m->pool_to[pidx] = cpu;
- pidx++;
- }
- /* cpus brought online later all get mapped to pool0, sorry */
-
- return pidx;
-};
-
-
-/*
- * Initialise the pool map for SVC_POOL_PERNODE mode.
+ * Initialise the pool map for one pool per NUMA node.
* Returns number of pools or <0 on error.
*/
static int
@@ -284,14 +158,13 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
* Add a reference to the global map of cpus to pools (and
* vice versa) if pools are in use.
* Initialise the map if we're the first user.
- * Returns the number of pools. If this is '1', no reference
- * was taken.
+ * Returns the number of pools, or 0 on failure.
*/
static unsigned int
svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
- int npools = -1;
+ int npools;

mutex_lock(&svc_pool_map_mutex);
if (m->count++) {
@@ -299,22 +172,11 @@ svc_pool_map_get(void)
return m->npools;
}

- if (m->mode == SVC_POOL_AUTO)
- m->mode = svc_pool_map_choose_mode();
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- npools = svc_pool_map_init_percpu(m);
- break;
- case SVC_POOL_PERNODE:
- npools = svc_pool_map_init_pernode(m);
- break;
- }
-
+ npools = svc_pool_map_init_pernode(m);
if (npools <= 0) {
- /* default, or memory allocation failure */
- npools = 1;
- m->mode = SVC_POOL_GLOBAL;
+ m->count = 0;
+ mutex_unlock(&svc_pool_map_mutex);
+ return 0;
}
m->npools = npools;
mutex_unlock(&svc_pool_map_mutex);
@@ -346,14 +208,11 @@ static int svc_pool_map_get_node(unsigned int pidx)
{
const struct svc_pool_map *m = &svc_pool_map;

- if (m->count) {
- if (m->mode == SVC_POOL_PERCPU)
- return cpu_to_node(m->pool_to[pidx]);
- if (m->mode == SVC_POOL_PERNODE)
- return m->pool_to[pidx];
- }
+ if (m->count)
+ return m->pool_to[pidx];
return numa_mem_id();
}
+
/*
* Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -372,27 +231,15 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
if (m->count == 0)
return;

- switch (m->mode) {
- case SVC_POOL_PERCPU:
- {
- set_cpus_allowed_ptr(task, cpumask_of(node));
- break;
- }
- case SVC_POOL_PERNODE:
- {
- set_cpus_allowed_ptr(task, cpumask_of_node(node));
- break;
- }
- }
+ set_cpus_allowed_ptr(task, cpumask_of_node(node));
}

/**
* svc_pool_for_cpu - Select pool to run a thread on this cpu
* @serv: An RPC service
*
- * Use the active CPU and the svc_pool_map's mode setting to
- * select the svc thread pool to use. Once initialized, the
- * svc_pool_map does not change.
+ * Use the active CPU and the svc_pool_map to select the svc thread
+ * pool to use. Once initialized, the svc_pool_map does not change.
*
* Return value:
* A pointer to an svc_pool
@@ -400,22 +247,12 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
{
struct svc_pool_map *m = &svc_pool_map;
- int cpu = raw_smp_processor_id();
- unsigned int pidx = 0;
- unsigned int i;
+ unsigned int pidx, i;

if (serv->sv_nrpools <= 1)
return serv->sv_pools;

- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
- }
- pidx %= serv->sv_nrpools;
+ pidx = m->to_pool[cpu_to_node(raw_smp_processor_id())] % serv->sv_nrpools;

/*
* Threads are spread evenly across the pools, but when there are
@@ -641,6 +478,9 @@ struct svc_serv *svc_create_pooled(struct svc_program *prog,
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();

+ if (!npools)
+ return NULL;
+
serv = __svc_create(prog, nprogs, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;

--
2.54.0