[PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology
From: Werner Kasselman
Date: Tue Apr 21 2026 - 06:04:08 EST
The recursive-descent volume parser materializes a separate device
tree node for every volume reference. When CONCAT or STRIPE volumes
reference the same child volume index more than once, the parser
re-parses that entire subtree for each reference, so the amount of
work can grow exponentially with nesting depth.
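Cap the total number of bl_parse_deviceid() calls made on behalf of a
single bl_alloc_deviceid_node() invocation at PNFS_BLOCK_MAX_PARSE_OPS
(1024) to bound CPU and memory consumption from server-controlled
GETDEVICEINFO topologies. Once the budget is exhausted, parsing fails
with -EIO.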
Signed-off-by: Werner Kasselman <werner@xxxxxxxxxxx>
---
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/blocklayout/dev.c | 31 +++++++++++++++++++------------
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index ec8917cc335d..6c00d98d4317 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -49,6 +49,7 @@ struct pnfs_block_dev;
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
#define PNFS_BLOCK_MAX_DEPTH 16
+#define PNFS_BLOCK_MAX_PARSE_OPS 1024
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index d9b1af863535..6e0df65c9b1f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -288,7 +288,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask);
+ int depth, int *remaining, gfp_t gfp_mask);
static int
@@ -441,13 +441,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
- v->slice.volume, depth + 1, gfp_mask);
+ v->slice.volume, depth + 1, remaining,
+ gfp_mask);
if (ret)
return ret;
@@ -459,7 +460,7 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -473,7 +474,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->concat.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->concat.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -490,7 +491,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -504,7 +505,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->stripe.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->stripe.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -521,7 +522,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
if (idx < 0 || idx >= nr_volumes) {
dprintk("volume index %d out of range (0..%d)\n",
@@ -534,6 +535,11 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return -EIO;
}
+ if (--(*remaining) < 0) {
+ dprintk("volume topology too complex\n");
+ return -EIO;
+ }
+
d->type = volumes[idx].type;
switch (d->type) {
@@ -541,13 +547,13 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
return bl_parse_slice(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
return bl_parse_concat(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
@@ -567,6 +573,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct xdr_buf buf;
struct folio *scratch;
int nr_volumes, ret, i;
+ int remaining = PNFS_BLOCK_MAX_PARSE_OPS;
__be32 *p;
scratch = folio_alloc(gfp_mask, 0);
@@ -599,7 +606,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
- nr_volumes - 1, 0, gfp_mask);
+ nr_volumes - 1, 0, &remaining, gfp_mask);
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
--
2.43.0