[PATCH 6/6] nfsd: fix layout fence worker double-reference race
From: Jeff Layton
Date: Sun May 31 2026 - 08:07:49 EST
The workqueue core clears WORK_STRUCT_PENDING before the callback
is invoked, so delayed_work_pending() in
nfsd4_layout_lm_breaker_timedout() can return false while the fence
worker is already running. The self-rearm at the top of the worker
only narrows this window: PENDING is still clear between the
workqueue dequeuing the work and the worker calling
mod_delayed_work(). This lets the breaker take a duplicate sc_count
reference and schedule a new worker that coalesces with the
in-progress one. The extra reference is never put, leaking the
layout stateid.
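Replace the racy delayed_work_pending() check with an
ls_fence_inflight boolean. The breaker tests and sets it in the same
ls_lock critical section as the refcount_inc_not_zero(), and the
worker clears it under ls_lock before nfs4_put_stid() on every exit
path. Remove the self-rearm mod_delayed_work() at the top of the
worker, which existed only to keep delayed_work_pending() true while
the fence operation was in progress and is no longer needed.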
Fixes: f52792f484ba ("NFSD: Enforce timeout on layout recall and integrate lease manager fencing")
Assisted-by: kres:claude-opus-4-7
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
fs/nfsd/nfs4layouts.c | 27 +++++++++++++++------------
fs/nfsd/state.h | 1 +
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 6c4e4fdd6c05..475246c0e20c 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -260,6 +260,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
}
ls->ls_fenced = false;
+ ls->ls_fence_inflight = false;
ls->ls_fence_delay = 0;
INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker);
@@ -798,15 +799,6 @@ nfsd4_layout_fence_worker(struct work_struct *work)
struct nfs4_client *clp;
struct nfsd_net *nn;
- /*
- * The workqueue clears WORK_STRUCT_PENDING before invoking
- * this callback. Re-arm immediately so that
- * delayed_work_pending() returns true while the fence
- * operation is in progress, preventing
- * lm_breaker_timedout() from taking a duplicate reference.
- */
- mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
-
spin_lock(&ls->ls_lock);
if (list_empty(&ls->ls_layouts)) {
spin_unlock(&ls->ls_lock);
@@ -816,6 +808,9 @@ nfsd4_layout_fence_worker(struct work_struct *work)
nfsd4_close_layout(ls);
ls->ls_fenced = true;
+ spin_lock(&ls->ls_lock);
+ ls->ls_fence_inflight = false;
+ spin_unlock(&ls->ls_lock);
nfs4_put_stid(&ls->ls_stid);
return;
}
@@ -901,18 +896,26 @@ nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
if ((!nfsd4_layout_ops[ls->ls_layout_type]->fence_client) ||
ls->ls_fenced)
return true;
- if (delayed_work_pending(&ls->ls_fence_work))
- return false;
/*
* Make sure layout has not been returned yet before
- * taking a reference count on the layout stateid.
+ * taking a reference count on the layout stateid. The
+ * ls_fence_inflight flag is set together with the sc_count
+ * increment under ls_lock so that a fence worker invocation
+ * already in progress (which has cleared WORK_STRUCT_PENDING
+ * but not yet reached dispose:) cannot be coalesced with a
+ * fresh schedule that takes an extra unmatched reference.
*/
spin_lock(&ls->ls_lock);
+ if (ls->ls_fence_inflight) {
+ spin_unlock(&ls->ls_lock);
+ return false;
+ }
if (list_empty(&ls->ls_layouts) ||
!refcount_inc_not_zero(&ls->ls_stid.sc_count)) {
spin_unlock(&ls->ls_lock);
return true;
}
+ ls->ls_fence_inflight = true;
spin_unlock(&ls->ls_lock);
mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c26b2384d694..05b6f12040d8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -812,6 +812,7 @@ struct nfs4_layout_stateid {
struct delayed_work ls_fence_work;
unsigned int ls_fence_delay;
bool ls_fenced;
+ bool ls_fence_inflight;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
--
2.54.0