[PATCH 1/3] nfsd: fix cpntf publish race in nfs4_init_cp_state

From: Jeff Layton

Date: Wed Jun 24 2026 - 13:52:33 EST


From: Chris Mason <clm@xxxxxxxx>

nfs4_alloc_init_cpntf_state() splits IDR publication and cp_list
linkage across two separate s2s_cp_lock critical sections. The first
installs the new nfs4_cpntf_state into nn->s2s_cp_stateids and then
assigns cs_type = NFS4_COPYNOTIFY_STID; the second acquires the lock
again to list_add() the entry onto p_stid->sc_cp_list. Between the
two, the entry is reachable by so_id with the correct cs_type, but
cp_list is still {NULL, NULL} from kzalloc.

A second NFSv4.2 request can race in that gap:

CPU 0 CPU 1
----- -----
nfs4_alloc_init_cpntf_state()
nfs4_init_cp_state()
spin_lock(&s2s_cp_lock)
idr_alloc_cyclic()
spin_unlock(&s2s_cp_lock)
cps->cp_stateid.cs_type =
NFS4_COPYNOTIFY_STID
nfsd4_offload_cancel()
find_async_copy() -> NULL
manage_cpntf_state(clp!=NULL)
spin_lock(&s2s_cp_lock)
idr_find() -> cps
cs_type check passes
_free_cpntf_state_locked()
refcount_dec_and_test
list_del(&cps->cp_list)
spin_lock(&s2s_cp_lock) /* {NULL,NULL} -> oops */
list_add(&cps->cp_list, ...)

The so_id returned to the client by COPY_NOTIFY is echoed back as
cnr_stateid, so any authenticated NFSv4.2 client can drive
OFFLOAD_CANCEL into manage_cpntf_state() against an entry still in
the window. list_del() on a zeroed list_head dereferences NULL and
oopses the server.

Fold cs_type assignment and the list_add() onto p_stid->sc_cp_list
into the same s2s_cp_lock critical section that runs idr_alloc_cyclic.
A concurrent manage_cpntf_state() now either fails idr_find() (entry
not yet visible) or observes a fully linked cp_list. Initialize
cp_list with INIT_LIST_HEAD() after allocation, and switch
_free_cpntf_state_locked() to list_del_init() so that any stale
unlink on an already-unlinked entry is a benign no-op rather than a
NULL dereference. nfs4_init_copy_state() passes NULL for p_stid and
skips the list_add branch, preserving NFS4_COPY_STID semantics.

Fixes: 624322f1adc5 ("NFSD add COPY_NOTIFY operation")
Assisted-by: kres:claude-opus-4-7
Signed-off-by: Chris Mason <clm@xxxxxxxx>
---
fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++++++++++++++----------
1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a4398dc861a5..b8946db3ebaa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -944,7 +944,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
* Create a unique stateid_t to represent each COPY.
*/
static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
- unsigned char cs_type)
+ unsigned char cs_type, struct nfs4_stid *p_stid)
{
int new_id;

@@ -954,19 +954,37 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
- stid->cs_stid.si_opaque.so_id = new_id;
- stid->cs_stid.si_generation = 1;
+ if (new_id >= 0) {
+ stid->cs_stid.si_opaque.so_id = new_id;
+ stid->cs_stid.si_generation = 1;
+ /*
+ * Publish cs_type and link onto the parent stid's
+ * sc_cp_list inside the same critical section that
+ * installed the entry into nn->s2s_cp_stateids. A
+ * concurrent manage_cpntf_state() either fails the
+ * idr_find() (entry not yet visible) or observes a
+ * fully linked cp_list, so list_del_init() in
+ * _free_cpntf_state_locked() is always well-defined.
+ */
+ stid->cs_type = cs_type;
+ if (p_stid) {
+ struct nfs4_cpntf_state *cps =
+ container_of(stid, struct nfs4_cpntf_state,
+ cp_stateid);
+
+ list_add(&cps->cp_list, &p_stid->sc_cp_list);
+ }
+ }
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
return 0;
- stid->cs_type = cs_type;
return 1;
}

int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
{
- return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
+ return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID, NULL);
}

struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
@@ -977,13 +995,17 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
cps = kzalloc_obj(struct nfs4_cpntf_state);
if (!cps)
return NULL;
+ /*
+ * Initialize cp_list so any stale unlink (e.g. on an
+ * entry that never reached its parent's sc_cp_list)
+ * degrades to a benign self-unlink via list_del_init().
+ */
+ INIT_LIST_HEAD(&cps->cp_list);
cps->cpntf_time = ktime_get_boottime_seconds();
refcount_set(&cps->cp_stateid.cs_count, 1);
- if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
+ if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID,
+ p_stid))
goto out_free;
- spin_lock(&nn->s2s_cp_lock);
- list_add(&cps->cp_list, &p_stid->sc_cp_list);
- spin_unlock(&nn->s2s_cp_lock);
return cps;
out_free:
kfree(cps);
@@ -7854,7 +7876,7 @@ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
return;
- list_del(&cps->cp_list);
+ list_del_init(&cps->cp_list);
idr_remove(&nn->s2s_cp_stateids,
cps->cp_stateid.cs_stid.si_opaque.so_id);
kfree(cps);

--
2.54.0