[PATCH v7 08/20] nfsd: use RCU to protect fi_deleg_file

From: Jeff Layton

Date: Tue Jun 16 2026 - 08:04:52 EST


fi_deleg_file can be NULLed by put_deleg_file() when fi_delegees drops
to zero during delegation teardown (e.g. DELEGRETURN). Concurrent
accesses from workqueue callbacks -- such as CB_NOTIFY -- can
dereference a NULL pointer if they race with this teardown.

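Annotate fi_deleg_file with __rcu and convert all accessors to use
proper RCU primitives:

- rcu_assign_pointer() / RCU_INIT_POINTER() for stores
- rcu_dereference_protected() for reads under fi_lock or where
  fi_delegees > 0 guarantees stability
- rcu_read_lock() + rcu_dereference() + nfsd_file_get() in
  nfsd4_alloc_layout_stateid(), replacing the fi_lock-protected read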

This prepares for a subsequent patch that will use rcu_read_lock +
rcu_dereference + nfsd_file_get to safely acquire a reference from
the CB_NOTIFY callback path without holding fi_lock.

While converting the error-path lease teardown in nfsd_get_dir_deleg(),
also add an nfsd_fsnotify_recalc_mask() call after dropping the lease, to
match the success path and the equivalent teardown in
nfs4_unlock_deleg_lease(). Without it, a failure after the lease is set
leaves the inode's fsnotify mask reflecting a delegation that no longer
exists.

The error-path teardown in nfsd_get_dir_deleg() already unlocks against
fi_deleg_file->nf_file rather than this client's nf->nf_file; add a
comment documenting why. The lease's flc_file is set to
fi_deleg_file->nf_file in nfs4_alloc_init_lease(), which differs from
nf->nf_file when an earlier client already holds a delegation on the
same directory. generic_delete_lease() matches on flc_file, so unlocking
against the wrong file would leak the lease on the inode.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
fs/nfsd/nfs4layouts.c | 7 ++++---
fs/nfsd/nfs4state.c | 51 ++++++++++++++++++++++++++++++++++-----------------
fs/nfsd/state.h | 2 +-
3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 4c3f253c7d07..22bcb6d09f70 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -248,12 +248,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
NFSPROC4_CLNT_CB_LAYOUT);

if (parent->sc_type == SC_TYPE_DELEG) {
- spin_lock(&fp->fi_lock);
- ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
- spin_unlock(&fp->fi_lock);
+ rcu_read_lock();
+ ls->ls_file = nfsd_file_get(rcu_dereference(fp->fi_deleg_file));
+ rcu_read_unlock();
} else {
ls->ls_file = find_any_file(fp);
}
+
if (!ls->ls_file) {
nfs4_put_stid(stp);
return NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2189d8d360af..47af5729a86f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1212,7 +1212,9 @@ static void put_deleg_file(struct nfs4_file *fp)

spin_lock(&fp->fi_lock);
if (--fp->fi_delegees == 0) {
- swap(nf, fp->fi_deleg_file);
+ nf = rcu_dereference_protected(fp->fi_deleg_file,
+ lockdep_is_held(&fp->fi_lock));
+ RCU_INIT_POINTER(fp->fi_deleg_file, NULL);
swap(rnf, fp->fi_rdeleg_file);
}
spin_unlock(&fp->fi_lock);
@@ -1250,7 +1252,7 @@ static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct f
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
- struct nfsd_file *nf = fp->fi_deleg_file;
+ struct nfsd_file *nf = rcu_dereference_protected(fp->fi_deleg_file, 1);

WARN_ON_ONCE(!fp->fi_delegees);

@@ -3200,7 +3202,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
/* XXX: lease time, whether it's being recalled. */

spin_lock(&nf->fi_lock);
- file = nf->fi_deleg_file;
+ file = rcu_dereference_protected(nf->fi_deleg_file,
+ lockdep_is_held(&nf->fi_lock));
if (file) {
seq_puts(s, ", ");
nfs4_show_superblock(s, file);
@@ -5009,7 +5012,7 @@ static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
INIT_LIST_HEAD(&fp->fi_delegations);
INIT_LIST_HEAD(&fp->fi_clnt_odstate);
fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
- fp->fi_deleg_file = NULL;
+ RCU_INIT_POINTER(fp->fi_deleg_file, NULL);
fp->fi_rdeleg_file = NULL;
fp->fi_had_conflict = false;
fp->fi_share_deny = 0;
@@ -6163,7 +6166,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, u32
fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
fl->c.flc_owner = (fl_owner_t)dp;
fl->c.flc_pid = current->tgid;
- fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
+ fl->c.flc_file = rcu_dereference_protected(dp->dl_stid.sc_file->fi_deleg_file, 1)->nf_file;
return fl;
}

@@ -6171,7 +6174,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
struct nfs4_file *fp)
{
struct nfs4_ol_stateid *st;
- struct file *f = fp->fi_deleg_file->nf_file;
+ struct file *f = rcu_dereference_protected(fp->fi_deleg_file, 1)->nf_file;
struct inode *ino = file_inode(f);
int writes;

@@ -6248,7 +6251,7 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,

exp_put(exp);
dput(child);
- if (child != file_dentry(fp->fi_deleg_file->nf_file))
+ if (child != file_dentry(rcu_dereference_protected(fp->fi_deleg_file, 1)->nf_file))
return -EAGAIN;

return 0;
@@ -6354,8 +6357,9 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
status = -EAGAIN;
else if (nfsd4_verify_setuid_write(open, nf))
status = -EAGAIN;
- else if (!fp->fi_deleg_file) {
- fp->fi_deleg_file = nf;
+ else if (!rcu_dereference_protected(fp->fi_deleg_file,
+ lockdep_is_held(&fp->fi_lock))) {
+ rcu_assign_pointer(fp->fi_deleg_file, nf);
/* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
@@ -6380,7 +6384,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (!fl)
goto out_clnt_odstate;

- status = kernel_setlease(fp->fi_deleg_file->nf_file,
+ status = kernel_setlease(rcu_dereference_protected(fp->fi_deleg_file, 1)->nf_file,
fl->c.flc_type, &fl, NULL);
if (fl)
locks_free_lease(fl);
@@ -6401,7 +6405,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
* Now that the deleg is set, check again to ensure that nothing
* raced in and changed the mode while we weren't looking.
*/
- status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
+ status = nfsd4_verify_setuid_write(open, rcu_dereference_protected(fp->fi_deleg_file, 1));
if (status)
goto out_unlock;

@@ -6422,7 +6426,8 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,

return dp;
out_unlock:
- kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
+ kernel_setlease(rcu_dereference_protected(fp->fi_deleg_file, 1)->nf_file,
+ F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
@@ -6579,8 +6584,9 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));

if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
- struct file *f = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
+ struct file *f;

+ f = rcu_dereference_protected(dp->dl_stid.sc_file->fi_deleg_file, 1)->nf_file;
if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) ||
!nfs4_delegation_stat(dp, currentfh, &stat)) {
nfs4_put_stid(&dp->dl_stid);
@@ -9787,8 +9793,9 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
/* existing delegation? */
if (nfs4_delegation_exists(clp, fp)) {
status = -EAGAIN;
- } else if (!fp->fi_deleg_file) {
- fp->fi_deleg_file = nfsd_file_get(nf);
+ } else if (!rcu_dereference_protected(fp->fi_deleg_file,
+ lockdep_is_held(&fp->fi_lock))) {
+ rcu_assign_pointer(fp->fi_deleg_file, nfsd_file_get(nf));
fp->fi_delegees = 1;
} else {
++fp->fi_delegees;
@@ -9844,8 +9851,18 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
return dp;
}

- /* Something failed. Drop the lease and clean up the stid */
- kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
+ /*
+ * Something failed after the lease was set. Drop the lease and clean
+ * up the stid. The lease's flc_file is the fi_deleg_file (see
+ * nfs4_alloc_init_lease()), which is not necessarily this client's
+ * @nf when an earlier client already holds a delegation on @fp.
+ * generic_delete_lease() matches on flc_file, so unlock against
+ * fi_deleg_file or the lease will be leaked (and later freed with the
+ * stid, leading to a use-after-free when it's eventually broken).
+ */
+ kernel_setlease(rcu_dereference_protected(fp->fi_deleg_file, 1)->nf_file,
+ F_UNLCK, NULL, (void **)&dp);
+ nfsd_fsnotify_recalc_mask(nf);
out_put_stid:
nfs4_put_stid(&dp->dl_stid);
out_delegees:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9f321e9ed76d..4fca0537ca8b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -699,7 +699,7 @@ struct nfs4_file {
*/
atomic_t fi_access[2];
u32 fi_share_deny;
- struct nfsd_file *fi_deleg_file;
+ struct nfsd_file __rcu *fi_deleg_file;
struct nfsd_file *fi_rdeleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;

--
2.54.0