Re: [PATCH v2] nfs: fix the race of lock/unlock and open

From: Li Lingfeng

Date: Mon Feb 02 2026 - 06:23:25 EST

Hi,

I tried separating the local VFS lock update from the RPC completion path
(performing an unlock if the VFS lock update fails), and then using
nfsi->rwsem to protect the file lock to prevent UAF.

Split local VFS lock update from RPC completion path:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 491fbe65e644..41d66e34851b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6887,7 +6887,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return rpc_run_task(&task_setup_data);
}

-static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request)
+static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request, int remote_only)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *sp = state->owner;
@@ -6906,7 +6906,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request)
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
down_read(&nfsi->rwsem);
- if (locks_lock_inode_wait(inode, request) == -ENOENT) {
+ if (!remote_only && locks_lock_inode_wait(inode, request) == -ENOENT) {
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
@@ -7044,11 +7044,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
case 0:
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
data->timestamp);
- if (data->arg.new_lock && !data->cancelled) {
- data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
- if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
- goto out_restart;
- }
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
@@ -7254,7 +7249,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs4_state_owner *sp = state->owner;
unsigned char fl_flags = request->fl_flags;
- int status;
+ int status, ret;

request->fl_flags |= FL_ACCESS;
status = locks_lock_inode_wait(state->inode, request);
@@ -7274,6 +7269,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+ if (status != 0)
+ goto out;
+
+ request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+ status = locks_lock_inode_wait(state->inode, request);
+ if (status) {
+ ret = nfs4_proc_unlck(state, request, 1);
+ status = ret ? ret : status;
+ dprintk("%s: cancelling lock!\n", __func__);
+ }
out:
request->fl_flags = fl_flags;
return status;
@@ -7428,7 +7433,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)

if (request->fl_type == F_UNLCK) {
if (state != NULL)
- return nfs4_proc_unlck(state, request);
+ return nfs4_proc_unlck(state, request, 0);
return 0;
}

Protect file lock by nfsi->rwsem:
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c2eb01e5eeab..251a0b196d05 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -145,15 +145,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags)
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
struct file_lock *fl;
struct file_lock_context *flctx = inode->i_flctx;
struct list_head *list;
int status = 0;

if (flctx == NULL)
- goto out;
+ return status;

list = &flctx->flc_posix;
+ down_write(&nfsi->rwsem);
spin_lock(&flctx->flc_lock);
restart:
list_for_each_entry(fl, list, fl_list) {
@@ -171,6 +173,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
}
spin_unlock(&flctx->flc_lock);
out:
+ up_write(&nfsi->rwsem);
return status;
}

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 41d66e34851b..e763423c81ec 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6770,18 +6770,23 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
.stateid = &calldata->arg.stateid,
};
int status;
+ struct nfs_inode *nfsi;

if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return;
switch (task->tk_status) {
case 0:
+ nfsi = NFS_I(calldata->lsp->ls_state->inode);
renew_lease(calldata->server, calldata->timestamp);
+ down_read(&nfsi->rwsem);
status = locks_lock_inode_wait(calldata->lsp->ls_state->inode,
&calldata->fl);
if (status && (status != -ENOENT)) {
+ up_read(&nfsi->rwsem);
rpc_restart_call_prepare(task);
break;
}
+ up_read(&nfsi->rwsem);
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
@@ -7273,7 +7278,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
goto out;

request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+ down_read(&nfsi->rwsem);
status = locks_lock_inode_wait(state->inode, request);
+ up_read(&nfsi->rwsem);
if (status) {
ret = nfs4_proc_unlck(state, request, 1);
status = ret ? ret : status;

However, this partially reverts the logic from commit c69899a17ca4
('NFSv4: Update of VFS byte range lock must be atomic with the stateid
update'), making the VFS lock update non-atomic with the stateid update.
I'm not sure if this might cause any issues.

Does anyone have any thoughts on this solution, or perhaps alternative
approaches?

Thanks,
Lingfeng.

在 2026/1/7 19:07, Li Lingfeng 写道:

Hi,

Recently, we found that this solution can introduce a deadlock issue:
T1
nfs_flock
do_unlk
nfs4_proc_lock
nfs4_proc_unlck
down_read // holding &nfsi->rwsem
nfs4_do_unlck
rpc_wait_for_completion_task // waiting for the rpc_task to complete

// .rpc_call_done
nfs4_locku_done
nfs4_async_handle_exception
nfs4_do_handle_exception
exception->recovering = 1
rpc_sleep_on // the rpc_task sleeps on &clp->cl_rpcwaitq, waiting to be woken up by T2

T2
nfs4_state_manager
nfs4_do_reclaim
nfs4_reclaim_open_state
__nfs4_reclaim_open_state
nfs4_reclaim_locks
down_write // tries to acquire &nfsi->rwsem and gets stuck

It seems that using &nfsi->rwsem to protect file locks is not a good idea.
Does anyone have a viable approach to address this UAF issue?

Thanks,
Lingfeng.

在 2025/9/1 22:25, Li Lingfeng 写道:

Friendly ping..

Thanks

在 2025/7/15 11:05, Li Lingfeng 写道:

LOCK may extend an existing lock and release another one and UNLOCK may
also release an existing lock.
When opening a file, there may be access to file locks that have been
concurrently released by lock/unlock operations, potentially triggering
UAF.
While certain concurrent scenarios involving lock/unlock and open
operations have been safeguarded with locks – for example,
nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
locks_lock_inode_wait() – there remain cases where such protection is not
yet implemented.

The issue can be reproduced through the following steps:
T1: open in read-only mode with three consecutive lock operations applied
     lock1(0~100) --> add lock1 to file
     lock2(120~200) --> add lock2 to file
     lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
T2: restart nfs-server and run state manager
T3: open in write-only mode
     T1 T2                                T3
                             start recover
lock1
lock2
                             nfs4_open_reclaim
                             clear_bit // NFS_DELEGATED_STATE
lock3
_nfs4_proc_setlk
   lock so_delegreturn_mutex
   unlock so_delegreturn_mutex
   _nfs4_do_setlk
                             recover done
                                                 lock so_delegreturn_mutex
nfs_delegation_claim_locks
                                                 get lock2
    rpc_run_task
    ...
    nfs4_lock_done
     locks_lock_inode_wait
     ...
      locks_dispose_list
      free lock2
                                                 use lock2
                                                 // UAF
                                                 unlock so_delegreturn_mutex

Protect file lock by nfsi->rwsem to fix this issue.

Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Reported-by: zhangjian (CG) <zhangjian496@xxxxxxxxxx>
Suggested-by: yangerkun <yangerkun@xxxxxxxxxx>
Signed-off-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
---
Changes in v2:
   Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent concurrency.

fs/nfs/delegation.c | 5 ++++-
fs/nfs/nfs4proc.c   | 8 +++++++-
2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 10ef46e29b25..4467b4f61905 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
      struct inode *inode = state->inode;
+    struct nfs_inode *nfsi = NFS_I(inode);
      struct file_lock *fl;
      struct file_lock_context *flctx = locks_inode_context(inode);
      struct list_head *list;
      int status = 0;
      if (flctx == NULL)
-        goto out;
+        return status;
      list = &flctx->flc_posix;
+    down_write(&nfsi->rwsem);
      spin_lock(&flctx->flc_lock);
restart:
      for_each_file_lock(fl, list) {
@@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
      }
      spin_unlock(&flctx->flc_lock);
out:
+    up_write(&nfsi->rwsem);
      return status;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 341740fa293d..06f109c7eb2e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
      status = -ENOMEM;
      if (IS_ERR(seqid))
          goto out;
+    down_read(&nfsi->rwsem);
      task = nfs4_do_unlck(request,
nfs_file_open_context(request->c.flc_file),
                   lsp, seqid);
      status = PTR_ERR(task);
-    if (IS_ERR(task))
+    if (IS_ERR(task)) {
+        up_read(&nfsi->rwsem);
          goto out;
+    }
      status = rpc_wait_for_completion_task(task);
      rpc_put_task(task);
+    up_read(&nfsi->rwsem);
out:
      request->c.flc_flags = saved_flags;
      trace_nfs4_unlock(request, state, F_SETLK, status);
@@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
      }
      up_read(&nfsi->rwsem);
      mutex_unlock(&sp->so_delegreturn_mutex);
+    down_read(&nfsi->rwsem);
      status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+    up_read(&nfsi->rwsem);
out:
      request->c.flc_flags = flags;
      return status;