Re: [PATCH v2] nfs: fix the race of lock/unlock and open

From: Li Lingfeng

Date: Mon Feb 02 2026 - 06:23:25 EST


Hi,

I tried separating the local VFS lock update from the RPC completion path
(performing an unlock if the VFS lock update fails), and then using
nfsi->rwsem to protect the file lock to prevent UAF.

Split local VFS lock update from RPC completion path:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 491fbe65e644..41d66e34851b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6887,7 +6887,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
        return rpc_run_task(&task_setup_data);
 }

-static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request)
+static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request, int remote_only)
 {
        struct inode *inode = state->inode;
        struct nfs4_state_owner *sp = state->owner;
@@ -6906,7 +6906,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, struct file_lock *request)
        mutex_lock(&sp->so_delegreturn_mutex);
        /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
        down_read(&nfsi->rwsem);
-       if (locks_lock_inode_wait(inode, request) == -ENOENT) {
+       if (!remote_only && locks_lock_inode_wait(inode, request) == -ENOENT) {
                up_read(&nfsi->rwsem);
                mutex_unlock(&sp->so_delegreturn_mutex);
                goto out;
@@ -7044,11 +7044,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
        case 0:
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
                                data->timestamp);
-               if (data->arg.new_lock && !data->cancelled) {
-                       data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
-                       if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
-                               goto out_restart;
-               }
                if (data->arg.new_lock_owner != 0) {
                        nfs_confirm_seqid(&lsp->ls_seqid, 0);
                        nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
@@ -7254,7 +7249,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
        struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs4_state_owner *sp = state->owner;
        unsigned char fl_flags = request->fl_flags;
-       int status;
+       int status, ret;

        request->fl_flags |= FL_ACCESS;
        status = locks_lock_inode_wait(state->inode, request);
@@ -7274,6 +7269,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
        up_read(&nfsi->rwsem);
        mutex_unlock(&sp->so_delegreturn_mutex);
        status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+       if (status != 0)
+               goto out;
+
+       request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+       status = locks_lock_inode_wait(state->inode, request);
+       if (status) {
+               ret = nfs4_proc_unlck(state, request, 1);
+               status = ret ? ret : status;
+               dprintk("%s: cancelling lock!\n", __func__);
+       }
 out:
        request->fl_flags = fl_flags;
        return status;
@@ -7428,7 +7433,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)

        if (request->fl_type == F_UNLCK) {
                if (state != NULL)
-                       return nfs4_proc_unlck(state, request);
+                       return nfs4_proc_unlck(state, request, 0);
                return 0;
        }

Protect file lock by nfsi->rwsem:
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c2eb01e5eeab..251a0b196d05 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -145,15 +145,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags)
 static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
 {
        struct inode *inode = state->inode;
+       struct nfs_inode *nfsi = NFS_I(inode);
        struct file_lock *fl;
        struct file_lock_context *flctx = inode->i_flctx;
        struct list_head *list;
        int status = 0;

        if (flctx == NULL)
-               goto out;
+               return status;

        list = &flctx->flc_posix;
+       down_write(&nfsi->rwsem);
        spin_lock(&flctx->flc_lock);
 restart:
        list_for_each_entry(fl, list, fl_list) {
@@ -171,6 +173,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
        }
        spin_unlock(&flctx->flc_lock);
 out:
+       up_write(&nfsi->rwsem);
        return status;
 }

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 41d66e34851b..e763423c81ec 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6770,18 +6770,23 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                .stateid = &calldata->arg.stateid,
        };
        int status;
+       struct nfs_inode *nfsi;

        if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                return;
        switch (task->tk_status) {
                case 0:
+                       nfsi = NFS_I(calldata->lsp->ls_state->inode);
                        renew_lease(calldata->server, calldata->timestamp);
+                       down_read(&nfsi->rwsem);
                        status = locks_lock_inode_wait(calldata->lsp->ls_state->inode,
 &calldata->fl);
                        if (status && (status != -ENOENT)) {
+                               up_read(&nfsi->rwsem);
                                rpc_restart_call_prepare(task);
                                break;
                        }
+                       up_read(&nfsi->rwsem);
                        if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
                                break;
@@ -7273,7 +7278,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
                goto out;

        request->fl_flags = fl_flags & ~(FL_SLEEP | FL_ACCESS);
+       down_read(&nfsi->rwsem);
        status = locks_lock_inode_wait(state->inode, request);
+       up_read(&nfsi->rwsem);
        if (status) {
                ret = nfs4_proc_unlck(state, request, 1);
                status = ret ? ret : status;

However, this partially reverts the logic from commit c69899a17ca4
('NFSv4: Update of VFS byte range lock must be atomic with the stateid
update'), making the VFS lock update non-atomic with the stateid update.
I'm not sure if this might cause any issues.

Does anyone have any thoughts on this solution, or perhaps alternative
approaches?

Thanks,
Lingfeng.

在 2026/1/7 19:07, Li Lingfeng 写道:
Hi,

Recently, we found that this solution can introduce a deadlock issue:
        T1
nfs_flock
 do_unlk
  nfs4_proc_lock
   nfs4_proc_unlck
    down_read // holding &nfsi->rwsem
    nfs4_do_unlck
    rpc_wait_for_completion_task // waiting for the rpc_task to complete

// .rpc_call_done
nfs4_locku_done
 nfs4_async_handle_exception
  nfs4_do_handle_exception
   exception->recovering = 1
  rpc_sleep_on // the rpc_task sleeps on &clp->cl_rpcwaitq, waiting to be woken up by T2

        T2
nfs4_state_manager
 nfs4_do_reclaim
  nfs4_reclaim_open_state
   __nfs4_reclaim_open_state
    nfs4_reclaim_locks
     down_write // tries to acquire &nfsi->rwsem and gets stuck

It seems that using &nfsi->rwsem to protect file locks is not a good idea.
Does anyone have a viable approach to address this UAF issue?

Thanks,
Lingfeng.

在 2025/9/1 22:25, Li Lingfeng 写道:
Friendly ping..

Thanks

在 2025/7/15 11:05, Li Lingfeng 写道:
LOCK may extend an existing lock and release another one and UNLOCK may
also release an existing lock.
When opening a file, there may be access to file locks that have been
concurrently released by lock/unlock operations, potentially triggering
a use-after-free (UAF).
While certain concurrent scenarios involving lock/unlock and open
operations have been safeguarded with locks – for example,
nfs4_proc_unlck() acquires the so_delegreturn_mutex prior to invoking
locks_lock_inode_wait() – there remain cases where such protection is not
yet implemented.

The issue can be reproduced through the following steps:
T1: open in read-only mode with three consecutive lock operations applied
     lock1(0~100) --> add lock1 to file
     lock2(120~200) --> add lock2 to file
     lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
T2: restart nfs-server and run state manager
T3: open in write-only mode
     T1                      T2                  T3
                             start recover
lock1
lock2
                             nfs4_open_reclaim
                             clear_bit // NFS_DELEGATED_STATE
lock3
  _nfs4_proc_setlk
   lock so_delegreturn_mutex
   unlock so_delegreturn_mutex
   _nfs4_do_setlk
                             recover done
                                                 lock so_delegreturn_mutex
                                                 nfs_delegation_claim_locks
                                                 get lock2
    rpc_run_task
    ...
    nfs4_lock_done
     locks_lock_inode_wait
     ...
      locks_dispose_list
      free lock2
                                                 use lock2
                                                 // UAF
                                                 unlock so_delegreturn_mutex

Protect file lock by nfsi->rwsem to fix this issue.

Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Reported-by: zhangjian (CG) <zhangjian496@xxxxxxxxxx>
Suggested-by: yangerkun <yangerkun@xxxxxxxxxx>
Signed-off-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
---
Changes in v2:
   Use nfsi->rwsem instead of sp->so_delegreturn_mutex to prevent concurrency.

  fs/nfs/delegation.c | 5 ++++-
  fs/nfs/nfs4proc.c   | 8 +++++++-
  2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 10ef46e29b25..4467b4f61905 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -149,15 +149,17 @@ int nfs4_check_delegation(struct inode *inode, fmode_t type)
  static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
  {
      struct inode *inode = state->inode;
+    struct nfs_inode *nfsi = NFS_I(inode);
      struct file_lock *fl;
      struct file_lock_context *flctx = locks_inode_context(inode);
      struct list_head *list;
      int status = 0;
        if (flctx == NULL)
-        goto out;
+        return status;
        list = &flctx->flc_posix;
+    down_write(&nfsi->rwsem);
      spin_lock(&flctx->flc_lock);
  restart:
      for_each_file_lock(fl, list) {
@@ -175,6 +177,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
      }
      spin_unlock(&flctx->flc_lock);
  out:
+    up_write(&nfsi->rwsem);
      return status;
  }
  diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 341740fa293d..06f109c7eb2e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7294,14 +7294,18 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
      status = -ENOMEM;
      if (IS_ERR(seqid))
          goto out;
+    down_read(&nfsi->rwsem);
      task = nfs4_do_unlck(request,
nfs_file_open_context(request->c.flc_file),
                   lsp, seqid);
      status = PTR_ERR(task);
-    if (IS_ERR(task))
+    if (IS_ERR(task)) {
+        up_read(&nfsi->rwsem);
          goto out;
+    }
      status = rpc_wait_for_completion_task(task);
      rpc_put_task(task);
+    up_read(&nfsi->rwsem);
  out:
      request->c.flc_flags = saved_flags;
      trace_nfs4_unlock(request, state, F_SETLK, status);
@@ -7642,7 +7646,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
      }
      up_read(&nfsi->rwsem);
      mutex_unlock(&sp->so_delegreturn_mutex);
+    down_read(&nfsi->rwsem);
      status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+    up_read(&nfsi->rwsem);
  out:
      request->c.flc_flags = flags;
      return status;