Re: [RFC PATCH] nfs: use nfsi->rwsem to protect traversal of the file lock list

From: Anna Schumaker

Date: Tue Jun 23 2026 - 15:53:04 EST


Hi Erkun,

On Tue, Jun 23, 2026, at 3:37 AM, yangerkun wrote:
> Gently ping...
>
> This patch has been reviewed, but leave alone here for a long time...

The patch is in my linux-next branch right here: https://git.linux-nfs.org/?p=anna/linux-nfs.git;a=commit;h=4837fb36219e6c08b666bc31a86841bad8526358

Anna

>
> 在 2026/5/8 16:33, yangerkun 写道:
>> Gently ping...
>>
>> 在 2026/4/16 11:01, yangerkun 写道:
>>> Hi Anna and Trond,
>>>
>>> Could you please help check if there are any issues with this patch, and
>>> if there are none, could you help merge it in?
>>>
>>> Thanks,
>>> Erkun.
>>>
>>> 在 2026/3/9 22:09, Jeff Layton 写道:
>>>> On Thu, 2026-02-26 at 09:22 +0800, Yang Erkun wrote:
>>>>> Lingfeng identified a bug and suggested two solutions, but both appear
>>>>> to have issues.
>>>>>
>>>>> Generally, we cannot release flc_lock while iterating over the file
>>>>> lock
>>>>> list to avoid use-after-free (UAF) problems with file locks. However,
>>>>> functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
>>>>> adhere to this rule because recover_lock or nfs4_lock_delegation_recall
>>>>> may take a long time. To resolve this, NFS switches to using nfsi-
>>>>> >rwsem
>>>>> for the same protection, and nfs_reclaim_locks follows this approach.
>>>>> Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
>>>>> this is inadequate since a single inode can have multiple nfs4_state
>>>>> instances. Therefore, the fix is to also use nfsi->rwsem in this case.
>>>>>
>>>>> Furthermore, after commit c69899a17ca4 ("NFSv4: Update of VFS byte
>>>>> range
>>>>> lock must be atomic with the stateid update"), the functions
>>>>> nfs4_locku_done and nfs4_lock_done also break this rule because they
>>>>> call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
>>>>> this protection could cause many deadlocks, so instead, the call to
>>>>> locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
>>>>> fixed by commit c69899a17ca4 ("NFSv4: Update of VFS byte range
>>>>> lock must be atomic with the stateid update"), it has been resolved
>>>>> after commit 0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly
>>>>> triggering
>>>>> state recovery") because all slots are drained before calling
>>>>> nfs4_do_reclaim, which prevents concurrent stateid changes along
>>>>> this path.
>>>>> Also, nfs_delegation_claim_locks does not cause this concurrency either
>>>>> since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no
>>>>> RPC is
>>>>> sent, so nfs4_lock_done is not called. Therefore,
>>>>> nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the
>>>>> first
>>>>> time the stateid is set.
>>>>>
>>>>> Reported-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
>>>>> Closes: https://lore.kernel.org/all/20250419085709.1452492-1-
>>>>> lilingfeng3@xxxxxxxxxx/
>>>>> Closes: https://lore.kernel.org/all/20250715030559.2906634-1-
>>>>> lilingfeng3@xxxxxxxxxx/
>>>>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be
>>>>> atomic with the stateid update")
>>>>> Signed-off-by: Yang Erkun <yangerkun@xxxxxxxxxx>
>>>>> ---
>>>>>   fs/nfs/delegation.c     |  9 ++++++++-
>>>>>   fs/nfs/nfs4proc.c       | 22 +++++++++++-----------
>>>>>   include/linux/nfs_xdr.h |  1 -
>>>>>   3 files changed, 19 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
>>>>> index 122fb3f14ffb..9546d2195c25 100644
>>>>> --- a/fs/nfs/delegation.c
>>>>> +++ b/fs/nfs/delegation.c
>>>>> @@ -173,6 +173,7 @@ int nfs4_check_delegation(struct inode *inode,
>>>>> fmode_t type)
>>>>>   static int nfs_delegation_claim_locks(struct nfs4_state *state,
>>>>> const nfs4_stateid *stateid)
>>>>>   {
>>>>>       struct inode *inode = state->inode;
>>>>> +    struct nfs_inode *nfsi = NFS_I(inode);
>>>>>       struct file_lock *fl;
>>>>>       struct file_lock_context *flctx = locks_inode_context(inode);
>>>>>       struct list_head *list;
>>>>> @@ -182,6 +183,9 @@ static int nfs_delegation_claim_locks(struct
>>>>> nfs4_state *state, const nfs4_state
>>>>>           goto out;
>>>>>       list = &flctx->flc_posix;
>>>>> +
>>>>> +    /* Guard against reclaim and new lock/unlock calls */
>>>>> +    down_write(&nfsi->rwsem);
>>>>>       spin_lock(&flctx->flc_lock);
>>>>>   restart:
>>>>>       for_each_file_lock(fl, list) {
>>>>> @@ -189,8 +193,10 @@ static int nfs_delegation_claim_locks(struct
>>>>> nfs4_state *state, const nfs4_state
>>>>>               continue;
>>>>>           spin_unlock(&flctx->flc_lock);
>>>>>           status = nfs4_lock_delegation_recall(fl, state, stateid);
>>>>> -        if (status < 0)
>>>>> +        if (status < 0) {
>>>>> +            up_write(&nfsi->rwsem);
>>>>>               goto out;
>>>>> +        }
>>>>>           spin_lock(&flctx->flc_lock);
>>>>>       }
>>>>>       if (list == &flctx->flc_posix) {
>>>>> @@ -198,6 +204,7 @@ static int nfs_delegation_claim_locks(struct
>>>>> nfs4_state *state, const nfs4_state
>>>>>           goto restart;
>>>>>       }
>>>>>       spin_unlock(&flctx->flc_lock);
>>>>> +    up_write(&nfsi->rwsem);
>>>>>   out:
>>>>>       return status;
>>>>>   }
>>>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>>>> index 91bcf67bd743..9d6fbca8798b 100644
>>>>> --- a/fs/nfs/nfs4proc.c
>>>>> +++ b/fs/nfs/nfs4proc.c
>>>>> @@ -7076,7 +7076,6 @@ static void nfs4_locku_done(struct rpc_task
>>>>> *task, void *data)
>>>>>       switch (task->tk_status) {
>>>>>           case 0:
>>>>>               renew_lease(calldata->server, calldata->timestamp);
>>>>> -            locks_lock_inode_wait(calldata->lsp->ls_state->inode,
>>>>> &calldata->fl);
>>>>>               if (nfs4_update_lock_stateid(calldata->lsp,
>>>>>                       &calldata->res.stateid))
>>>>>                   break;
>>>>> @@ -7344,11 +7343,6 @@ static void nfs4_lock_done(struct rpc_task
>>>>> *task, void *calldata)
>>>>>       case 0:
>>>>>           renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
>>>>>                   data->timestamp);
>>>>> -        if (data->arg.new_lock && !data->cancelled) {
>>>>> -            data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>>>>> -            if (locks_lock_inode_wait(lsp->ls_state->inode, &data-
>>>>> >fl) < 0)
>>>>> -                goto out_restart;
>>>>> -        }
>>>>>           if (data->arg.new_lock_owner != 0) {
>>>>>               nfs_confirm_seqid(&lsp->ls_seqid, 0);
>>>>>               nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
>>>>> @@ -7459,11 +7453,10 @@ static int _nfs4_do_setlk(struct nfs4_state
>>>>> *state, int cmd, struct file_lock *f
>>>>>       msg.rpc_argp = &data->arg;
>>>>>       msg.rpc_resp = &data->res;
>>>>>       task_setup_data.callback_data = data;
>>>>> -    if (recovery_type > NFS_LOCK_NEW) {
>>>>> -        if (recovery_type == NFS_LOCK_RECLAIM)
>>>>> -            data->arg.reclaim = NFS_LOCK_RECLAIM;
>>>>> -    } else
>>>>> -        data->arg.new_lock = 1;
>>>>> +
>>>>> +    if (recovery_type == NFS_LOCK_RECLAIM)
>>>>> +        data->arg.reclaim = NFS_LOCK_RECLAIM;
>>>>> +
>>>>>       task = rpc_run_task(&task_setup_data);
>>>>>       if (IS_ERR(task))
>>>>>           return PTR_ERR(task);
>>>>> @@ -7573,6 +7566,13 @@ static int _nfs4_proc_setlk(struct nfs4_state
>>>>> *state, int cmd, struct file_lock
>>>>>       up_read(&nfsi->rwsem);
>>>>>       mutex_unlock(&sp->so_delegreturn_mutex);
>>>>>       status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
>>>>> +    if (status)
>>>>> +        goto out;
>>>>> +
>>>>> +    down_read(&nfsi->rwsem);
>>>>> +    request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>>>>> +    status = locks_lock_inode_wait(state->inode, request);
>>>>> +    up_read(&nfsi->rwsem);
>>>>>   out:
>>>>>       request->c.flc_flags = flags;
>>>>>       return status;
>>>>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>>>>> index ff1f12aa73d2..9599ad15c3ad 100644
>>>>> --- a/include/linux/nfs_xdr.h
>>>>> +++ b/include/linux/nfs_xdr.h
>>>>> @@ -580,7 +580,6 @@ struct nfs_lock_args {
>>>>>       struct nfs_lowner    lock_owner;
>>>>>       unsigned char        block : 1;
>>>>>       unsigned char        reclaim : 1;
>>>>> -    unsigned char        new_lock : 1;
>>>>>       unsigned char        new_lock_owner : 1;
>>>>>   };
>>>>
>>>> Nice work!
>>>>
>>>> Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>
>>>>
>>>>
>>>
>>
>>
>>