[BUG] Server returns nfserr_grace causing client infinite loop

From: Li Lingfeng

Date: Wed Mar 11 2026 - 04:31:41 EST


We recently encountered an issue where the NFS client's state manager gets
stuck in an infinite loop, making the client unresponsive to user
operations. The problem occurs when the server returns nfserr_grace during
open reclaim.

Call chain on the client (the captured stack trace is shown in step 12):
// client
nfs4_state_manager
 nfs4_do_reclaim // NFS4CLNT_RECLAIM_NOGRACE
  nfs4_reclaim_open_state
   __nfs4_reclaim_open_state
    nfs41_open_expired // ops->recover_open
     nfs4_open_expired
      nfs4_do_open_expired
       _nfs4_open_expired // gets NFS4ERR_GRACE and retries

On the server side, nfsd4_open returns nfserr_grace because:
1. The session exists
2. The NFSD4_CLIENT_RECLAIM_COMPLETE flag is not set
3. The op_claim_type is not NFS4_OPEN_CLAIM_PREVIOUS


Steps to reproduce:

1. Normal mount on client
   On server:
   mkfs.ext4 -F /dev/sdb
   mount /dev/sdb /mnt/sdb
   echo "/mnt *(rw,no_root_squash,fsid=0)" > /etc/exports
   echo "/mnt/sdb *(rw,no_root_squash,fsid=1)" >> /etc/exports
   systemctl restart nfs-server
   echo 123 > /mnt/sdb/testfile

   On client:
   mount -t nfs -o rw 192.168.122.251:/sdb /mnt/sdbb

2. Client opens a file and arms the debug delay (the ifdebug(PROC) sleep
loop added by the diff below) that holds the state manager just before it
enters the NFS4CLNT_RECLAIM_NOGRACE branch
   exec 100>/mnt/sdbb/testfile
   rpcdebug -m nfs -s proc

3. Change hostname on server
   hostname server-nfs

4. Restart NFS service on server
   systemctl restart nfs-server

5. Wait for client to set NFS4CLNT_RECLAIM_NOGRACE and enter the delay
before the NFS4CLNT_RECLAIM_NOGRACE branch in the state manager

6. Enable delay for force_expire_client on server
   rpcdebug -m nfsd -s proc

7. Trigger client expiration on server (the write blocks at the delay
point in force_expire_client, so it is run in the background)
   echo "expire" > /proc/fs/nfsd/clients/4/ctl &

8. Enable delay for the NFS4CLNT_LEASE_EXPIRED branch on client, and
disable the delay for the NFS4CLNT_RECLAIM_NOGRACE branch
   rpcdebug -m nfs -s xdr
   rpcdebug -m nfs -c proc

9. Client state now has flags NFS4CLNT_LEASE_EXPIRED,
NFS4CLNT_RECLAIM_NOGRACE, and NFS4CLNT_MANAGER_RUNNING, and is stopped at
the delay point in the NFS4CLNT_LEASE_EXPIRED branch

10. Disable delay on server
    rpcdebug -m nfsd -c proc

11. Disable delay on client
    rpcdebug -m nfs -c xdr

12. Client state manager enters an infinite loop in the
NFS4CLNT_RECLAIM_NOGRACE branch
[root@nfs-client1 ~]# cat /proc/779/stack
[<0>] nfs4_handle_exception+0x245/0x600
[<0>] nfs4_do_open_expired+0x2c8/0x4e0
[<0>] nfs4_open_expired+0x31/0x90
[<0>] nfs41_open_expired+0x18b/0x290
[<0>] __nfs4_reclaim_open_state+0x4f/0x330
[<0>] nfs4_reclaim_open_state+0x1e9/0x530
[<0>] nfs4_do_reclaim+0x2a9/0x470
[<0>] nfs4_state_manager+0x1644/0x17f0
[<0>] nfs4_run_state_manager+0x1cc/0x490
[<0>] kthread+0x327/0x410
[<0>] ret_from_fork+0x360/0x6c0
[<0>] ret_from_fork_asm+0x1a/0x30
[root@nfs-client1 ~]#

base:
Linux 7.0-rc3
master 1f318b96cc84d7c2ab792fcc0bfd42a7ca890681

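diff (debug-only instrumentation used by the steps above to widen the race
windows via rpcdebug-controlled sleeps; this is not a proposed fix):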
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 305a772e5497..5d0b1eef5d9b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1315,6 +1315,7 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
        clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
        set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
        set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
+       printk("%s set NFS4CLNT_RECLAIM_NOGRACE for clp %px\n", __func__, clp);
        return 1;
 }

@@ -1814,6 +1815,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                break;
        case -NFS4ERR_EXPIRED:
                set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+               printk("%s set NFS4CLNT_LEASE_EXPIRED for clp %px\n", __func__, clp);
                nfs4_state_start_reclaim_nograce(clp);
                break;
        case -NFS4ERR_BADSESSION:
@@ -2540,6 +2542,14 @@ static void nfs4_state_manager(struct nfs_client *clp)

                if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
                        section = "lease expired";
+                       while (1) {
+                               ifdebug(XDR) {
+                                       printk("%s sleep before lease expired\n", __func__);
+                                       msleep(5 * 1000);
+                                       continue;
+                               }
+                               break;
+                       }
                        /* We're going to have to re-establish a clientid */
                        status = nfs4_reclaim_lease(clp);
                        if (status < 0)
@@ -2616,9 +2626,18 @@ static void nfs4_state_manager(struct nfs_client *clp)

                /* Now recover expired state... */
                if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+                       while (1) {
+                               ifdebug(PROC) {
+                                       printk("%s sleep before deal NFS4CLNT_RECLAIM_NOGRACE\n", __func__);
+                                       msleep(5 * 1000);
+                                       continue;
+                               }
+                               break;
+                       }
                        section = "reclaim nograce";
                        status = nfs4_do_reclaim(clp,
clp->cl_mvops->nograce_recovery_ops);
+                       printk("%s nograce reclaim status %d clp->cl_state 0x%lx\n", __func__, status, clp->cl_state);
                        if (status == -EAGAIN)
                                continue;
                        if (status < 0)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b9c399b89df..203f1d7c6c5f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3146,6 +3146,14 @@ static void force_expire_client(struct nfs4_client *clp)
        clp->cl_time = 0;
        spin_unlock(&nn->client_lock);

+       while (1) {
+               ifdebug(PROC) {
+                       printk("%s sleep before destroy session\n", __func__);
+                       msleep(5 * 1000);
+                       continue;
+               }
+               break;
+       }
        wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
        spin_lock(&nn->client_lock);
        already_expired = list_empty(&clp->cl_lru);

From the server's perspective, returning nfserr_grace is reasonable when
no RECLAIM_COMPLETE request has set the NFSD4_CLIENT_RECLAIM_COMPLETE
flag. However, I suspect the loss of the NFSD4_CLIENT_RECLAIM_COMPLETE
flag is related to the server-side "expire" write. Therefore, I'm unsure
whether this issue should be attributed to the server or the client.

Please let me know if you need any further information or testing.

Thanks,
Lingfeng.