[PATCH v2] lockd: pin next file across nlm_inspect_file lock-drop
From: Michael Bommarito
Date: Sat May 23 2026 - 08:31:18 EST
nlm_traverse_files() pins the current file with f_count++ across
a mutex_unlock for nlm_inspect_file(), but nothing pins the saved
next pointer. A concurrent nlm_release_file() can kfree the next
file during the unlock window, and the iterator dereferences freed
memory on the next loop step.
Pin both current and next before the lock-drop. Advance by
swapping the pinned cursors at the end of each iteration so next
is always held alive across the unlock.
Only call nlm_file_release() for files that matched the predicate
and were inspected. Skipped files just get f_count-- to undo the
iteration pin; their f_locks is stale and must not drive cleanup.
Cc: stable@xxxxxxxxxxxxxxx
Fixes: 01df9c5e918a ("LOCKD: Fix a deadlock in nlm_traverse_files()")
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@xxxxxxxxx>
---
fs/lockd/svcsubs.c | 64 +++++++++++++++++++++++++++++++---------------
1 file changed, 44 insertions(+), 20 deletions(-)
Changes since v1:
- Fixed premature kfree of non-matching files: nlm_file_release()
is now called only for files that matched the predicate and were
inspected. Non-matching files just get f_count-- to undo the
iteration pin. (Spotted by sashiko.dev automated review.)
Reproduced under UML + KASAN with 768 concurrent POSIX holders and
parallel /proc/fs/nfsd/unlock_filesystem writes.
Stock kernel:
BUG: KASAN: slab-use-after-free in nlm_traverse_files+0x71d/0x9d0
Allocated by: nlm_lookup_file via nlm4svc_proc_lock
Freed by: another nlm_traverse_files instance
Patched v2 UML kernel ran the same harness silently.
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb6950..0b38125cf86ab 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -295,36 +295,60 @@ static void nlm_close_files(struct nlm_file *file)
/*
* Loop over all files in the file table.
*/
+static void nlm_file_release(struct nlm_file *file)
+{
+ if (list_empty(&file->f_blocks) && !file->f_locks
+ && !file->f_shares && !file->f_count) {
+ hlist_del(&file->f_list);
+ nlm_close_files(file);
+ kfree(file);
+ }
+}
+
static int
nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int (*is_failover_file)(void *data, struct nlm_file *file))
{
- struct hlist_node *next;
- struct nlm_file *file;
+ struct nlm_file *file, *next;
int i, ret = 0;
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
- hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
- if (is_failover_file && !is_failover_file(data, file))
- continue;
+ file = hlist_entry_safe(nlm_files[i].first,
+ struct nlm_file, f_list);
+ if (file)
file->f_count++;
- mutex_unlock(&nlm_file_mutex);
-
- /* Traverse locks, blocks and shares of this file
- * and update file->f_locks count */
- if (nlm_inspect_file(data, file, match))
- ret = 1;
-
- mutex_lock(&nlm_file_mutex);
- file->f_count--;
- /* No more references to this file. Let go of it. */
- if (list_empty(&file->f_blocks) && !file->f_locks
- && !file->f_shares && !file->f_count) {
- hlist_del(&file->f_list);
- nlm_close_files(file);
- kfree(file);
+ while (file) {
+ /*
+ * Pin the next neighbour before we drop the mutex
+ * for nlm_inspect_file(); a concurrent
+ * nlm_release_file() under the same mutex would
+ * otherwise be free to unlink and kfree it during
+ * the unlock window, leaving us to dereference a
+ * freed slab when we walked to next afterwards.
+ */
+ next = hlist_entry_safe(file->f_list.next,
+ struct nlm_file, f_list);
+ if (next)
+ next->f_count++;
+
+ if (!is_failover_file || is_failover_file(data, file)) {
+ mutex_unlock(&nlm_file_mutex);
+
+ /*
+ * Traverse locks, blocks and shares of this
+ * file and update file->f_locks count.
+ */
+ if (nlm_inspect_file(data, file, match))
+ ret = 1;
+
+ mutex_lock(&nlm_file_mutex);
+ file->f_count--;
+ nlm_file_release(file);
+ } else {
+ file->f_count--;
}
+ file = next;
}
}
mutex_unlock(&nlm_file_mutex);
--
2.53.0