Re: [PATCH] mm: khugepaged: free xarray nodes left behind by failed collapse_file()
From: Matthew Wilcox
Date: Tue Jun 16 2026 - 11:37:19 EST
On Tue, Jun 16, 2026 at 10:54:13AM -0400, Rik van Riel wrote:
> syzkaller reproduces it trivially with MADV_COLLAPSE on a sparse shmem
> mapping (collapse aborts with SCAN_TRUNCATED because the range is empty),
> and also via slab fault injection, which forces xas_create_range() down
> the xas_nomem() path before the same abort. kmemleak then reports the
> 576-byte struct xa_node objects allocated in xas_alloc()/xas_nomem().
I think the ways to produce this problem are sufficiently rare/unlikely
to not merit this level of cleanup. Why don't we defer it to
clear_inode() instead?
reasons not to:
1. the page cache is not the only user of xarrays and
theoretically either of these things could happen elsewhere
2. can a user do this on purpose to screw other users over? i don't
think so, any more than they can dos the system by bringing a lot of
inodes into memory; we have cgroups and ulimits to protect against that
diff --git a/fs/inode.c b/fs/inode.c
index 6a3cbc7dcd28..d92c5a296504 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -779,21 +779,20 @@ void clear_inode(struct inode *inode)
fsverity_cleanup_inode(inode);
/*
- * We have to cycle the i_pages lock here because reclaim can be in the
- * process of removing the last page (in __filemap_remove_folio())
- * and we must not free the mapping under it.
+ * We have to cycle the i_pages lock here because reclaim
+ * can be in the process of removing the last page (in
+ * __filemap_remove_folio()) and we must not free the mapping
+ * under it. We also remove nodes which are empty; these
+ * can occur in two different ways. The first is that radix
+ * tree expansion can fail partway and the second is that THP
+ * collapse_file() can allocate some temporary nodes and not
+ * clean them up.
*/
xa_lock_irq(&inode->i_data.i_pages);
BUG_ON(inode->i_data.nrpages);
- /*
- * Almost always, mapping_empty(&inode->i_data) here; but there are
- * two known and long-standing ways in which nodes may get left behind
- * (when deep radix-tree node allocation failed partway; or when THP
- * collapse_file() failed). Until those two known cases are cleaned up,
- * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
- * nor even WARN_ON(!mapping_empty).
- */
+ __xa_destroy(&inode->i_data.i_pages);
xa_unlock_irq(&inode->i_data.i_pages);
+
BUG_ON(!(inode_state_read_once(inode) & I_FREEING));
BUG_ON(inode_state_read_once(inode) & I_CLEAR);
BUG_ON(!list_empty(&inode->i_wb_list));
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index be850174e802..d776a6e9ad18 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -571,6 +571,7 @@ int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry,
struct xa_limit, u32 *next, gfp_t);
void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
+void __xa_destroy(struct xarray *);
/**
* xa_store_bh() - Store this entry in the XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index 9a8b4916540c..ee2459ecdc9b 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -2370,6 +2370,21 @@ void xa_delete_node(struct xa_node *node, xa_update_node_t update)
}
EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */
+void __xa_destroy(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ void *entry;
+
+ xas.xa_node = NULL;
+ entry = xa_head_locked(xa);
+ RCU_INIT_POINTER(xa->xa_head, NULL);
+ xas_init_marks(&xas);
+ if (xa_zero_busy(xa))
+ xa_mark_clear(xa, XA_FREE_MARK);
+ if (xa_is_node(entry))
+ xas_free_nodes(&xas, xa_to_node(entry));
+}
+
/**
* xa_destroy() - Free all internal data structures.
* @xa: XArray.
@@ -2382,21 +2397,11 @@ EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */
*/
void xa_destroy(struct xarray *xa)
{
- XA_STATE(xas, xa, 0);
unsigned long flags;
- void *entry;
- xas.xa_node = NULL;
- xas_lock_irqsave(&xas, flags);
- entry = xa_head_locked(xa);
- RCU_INIT_POINTER(xa->xa_head, NULL);
- xas_init_marks(&xas);
- if (xa_zero_busy(xa))
- xa_mark_clear(xa, XA_FREE_MARK);
- /* lockdep checks we're still holding the lock in xas_free_nodes() */
- if (xa_is_node(entry))
- xas_free_nodes(&xas, xa_to_node(entry));
- xas_unlock_irqrestore(&xas, flags);
+ xa_lock_irqsave(xa, flags);
+ __xa_destroy(xa);
+ xa_unlock_irqrestore(xa, flags);
}
EXPORT_SYMBOL(xa_destroy);