[PATCH 3/5] mm/khugepaged: add deduplication when adding new collapse hint

From: Luka Bai

Date: Sun May 31 2026 - 00:24:54 EST

From: Luka Bai <lukabai@xxxxxxxxxxx>

We need to check for duplication before we add a new collapse hint,
and we want the searching and adding to be faster. So there are
several options for doing that:

Option 1. Add a Blooming filter for the hint addresses, but that
will make the hint hard to be deleted after handling.

Option 2. Add a hashtable for each khugepaged_mm_slot. But for a
efficient setup, the hashtable should have maybe 16 ~ 32 slots,
which will cost 128 bytes to 256 bytes for each mm_struct. Seems a
little wasteful.

Option 3. Add an xarray for each khugepaged_mm_slot, which only
takes 16 bytes for each mm_struct. However, each time when we try
to add a new entry into the xarray, it may cause memory allocation.
Collapse hint is supposed to be a best-effort machanism, introducing
xarray seems to be a little too heavy for the calling function.

Option 4. Add a global hashtable for all the memory hints, setup
key by their address and mm_struct ptr. The global hashtable mixes
mm_struct ptr and address as key, but the deduplication only looks
at address for saving memory. As a result, there may be collision
on different mms with a same address. But as we claimed above,
collapse hint is only a best-effort thing, and the collision is
also rare to happen because the address is always 0 for the lower
PMD_SHIFT bits, which normally gives mm struct about 2M size to
scatter (the key is calculated by (ptr of mm ^ pmd aligned address).

By choosing option 4, since the hashtable is global, we decided to
directly use a global lock (we directly use khugepaged_mm_lock here).
To avoid uncessary lock spinning, we used trylock when we try to add
a new hint, and exit when the contension happened. Still, this is
harmless for the correctness of the machanism.

Signed-off-by: Luka Bai <lukabai@xxxxxxxxxxx>
---
mm/khugepaged.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 78 insertions(+), 5 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 04cf85ea5557..3f5eb8be06d1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -100,6 +100,24 @@ static DEFINE_READ_MOSTLY_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
static struct kmem_cache *mm_slot_cache __ro_after_init;
static struct kmem_cache *collapse_hint_cache __ro_after_init;

+/*
+ * Global lookup table used by khugepaged_add_collapse_hint() to deduplicate
+ * pending hints against an existing address. The key mixes mm and address
+ * but the dedup comparison only looks at @address. As a result, two
+ * different mms hinting the same address may collapse. This is rare
+ * since the aligned_addr is always 0 for the lower PMD_SHIFT bits, which
+ * normally gives mm struct about 2M size for scattering (for 4K paging).
+ * And it's also harmless if the collision happens.
+ */
+#define KHUGEPAGED_HINTS_HASH_BITS 9
+static DEFINE_HASHTABLE(khugepaged_hint_lookup, KHUGEPAGED_HINTS_HASH_BITS);
+
+static inline unsigned long khugepaged_hint_key(struct mm_struct *mm,
+ unsigned long aligned_addr)
+{
+ return (unsigned long)mm ^ aligned_addr;
+}
+
#define KHUGEPAGED_PRIORITY_QUEUE_MAX_FAIL 10

#define KHUGEPAGED_MIN_MTHP_ORDER 2
@@ -165,12 +183,15 @@ static struct khugepaged_scan khugepaged_scan = {

/**
* struct khugepaged_collapse_hint - one collapse hint for a specific address
- * @node: list node on khugepaged_collapse_requests.hints
- * @vma: hint pointer to the target VMA
- * @address: PMD-aligned virtual address inside @vma to attempt collapsing on
+ * @node: list node on khugepaged_collapse_requests.hints
+ * @hash_node: hlist node on the global khugepaged_hint_lookup table, used
+ * for deduplication.
+ * @vma: hint pointer to the target VMA
+ * @address: PMD-aligned virtual address inside @vma to attempt collapsing on
*/
struct khugepaged_collapse_hint {
struct list_head node;
+ struct hlist_node hash_node;
struct vm_area_struct *vma;
unsigned long address;
};
@@ -688,6 +709,29 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
__khugepaged_enter(vma->vm_mm);
}

+/*
+ * Unhash any hints still queued under @req. Caller must hold
+ * khugepaged_mm_lock so we can safely unhash each hint from the global
+ * khugepaged_hint_lookup table.
+ */
+static void khugepaged_unhash_collapse_hints(
+ struct khugepaged_collapse_requests *req)
+{
+ struct khugepaged_collapse_hint *hint, *tmp;
+
+ lockdep_assert_held(&khugepaged_mm_lock);
+
+ list_for_each_entry_safe(hint, tmp, &req->hints, node) {
+ hash_del(&hint->hash_node);
+ }
+}
+
+/*
+ * Free any hints still queued under @req. No lock need to be held. Caller
+ * must make sure the hints are already unhashed from the global
+ * khugepaged_hint_lookup table and the mm_slot is removed from the
+ * khugepaged_priority_queue[].
+ */
static void khugepaged_release_collapse_hints(
struct khugepaged_collapse_requests *req)
{
@@ -712,6 +756,14 @@ static void khugepaged_remove_priority_requests(struct khugepaged_mm_slot *khp_m
list_del(&khp_mm_slot->request[i].node);
}

+static void khugepaged_unhash_all_hints(struct khugepaged_mm_slot *khp_mm_slot)
+{
+ int i;
+
+ for (i = 0; i < NR_KHUGEPAGED_PRIORITY_LEVEL; i++)
+ khugepaged_unhash_collapse_hints(&khp_mm_slot->request[i]);
+}
+
static void khugepaged_release_all_hints(struct khugepaged_mm_slot *khp_mm_slot)
{
int i;
@@ -733,6 +785,7 @@ void __khugepaged_exit(struct mm_struct *mm)
hash_del(&slot->hash);
list_del(&slot->mm_node);
khugepaged_remove_priority_requests(khp_mm_slot);
+ khugepaged_unhash_all_hints(khp_mm_slot);
free = 1;
}
spin_unlock(&khugepaged_mm_lock);
@@ -1933,6 +1986,7 @@ static void collect_mm_slot(struct mm_slot *slot)
* mm_flags_clear(MMF_VM_HUGEPAGE, mm);
*/

+ khugepaged_unhash_all_hints(khp_mm_slot);
/* khugepaged_mm_lock actually not necessary for the below */
khugepaged_release_all_hints(khp_mm_slot);
mm_slot_free(mm_slot_cache, khp_mm_slot);
@@ -3001,8 +3055,9 @@ void khugepaged_add_collapse_hint(struct mm_struct *mm,
int priority, int max_order)
{
struct khugepaged_mm_slot *khp_mm_slot;
- struct khugepaged_collapse_hint *hint;
+ struct khugepaged_collapse_hint *hint, *existing;
struct mm_slot *slot;
+ unsigned long aligned_addr, key;
int orders;

if (!mm || !vma)
@@ -3022,12 +3077,15 @@ void khugepaged_add_collapse_hint(struct mm_struct *mm,
if (!mm_flags_test(MMF_VM_HUGEPAGE, mm))
return;

+ aligned_addr = address & HPAGE_PMD_MASK;
+ key = khugepaged_hint_key(mm, aligned_addr);
+
hint = kmem_cache_alloc(collapse_hint_cache, GFP_KERNEL);
if (!hint)
return;

hint->vma = vma;
- hint->address = address & HPAGE_PMD_MASK;
+ hint->address = aligned_addr;

/*
* Just use try lock to avoid lock contention because collapse hints are
@@ -3045,7 +3103,21 @@ void khugepaged_add_collapse_hint(struct mm_struct *mm,
return;
}
khp_mm_slot = mm_slot_entry(slot, struct khugepaged_mm_slot, slot);
+
+ /*
+ * For deduplication. The comparison only checks @address here. See comments
+ * above khugepaged_hint_lookup definition for details.
+ */
+ hash_for_each_possible(khugepaged_hint_lookup, existing, hash_node, key) {
+ if (existing->address == aligned_addr) {
+ spin_unlock(&khugepaged_mm_lock);
+ kmem_cache_free(collapse_hint_cache, hint);
+ return;
+ }
+ }
+
list_add_tail(&hint->node, &khp_mm_slot->request[priority].hints);
+ hash_add(khugepaged_hint_lookup, &hint->hash_node, key);
spin_unlock(&khugepaged_mm_lock);

wake_up_interruptible(&khugepaged_wait);
@@ -3124,6 +3196,7 @@ static int collapse_scan_one_priority_entry(unsigned int progress_max,
struct khugepaged_collapse_hint,
node);
list_del(&hint->node);
+ hash_del(&hint->hash_node);
}
spin_unlock(&khugepaged_mm_lock);

--
2.52.0