[PATCH v4 3/5] ksm: add vm_pgoff into ksm_rmap_item

From: xu.xin16

Date: Sun May 03 2026 - 08:48:58 EST


From: xu xin <xu.xin16@xxxxxxxxxx>
The reason for adding vm_pgoff to ksm_rmap_item has been discussed in previous
mailing list threads [1][2]. The main purpose is to allow the KSM reverse mapping
to obtain the original VMA's vm_pgoff, so that during anon_vma_tree traversal, it
can conditionally locate the VMAs and avoid scanning the entire address space
[0, ULONG_MAX].

To minimize the size impact of adding vm_pgoff to ksm_rmap_item as much as
possible, a trick that David suggested is to use a union that groups the members
related to the unstable tree together with the newly added vm_pgoff. The members
that are valid only while in the unstable tree are oldchecksum and the age information.
However, the function should_skip_rmap_item() used by smart scanning needs a slight
modification, since it still uses the age information even when the
rmap_item is in a stable state (the page is not KSM), a situation that occurs
during COW faults. With the union, the struct size stays at 64 bytes and does not grow.

The setting and resetting of rmap_item->vm_pgoff are similar to rmap_item->anon_vma.

[1] https://lore.kernel.org/all/adTPQSb-qSSHviJN@lucifer/
[2] https://lore.kernel.org/all/202604091806051535BJWZ_FTtdIm3Snk24ei_@xxxxxxxxxx/

Suggested-by: David Hildenbrand (Arm) <david@xxxxxxxxxx>
Signed-off-by: xu xin <xu.xin16@xxxxxxxxxx>
---
mm/ksm.c | 41 ++++++++++++++++++++++++++++++++++-------
1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 7d5b76478f0b..0299a53ba7c9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -195,22 +195,28 @@ struct ksm_stable_node {
* @node: rb node of this rmap_item in the unstable tree
* @head: pointer to stable_node heading this list in the stable tree
* @hlist: link into hlist of rmap_items hanging off that stable_node
- * @age: number of scan iterations since creation
- * @remaining_skips: how many scans to skip
+ * @age: number of scan iterations since creation (unstable node)
+ * @remaining_skips: how many scans to skip (unstable node)
+ * @vm_pgoff: vm_pgoff of the original VMA where the page is mapped (stable node)
*/
struct ksm_rmap_item {
struct ksm_rmap_item *rmap_list;
union {
- struct anon_vma *anon_vma; /* when stable */
+ struct anon_vma *anon_vma; /* for reverse mapping, when stable */
#ifdef CONFIG_NUMA
int nid; /* when node of unstable tree */
#endif
};
struct mm_struct *mm;
unsigned long address; /* + low bits used for flags below */
- unsigned int oldchecksum; /* when unstable */
- rmap_age_t age;
- rmap_age_t remaining_skips;
+ union {
+ struct {
+ unsigned int oldchecksum;
+ rmap_age_t age;
+ rmap_age_t remaining_skips;
+ }; /* when unstable */
+ unsigned long vm_pgoff; /* for reverse mapping, when stable */
+ };
union {
struct rb_node node; /* when node of unstable tree */
struct { /* when listed from stable tree */
@@ -776,6 +782,10 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
return vma;
}

+/*
+ * break_cow: actively break the write-protect of the VMA. This is called when
+ * the rmap_item has not yet become stable, but the page has been merged.
+ */
static void break_cow(struct ksm_rmap_item *rmap_item)
{
struct mm_struct *mm = rmap_item->mm;
@@ -787,6 +797,8 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
* to undo, we also need to drop a reference to the anon_vma.
*/
put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. (still unstable) */
+ rmap_item->vm_pgoff = 0;

mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
@@ -899,6 +911,8 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
stable_node->rmap_hlist_len--;
put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. */
+ rmap_item->vm_pgoff = 0;
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -1052,6 +1066,8 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item)
stable_node->rmap_hlist_len--;

put_anon_vma(rmap_item->anon_vma);
+ /* Reset pgoff that overlays age-related information. */
+ rmap_item->vm_pgoff = 0;
rmap_item->head = NULL;
rmap_item->address &= PAGE_MASK;

@@ -1598,8 +1614,15 @@ static int try_to_merge_with_ksm_page(struct ksm_rmap_item *rmap_item,
/* Unstable nid is in union with stable anon_vma: remove first */
remove_rmap_item_from_tree(rmap_item);

- /* Must get reference to anon_vma while still holding mmap_lock */
+ /*
+ * Must get reference to anon_vma while still holding mmap_lock.
+ * We set these two members of stable node here instead of in
+ * stable_tree_append(), maybe because we don't want to hold
+ * mmap_read_lock again? Here mmap_read_lock is already held for
+ * find_mergeable_vma() before merging.
+ */
rmap_item->anon_vma = vma->anon_vma;
+ rmap_item->vm_pgoff = vma->vm_pgoff;
get_anon_vma(vma->anon_vma);
out:
mmap_read_unlock(mm);
@@ -2458,6 +2481,10 @@ static bool should_skip_rmap_item(struct folio *folio,
if (folio_test_ksm(folio))
return false;

+ /* There is no age information in stable-tree nodes. */
+ if (rmap_item->address & STABLE_FLAG)
+ return false;
+
age = rmap_item->age;
if (age != U8_MAX)
rmap_item->age++;
--
2.25.1