[PATCH 2/3] ksm: add the accounting of zero pages merged by use_zero_pages

From: xu.xin.sc
Date: Wed Sep 28 2022 - 23:00:43 EST


From: xu xin <xu.xin16@xxxxxxxxxx>

Before use_zero_pages is enabled by setting /sys/kernel/mm/ksm/
use_zero_pages to 1, the pages_sharing counter of KSM is a basically
accurate indicator of how many pages KSM has saved. But once
use_zero_pages is enabled, it is no longer accurate: empty (zeroed)
pages that are merged with the kernel zero page are counted in
neither pages_sharing nor pages_shared. That is because the
rmap_items of these ksm zero pages are never appended to the stable
tree of KSM.
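
The effect is easy to observe with a hypothetical repro sketch like
the following (assumes CONFIG_KSM=y, ksmd running and use_zero_pages
set to 1; region size and sleep duration are arbitrary):

  #include <stddef.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          size_t i, len = 100 * 4096;
          char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          if (buf == MAP_FAILED)
                  return 1;
          for (i = 0; i < len; i += 4096)
                  buf[i] = 0;     /* fault in zero-filled anon pages */
          madvise(buf, len, MADV_MERGEABLE);  /* register with KSM */
          sleep(10);      /* give ksmd time to merge the pages */
          /* pages_sharing in /sys/kernel/mm/ksm/ stays flat here */
          return 0;
  }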

As a result, KSM is neither fully correct nor transparent about all
of its merged pages when use_zero_pages is enabled.

There are two ways to fix this. One is to count ksm zero pages in
pages_sharing, but that breaks the definition of pages_sharing (the
number of pages sharing the KSM stable nodes). So we choose the
second: add a new interface "zero_pages_sharing" under
/sys/kernel/mm/ksm/ to expose this count.
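
Reading the new counter works like the other KSM sysfs files; a
minimal userspace sketch (assuming this patch is applied):

  #include <stdio.h>

  int main(void)
  {
          unsigned long zps;
          FILE *f = fopen("/sys/kernel/mm/ksm/zero_pages_sharing", "r");

          if (!f)
                  return 1;
          if (fscanf(f, "%lu", &zps) == 1)
                  printf("zero_pages_sharing: %lu\n", zps);
          fclose(f);
          return 0;
  }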

To implement this, introduce a new flag SPECIAL_ZERO_FLAG to mark
these special zero pages (pages merged with the kernel zero page)
for accounting, because such pages carry neither the existing
STABLE_FLAG nor the UNSTABLE_FLAG.
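
For reference, rmap_item->address keeps the page-aligned address in
its high bits and per-item state in its low bits; a minimal sketch of
the encoding this patch relies on (PAGE_MASK hard-coded for 4K pages
here, purely for illustration):

  #define SEQNR_MASK        0x0ff  /* low bits of unstable tree seqnr */
  #define UNSTABLE_FLAG     0x100  /* is a node of the unstable tree */
  #define STABLE_FLAG       0x200  /* is listed from the stable tree */
  #define SPECIAL_ZERO_FLAG 0x400  /* specially treated zero page */
  #define PAGE_MASK         (~(4096UL - 1))  /* illustrative only */

  /* Is this item a page merged with the kernel zero page? */
  static int is_ksm_zero_item(unsigned long address)
  {
          return !!(address & SPECIAL_ZERO_FLAG);
  }

  /* Drop flag/seqnr bits, keeping only the page-aligned address. */
  static unsigned long item_address(unsigned long address)
  {
          return address & PAGE_MASK;
  }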

Fixes: e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring")
Co-developed-by: Xiaokai Ran <ran.xiaokai@xxxxxxxxxx>
Signed-off-by: Xiaokai Ran <ran.xiaokai@xxxxxxxxxx>
Co-developed-by: Yang Yang <yang.yang29@xxxxxxxxxx>
Signed-off-by: Yang Yang <yang.yang29@xxxxxxxxxx>
Co-developed-by: Jiang Xuexin <jiang.xuexin@xxxxxxxxxx>
Signed-off-by: Jiang Xuexin <jiang.xuexin@xxxxxxxxxx>
Signed-off-by: xu xin <xu.xin16@xxxxxxxxxx>
---
mm/ksm.c | 98 +++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 79 insertions(+), 19 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 5b68482d2b3b..88153d2b497f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -213,6 +213,7 @@ struct ksm_rmap_item {
#define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */
#define STABLE_FLAG 0x200 /* is listed from the stable tree */
+#define SPECIAL_ZERO_FLAG 0x400 /* specially treated zero page */

/* The stable and unstable tree heads */
static struct rb_root one_stable_tree[1] = { RB_ROOT };
@@ -274,6 +275,9 @@ static unsigned int zero_checksum __read_mostly;
/* Whether to merge empty (zeroed) pages with actual zero pages */
static bool ksm_use_zero_pages __read_mostly;

+/* The number of empty (zeroed) pages merged but not in the stable tree */
+static unsigned long ksm_zero_pages_sharing;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
@@ -796,6 +800,10 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
struct ksm_rmap_item *rmap_item = *rmap_list;
*rmap_list = rmap_item->rmap_list;
remove_rmap_item_from_tree(rmap_item);
+ if (rmap_item->address & SPECIAL_ZERO_FLAG) {
+ rmap_item->address &= PAGE_MASK;
+ ksm_zero_pages_sharing--;
+ }
free_rmap_item(rmap_item);
}
}
@@ -2017,6 +2025,39 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item,
rmap_item->mm->ksm_merging_pages++;
}

+static int try_to_merge_with_kernel_zero_page(struct mm_struct *mm,
+ struct ksm_rmap_item *rmap_item,
+ struct page *page)
+{
+ int err = 0;
+
+ if (!(rmap_item->address & SPECIAL_ZERO_FLAG)) {
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+ vma = find_mergeable_vma(mm, rmap_item->address);
+ if (vma) {
+ err = try_to_merge_one_page(vma, page,
+ ZERO_PAGE(rmap_item->address));
+ } else {
+ /* If the vma is out of date, we do not need to continue. */
+ err = 0;
+ }
+ mmap_read_unlock(mm);
+ /*
+ * On failure the page was not really empty, so we need to
+ * continue. Mark and count the page only if the merge succeeded.
+ */
+ if (vma && !err) {
+ rmap_item->address |= SPECIAL_ZERO_FLAG;
+ ksm_zero_pages_sharing++;
+ }
+
+ }
+
+ return err;
+}
+
/*
* cmp_and_merge_page - first see if page can be merged into the stable tree;
* if not, compare checksum to previous and if it's the same, see if page can
@@ -2101,29 +2142,22 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
* Same checksum as an empty page. We attempt to merge it with the
* appropriate zero page if the user enabled this via sysfs.
*/
- if (ksm_use_zero_pages && (checksum == zero_checksum)) {
- struct vm_area_struct *vma;
-
- mmap_read_lock(mm);
- vma = find_mergeable_vma(mm, rmap_item->address);
- if (vma) {
- err = try_to_merge_one_page(vma, page,
- ZERO_PAGE(rmap_item->address));
- } else {
+ if (ksm_use_zero_pages) {
+ if (checksum == zero_checksum) {
+ /* On success, just return. Otherwise, fall through and continue. */
+ if (!try_to_merge_with_kernel_zero_page(mm, rmap_item, page))
+ return;
+ } else if (rmap_item->address & SPECIAL_ZERO_FLAG) {
/*
- * If the vma is out of date, we do not need to
- * continue.
+ * The page has been modified and no longer matches the kernel
+ * zero page, but its rmap_item still carries the zero-page
+ * flag, so reset the flag and update the corresponding count.
*/
- err = 0;
+ rmap_item->address &= PAGE_MASK;
+ ksm_zero_pages_sharing--;
}
- mmap_read_unlock(mm);
- /*
- * In case of failure, the page was not really empty, so we
- * need to continue. Otherwise we're done.
- */
- if (!err)
- return;
}
+
tree_rmap_item =
unstable_tree_search_insert(rmap_item, page, &tree_page);
if (tree_rmap_item) {
@@ -2336,6 +2370,24 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
mmap_read_unlock(mm);
return rmap_item;
}
+ /*
+ * Because we want to count ksm zero pages, which are
+ * not anonymous, we must also try to return the
+ * rmap_items of those kernel zero pages that replaced
+ * their original anonymous empty pages via the
+ * use_zero_pages feature.
+ */
+ if (is_zero_pfn(page_to_pfn(*page))) {
+ rmap_item = try_to_get_old_rmap_item(
+ ksm_scan.address,
+ ksm_scan.rmap_list);
+ if (rmap_item && (rmap_item->address & SPECIAL_ZERO_FLAG)) {
+ ksm_scan.rmap_list = &rmap_item->rmap_list;
+ ksm_scan.address += PAGE_SIZE;
+ mmap_read_unlock(mm);
+ return rmap_item;
+ }
+ }
next_page:
put_page(*page);
ksm_scan.address += PAGE_SIZE;
@@ -3115,6 +3167,13 @@ static ssize_t pages_volatile_show(struct kobject *kobj,
}
KSM_ATTR_RO(pages_volatile);

+static ssize_t zero_pages_sharing_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lu\n", ksm_zero_pages_sharing);
+}
+KSM_ATTR_RO(zero_pages_sharing);
+
static ssize_t stable_node_dups_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -3175,6 +3234,7 @@ static struct attribute *ksm_attrs[] = {
&merge_across_nodes_attr.attr,
#endif
&max_page_sharing_attr.attr,
+ &zero_pages_sharing_attr.attr,
&stable_node_chains_attr.attr,
&stable_node_dups_attr.attr,
&stable_node_chains_prune_millisecs_attr.attr,
--
2.25.1