[PATCH 2/5] mm: add pmd level THP COW parameter in sysfs

From: Luka Bai

Date: Fri May 01 2026 - 01:56:23 EST


From: Luka Bai <lukabai@xxxxxxxxxxx>

We would like THP COW to follow the same policy model that is already
used for huge anonymous pages and huge shmem pages, with three settings:
always, never and madvise. When set to always, we always do THP COW for
all the existing THPs. When set to never, we never do THP COW. When set
to madvise, we follow the setup introduced in the previous commit to
decide, for each individual vma, whether we do THP COW.

We add TRANSPARENT_HUGEPAGE_COW_FLAG and
TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG, which mirror
TRANSPARENT_HUGEPAGE_FLAG and TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG (the
flags used to decide whether an anonymous huge page fault is allowed).
We also add the sysfs attribute thp_cow (thp_cow_attr in the code) as
the interface for choosing among the three strategies described above.

Signed-off-by: Luka Bai <lukabai@xxxxxxxxxxx>
---
.../testing/sysfs-kernel-mm-transparent-hugepage | 1 +
Documentation/admin-guide/mm/transhuge.rst | 27 +++++++++++++++
include/linux/huge_mm.h | 2 ++
mm/huge_memory.c | 39 ++++++++++++++++++++++
4 files changed, 69 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
index 7bfbb9cc2c11..43a1af13efe0 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
@@ -11,6 +11,7 @@ Description:
- khugepaged
- shmem_enabled
- use_zero_page
+ - thp_cow
- subdirectories of the form hugepages-<size>kB, where <size>
is the page size of the hugepages supported by the kernel/CPU
combination.
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 0ef13c451ac8..0926651bad0d 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -226,6 +226,33 @@ to "always" or "madvise"), and it'll be automatically shutdown when
all THP sizes are disabled (when both the per-size anon control and the
top-level control are "never")

+Some workloads may want to do copy on write on the pmd size to acquire the
+TLB benefit when they write to a shared anonymous pmd sized entry.
+They can do so by setting up the thp_cow control. The control is only enabled
+when the global THP controls are set to "always" or "madvise" for the
+specific memory region.
+
+::
+
+ echo always >/sys/kernel/mm/transparent_hugepage/thp_cow
+ echo madvise >/sys/kernel/mm/transparent_hugepage/thp_cow
+ echo never >/sys/kernel/mm/transparent_hugepage/thp_cow
+
+always
+ means that the writing process will always do copy on write on
+ the pmd size. If there is no pmd sized folio available, it will
+ fall back to the pte size.
+
+madvise
+ will do things like ``always`` but only for regions that have
+ used madvise(MADV_THP_COW).
+
+never
+ will not do copy on write on the pmd size no matter what setup
+ is done using madvise. When a process writes on a shared anonymous
+ pmd sized entry, it will just allocate a pte sized page and do copy
+ on write on the pte size.
+
process THP controls
--------------------

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a0ce8c0b81f5..2a62f0f92f68 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -57,6 +57,8 @@ enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
+ TRANSPARENT_HUGEPAGE_COW_FLAG,
+ TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG,
};

struct kobject;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1f0d0b780943..babca060feca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -531,6 +531,44 @@ static ssize_t split_underused_thp_store(struct kobject *kobj,
static struct kobj_attribute split_underused_thp_attr = __ATTR(
shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store);

+static ssize_t thp_cow_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{ /* show handler for thp_cow: emits the three policy names, current one in [brackets] */
+ const char *output; /* one of three fixed strings, selected below */
+
+ if (test_bit(TRANSPARENT_HUGEPAGE_COW_FLAG, &transparent_hugepage_flags))
+ output = "[always] madvise never"; /* COW flag is checked first, so it takes precedence */
+ else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG,
+ &transparent_hugepage_flags))
+ output = "always [madvise] never";
+ else
+ output = "always madvise [never]"; /* neither COW flag is set */
+
+ return sysfs_emit(buf, "%s\n", output); /* result of sysfs_emit() is returned unchanged */
+}
+
+static ssize_t thp_cow_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{ /* store handler for thp_cow: maps "always"/"madvise"/"never" onto the two COW flag bits */
+ ssize_t ret = count; /* returned as-is when a keyword matches */
+
+ if (sysfs_streq(buf, "always")) { /* always: COW flag set, REQ_MADV_COW flag cleared */
+ clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_COW_FLAG, &transparent_hugepage_flags);
+ } else if (sysfs_streq(buf, "madvise")) { /* madvise: REQ_MADV_COW flag set, COW flag cleared */
+ clear_bit(TRANSPARENT_HUGEPAGE_COW_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG, &transparent_hugepage_flags);
+ } else if (sysfs_streq(buf, "never")) { /* never: both flags cleared */
+ clear_bit(TRANSPARENT_HUGEPAGE_COW_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_COW_FLAG, &transparent_hugepage_flags);
+ } else
+ ret = -EINVAL; /* no keyword matched; neither flag bit is modified */
+
+ return ret;
+}
+static struct kobj_attribute thp_cow_attr = __ATTR_RW(thp_cow); /* attribute "thp_cow"; __ATTR_RW pairs it with thp_cow_show/thp_cow_store */
+
static struct attribute *hugepage_attr[] = {
&enabled_attr.attr,
&defrag_attr.attr,
@@ -540,6 +578,7 @@ static struct attribute *hugepage_attr[] = {
&shmem_enabled_attr.attr,
#endif
&split_underused_thp_attr.attr,
+ &thp_cow_attr.attr,
NULL,
};


--
2.52.0