[PATCH RFC 1/3] mm, x86: support copying a folio using non-temporal stores

From: Yiannis Nikolakopoulos

Date: Tue May 26 2026 - 07:39:49 EST


From: Alirad Malek <alirad.malek@xxxxxxxxxxx>

On x86, use memcpy_flushcache() (which uses non-temporal store
instructions) to copy a folio. To achieve that, create a series of helpers
(named with an _nt suffix), from folio_mc_copy() down to copy_mc_to_kernel(),
that behave like their original counterparts.

Signed-off-by: Alirad Malek <alirad.malek@xxxxxxxxxxx>
Co-developed-by: Yiannis Nikolakopoulos <yiannis.nikolakop@xxxxxxxxx>
Signed-off-by: Yiannis Nikolakopoulos <yiannis.nikolakop@xxxxxxxxx>
---
arch/x86/include/asm/uaccess.h | 4 ++++
arch/x86/lib/copy_mc.c | 26 ++++++++++++++++++++++++++
include/linux/highmem.h | 32 ++++++++++++++++++++++++++++++++
include/linux/mm.h | 1 +
mm/util.c | 17 +++++++++++++++++
5 files changed, 80 insertions(+)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 367297b188c3..2d0938d3e372 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -494,6 +494,10 @@ unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel

+unsigned long __must_check
+copy_mc_to_kernel_nt(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel_nt copy_mc_to_kernel_nt /* lets callers test availability with #ifdef */
+
unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned len);
#endif
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 97e88e58567b..5a2ee5c2211e 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -81,6 +81,32 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);

+/**
+ * copy_mc_to_kernel_nt - memory copy that handles source exceptions via
+ * copy_mc_fragile() if enabled, else memcpy_flushcache() (non-temporal stores)
+ * @dst: destination address
+ * @src: source address
+ * @len: number of bytes to copy
+ *
+ * Return: 0 for success, or number of bytes not copied if there was an
+ * exception. The memcpy_flushcache() path always returns 0.
+ */
+unsigned long __must_check copy_mc_to_kernel_nt(void *dst, const void *src, unsigned len)
+{
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_fragile(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
+
+ memcpy_flushcache(dst, src, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel_nt);
+
unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len)
{
unsigned long ret;
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index af03db851a1d..a5cb435b9ffe 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -468,6 +468,32 @@ static inline int copy_mc_highpage(struct page *to, struct page *from)

return ret;
}
+
+#ifdef copy_mc_to_kernel_nt /* a copy_mc_to_kernel_nt() implementation is available */
+static inline int copy_mc_highpage_nt(struct page *to, struct page *from)
+{
+ unsigned long ret;
+ char *vfrom, *vto;
+
+ vfrom = kmap_local_page(from);
+ vto = kmap_local_page(to);
+ ret = copy_mc_to_kernel_nt(vto, vfrom, PAGE_SIZE);
+ if (!ret) /* copy the kmsan metadata only when the data copy succeeded */
+ kmsan_copy_page_meta(to, from);
+ kunmap_local(vto);
+ kunmap_local(vfrom);
+
+ if (ret) /* copy failed: queue the source page's pfn for memory_failure handling */
+ memory_failure_queue(page_to_pfn(from), 0);
+
+ return ret;
+}
+#else /* !copy_mc_to_kernel_nt: defer to copy_mc_highpage() */
+static inline int copy_mc_highpage_nt(struct page *to, struct page *from)
+{
+ return copy_mc_highpage(to, from);
+}
+#endif /* copy_mc_to_kernel_nt */
#else
static inline int copy_mc_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
@@ -481,6 +507,12 @@ static inline int copy_mc_highpage(struct page *to, struct page *from)
copy_highpage(to, from);
return 0;
}
+
+static inline int copy_mc_highpage_nt(struct page *to, struct page *from)
+{
+ copy_highpage(to, from); /* plain page copy; this variant never reports a failure */
+ return 0;
+}
#endif

static inline void memcpy_page(struct page *dst_page, size_t dst_off,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5be3d8a8f806..d07ce478582d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1644,6 +1644,7 @@ void __folio_put(struct folio *folio);
void split_page(struct page *page, unsigned int order);
void folio_copy(struct folio *dst, struct folio *src);
int folio_mc_copy(struct folio *dst, struct folio *src);
+int folio_mc_copy_nt(struct folio *dst, struct folio *src);

unsigned long nr_free_buffer_pages(void);

diff --git a/mm/util.c b/mm/util.c
index b05ab6f97e11..e09e9b5f8eee 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -749,6 +749,23 @@ int folio_mc_copy(struct folio *dst, struct folio *src)
}
EXPORT_SYMBOL(folio_mc_copy);

+int folio_mc_copy_nt(struct folio *dst, struct folio *src)
+{
+ long nr = folio_nr_pages(src); /* the page count is taken from @src */
+ long i = 0;
+
+ for (;;) {
+ if (copy_mc_highpage_nt(folio_page(dst, i), folio_page(src, i)))
+ return -EHWPOISON; /* stop at the first page whose copy fails */
+ if (++i == nr)
+ break;
+ cond_resched(); /* called between pages, not after the last one */
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(folio_mc_copy_nt);
+
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
static int sysctl_overcommit_ratio __read_mostly = 50;
static unsigned long sysctl_overcommit_kbytes __read_mostly;

--
2.43.0