[PATCH v3 3/7] s390/mm: Batch PTE updates in lazy MMU mode

From: Alexander Gordeev

Date: Tue Jun 16 2026 - 08:46:51 EST


Make use of the IPTE instruction's "Additional Entries" field to
invalidate multiple PTEs in one go while in lazy MMU mode. This
is the mode in which many memory-management system calls (like
mremap(), mprotect(), etc.) update memory attributes.

To achieve that, the set_pte() and ptep_get() primitives use a
per-CPU cache to store and retrieve PTE values; the cached values
are applied to the real page table once lazy MMU mode is left.

The same is done for memory-management platform callbacks that
would otherwise cause intense per-PTE IPTE traffic, reducing the
number of IPTE instructions from up to PTRS_PER_PTE to a single
instruction in the best case. The average reduction is of course
smaller.

Since all existing page table iterators called in lazy MMU mode
handle one table at a time, the per-CPU cache does not need to be
larger than PTRS_PER_PTE entries. That also naturally aligns with
the IPTE instruction, which must not cross a page table boundary.

Before this change, the system calls did:

lazy_mmu_mode_enable_pte()
...
<update PTEs> // up to PTRS_PER_PTE single-IPTEs
...
lazy_mmu_mode_disable()

With this change, the system calls do:

lazy_mmu_mode_enable_pte()
...
<store new PTE values in the per-CPU cache>
...
lazy_mmu_mode_disable() // apply cache with one multi-IPTE

When applied to large memory ranges, some system calls show
significant speedups:

mprotect() ~15x
munmap() ~3x
mremap() ~28x

At the same time, fork() shows a measurable slowdown of ~1.5x.

The overall results depend on memory size and access patterns;
apart from the fork() slowdown noted above, the change generally
does not degrade performance.

In addition to a process-wide impact, the rework affects the
whole Central Electronics Complex (CEC). Each (global) IPTE
instruction initiates a quiesce state in a CEC, so reducing
the number of IPTE calls relieves CEC-wide quiesce traffic.

In an extreme case of mprotect() continuously triggering the
quiesce state on four LPARs in parallel, measurements show
~25x fewer quiesce events.

Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
---
arch/s390/Kconfig | 1 +
arch/s390/include/asm/lazy_mmu.h | 9 +
arch/s390/include/asm/lowcore.h | 2 +-
arch/s390/include/asm/pgtable.h | 157 +++++++++++--
arch/s390/kernel/setup.c | 2 +
arch/s390/kernel/smp.c | 7 +
arch/s390/mm/Makefile | 2 +-
arch/s390/mm/lazy_mmu.c | 382 +++++++++++++++++++++++++++++++
arch/s390/mm/pgtable.c | 8 +-
9 files changed, 546 insertions(+), 24 deletions(-)
create mode 100644 arch/s390/include/asm/lazy_mmu.h
create mode 100644 arch/s390/mm/lazy_mmu.c

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 998971f9a071..35cb36e29cdb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -97,6 +97,7 @@ config S390
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_LAZY_MMU_MODE
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
diff --git a/arch/s390/include/asm/lazy_mmu.h b/arch/s390/include/asm/lazy_mmu.h
new file mode 100644
index 000000000000..98366e9de9bc
--- /dev/null
+++ b/arch/s390/include/asm/lazy_mmu.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LAZY_MMU_H
+#define __LAZY_MMU_H
+
+#include <linux/types.h>	/* gfp_t */
+
+/*
+ * Lifecycle of the per-CPU PTE cache used in lazy MMU mode.
+ *
+ * lazy_mmu_online_boot_cpu() - set up the cache of the boot CPU (CPU 0).
+ * lazy_mmu_online_cpu()      - allocate the cache of @cpu using @gfp;
+ *                              returns 0 on success or -ENOMEM.
+ * lazy_mmu_offline_cpu()     - free the cache of @cpu.
+ */
+void lazy_mmu_online_boot_cpu(void);
+int lazy_mmu_online_cpu(gfp_t gfp, unsigned int cpu);
+void lazy_mmu_offline_cpu(unsigned int cpu);
+
+#endif /* __LAZY_MMU_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index cd1ddfdb5d35..afddfbf996e7 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -163,7 +163,7 @@ struct lowcore {
__s32 preempt_count; /* 0x03a8 */
__u32 spinlock_lockval; /* 0x03ac */
__u32 spinlock_index; /* 0x03b0 */
- __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
+ __s32 lazy_mmu_count; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
__u8 percpu_register; /* 0x03c0 */
__u8 pad_0x03c1[0x0400-0x03c1]; /* 0x03c1 */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f9a8a92fa160..2b6659d61fa5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -39,6 +39,64 @@ enum {

extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];

+bool __lazy_mmu_ptep_test_and_clear_young(unsigned long addr, pte_t *ptep, int *res);
+bool __lazy_mmu_ptep_get_and_clear(unsigned long addr, pte_t *ptep, pte_t *res);
+bool __lazy_mmu_ptep_modify_prot_start(unsigned long addr, pte_t *ptep, pte_t *res);
+bool __lazy_mmu_ptep_modify_prot_commit(unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte);
+bool __lazy_mmu_ptep_set_wrprotect(unsigned long addr, pte_t *ptep);
+bool __lazy_mmu_set_pte(pte_t *ptep, pte_t pte);
+bool __lazy_mmu_ptep_get(pte_t *ptep, pte_t *res);
+
+/*
+ * Tell whether the current CPU is in lazy MMU mode, i.e. whether the
+ * lazy_mmu_count in its lowcore is non-zero. Always false when built
+ * with __DECOMPRESSOR defined.
+ */
+static __always_inline bool is_lazy_mmu_active(void)
+{
+	if (__is_defined(__DECOMPRESSOR))
+		return false;
+	return get_lowcore()->lazy_mmu_count != 0;
+}
+
+/*
+ * Lazy MMU front end of ptep_test_and_clear_young(). Returns true and
+ * fills *@res when the PTE was handled through the lazy MMU cache;
+ * returns false when the caller has to operate on the page table itself.
+ */
+static inline
+bool lazy_mmu_ptep_test_and_clear_young(unsigned long addr, pte_t *ptep, int *res)
+{
+	return is_lazy_mmu_active() &&
+	       __lazy_mmu_ptep_test_and_clear_young(addr, ptep, res);
+}
+
+/*
+ * Lazy MMU front end of the get-and-clear operations. Returns true and
+ * stores the previous PTE value in *@res when the PTE was handled through
+ * the lazy MMU cache; returns false when the caller has to fall back.
+ */
+static inline
+bool lazy_mmu_ptep_get_and_clear(unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	return is_lazy_mmu_active() &&
+	       __lazy_mmu_ptep_get_and_clear(addr, ptep, res);
+}
+
+/*
+ * Lazy MMU front end of ptep_modify_prot_start(). Returns true and stores
+ * the previous PTE value in *@res when the PTE was handled through the
+ * lazy MMU cache; returns false when the caller has to fall back.
+ */
+static inline
+bool lazy_mmu_ptep_modify_prot_start(unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	return is_lazy_mmu_active() &&
+	       __lazy_mmu_ptep_modify_prot_start(addr, ptep, res);
+}
+
+/*
+ * Lazy MMU front end of ptep_modify_prot_commit(). Returns true when the
+ * new PTE value was taken by the lazy MMU cache; returns false when the
+ * caller has to commit it to the page table itself.
+ */
+static inline
+bool lazy_mmu_ptep_modify_prot_commit(unsigned long addr, pte_t *ptep,
+				      pte_t old_pte, pte_t pte)
+{
+	return is_lazy_mmu_active() &&
+	       __lazy_mmu_ptep_modify_prot_commit(addr, ptep, old_pte, pte);
+}
+
+/*
+ * Lazy MMU front end of ptep_set_wrprotect(). Returns true when the PTE
+ * was handled through the lazy MMU cache; returns false when the caller
+ * has to write-protect the page table entry itself.
+ */
+static inline
+bool lazy_mmu_ptep_set_wrprotect(unsigned long addr, pte_t *ptep)
+{
+	return is_lazy_mmu_active() &&
+	       __lazy_mmu_ptep_set_wrprotect(addr, ptep);
+}
+
static inline void update_page_count(int level, long count)
{
if (IS_ENABLED(CONFIG_PROC_FS))
@@ -978,15 +1036,30 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
WRITE_ONCE(*pmdp, pmd);
}

-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte(pte_t *ptep, pte_t pte)
{
WRITE_ONCE(*ptep, pte);
}

+/*
+ * Store a PTE value. In lazy MMU mode the value is first offered to
+ * __lazy_mmu_set_pte(); if lazy MMU mode is not active, or the helper
+ * declines the PTE, the page table entry is written directly.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+	if (is_lazy_mmu_active() && __lazy_mmu_set_pte(ptep, pte))
+		return;
+	__set_pte(ptep, pte);
+}
+
+/* Read a PTE straight from the page table, without consulting the lazy MMU cache. */
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+	pte_t pte = READ_ONCE(*ptep);
+
+	return pte;
+}
+
#define ptep_get ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
- return READ_ONCE(*ptep);
+ pte_t res;
+
+ if (!is_lazy_mmu_active() || !__lazy_mmu_ptep_get(ptep, &res))
+ res = __ptep_get(ptep);
+ return res;
}

#define pmdp_get pmdp_get
@@ -1179,6 +1252,15 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr,
} while (nr != 255);
}

+void arch_enter_lazy_mmu_mode_with_ptes(struct mm_struct *mm,
+ unsigned long addr, unsigned long end,
+ pte_t *pte);
+#define arch_enter_lazy_mmu_mode_with_ptes arch_enter_lazy_mmu_mode_with_ptes
+
+void arch_enter_lazy_mmu_mode(void);
+void arch_leave_lazy_mmu_mode(void);
+void arch_flush_lazy_mmu_mode(void);
+
/*
* This is hard to understand. ptep_get_and_clear and ptep_clear_flush
* both clear the TLB for the unmapped pte. The reason is that
@@ -1199,10 +1281,16 @@ pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = ptep_get(ptep);
+ pte_t pte;
+ int res;

- pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
- return pte_young(pte);
+ if (!lazy_mmu_ptep_test_and_clear_young(addr, ptep, &res)) {
+ pte = __ptep_get(ptep);
+ pte = pte_mkold(pte);
+ pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte);
+ res = pte_young(pte);
+ }
+ return res;
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1218,7 +1306,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
{
pte_t res;

- res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (!lazy_mmu_ptep_get_and_clear(addr, ptep, &res))
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
page_table_check_pte_clear(mm, addr, res);
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
@@ -1227,9 +1316,34 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
- pte_t *, pte_t, pte_t);
+pte_t ___ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ___ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
+
+/*
+ * Start a protection-modification transaction on a PTE and return its
+ * previous value. The lazy MMU cache is tried first; otherwise the
+ * operation is done by ___ptep_modify_prot_start().
+ */
+static inline
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+			     unsigned long addr, pte_t *ptep)
+{
+	pte_t old;
+
+	if (lazy_mmu_ptep_modify_prot_start(addr, ptep, &old))
+		return old;
+	return ___ptep_modify_prot_start(vma, addr, ptep);
+}
+
+/*
+ * Finish a protection-modification transaction by installing @pte. The
+ * lazy MMU cache is tried first; otherwise the operation is done by
+ * ___ptep_modify_prot_commit().
+ */
+static inline
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	if (lazy_mmu_ptep_modify_prot_commit(addr, ptep, old_pte, pte))
+		return;
+	___ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+}
+
+bool ipte_range_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, pte_t *res);
+bool ipte_range_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte); /* NOTE(review): neither ipte_range_*() prototype
+ * above has a definition or a caller in this
+ * patch - TODO confirm they are still needed. */

#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
@@ -1259,11 +1373,13 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
{
pte_t res;

- if (full) {
- res = ptep_get(ptep);
- set_pte(ptep, __pte(_PAGE_INVALID));
- } else {
- res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (!lazy_mmu_ptep_get_and_clear(addr, ptep, &res)) {
+ if (full) {
+ res = __ptep_get(ptep);
+ __set_pte(ptep, __pte(_PAGE_INVALID));
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ }
}
page_table_check_pte_clear(mm, addr, res);
/* At this point the reference through the mapping is still present */
@@ -1289,10 +1405,15 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = ptep_get(ptep);
+ pte_t pte;

- if (pte_write(pte))
- ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
+ if (!lazy_mmu_ptep_set_wrprotect(addr, ptep)) {
+ pte = __ptep_get(ptep);
+ if (pte_write(pte)) {
+ pte = pte_wrprotect(pte);
+ ptep_xchg_lazy(mm, addr, ptep, pte);
+ }
+ }
}

/*
@@ -1325,7 +1446,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead.
* A local RDP can be used to do the flush.
*/
- if (cpu_has_rdp() && !(pte_val(ptep_get(ptep)) & _PAGE_PROTECT))
+ if (cpu_has_rdp() && !(pte_val(__ptep_get(ptep)) & _PAGE_PROTECT))
__ptep_rdp(address, ptep, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b60284328fe3..f5a3c9e1b6b8 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -77,6 +77,7 @@
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
+#include <asm/lazy_mmu.h>
#include "entry.h"

/*
@@ -1012,5 +1013,6 @@ void __init setup_arch(char **cmdline_p)

void __init arch_cpu_finalize_init(void)
{
+ lazy_mmu_online_boot_cpu();
sclp_init();
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 50bb499cf3e5..4a778bc186a4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -60,6 +60,7 @@
#include <asm/topology.h>
#include <asm/vdso.h>
#include <asm/maccess.h>
+#include <asm/lazy_mmu.h>
#include "entry.h"

enum {
@@ -867,6 +868,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
rc = pcpu_alloc_lowcore(pcpu, cpu);
if (rc)
return rc;
+ rc = lazy_mmu_online_cpu(GFP_KERNEL, cpu);
+ if (rc) {
+ pcpu_free_lowcore(pcpu, cpu);
+ return rc;
+ }
/*
* Make sure global control register contents do not change
* until new CPU has initialized control registers.
@@ -922,6 +928,7 @@ void __cpu_die(unsigned int cpu)
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
+ lazy_mmu_offline_cpu(cpu);
pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 193899c39ca7..26e9fc11543a 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -3,7 +3,7 @@
# Makefile for the linux s390-specific parts of the memory manager.
#

-obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
+obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o lazy_mmu.o
obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o

obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/s390/mm/lazy_mmu.c b/arch/s390/mm/lazy_mmu.c
new file mode 100644
index 000000000000..d75b93d9b0de
--- /dev/null
+++ b/arch/s390/mm/lazy_mmu.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pgtable.h>
+#include <linux/kasan.h>
+#include <linux/panic.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/lazy_mmu.h>
+#include <kunit/visibility.h>
+
+#define PTE_POISON _PAGE_LARGE /* cache slot value meaning "no pending PTE value" */
+
+struct ipte_range { /* per-CPU state of the PTE range batched in lazy MMU mode */
+ struct mm_struct *mm; /* mm of the active range; NULL when no range is set */
+ unsigned long base_addr; /* virtual address that base_pte maps */
+ unsigned long base_end; /* end of the range, exclusive */
+ pte_t *base_pte; /* first PTE of the range; cache[] is indexed relative to it */
+ pte_t *start_pte; /* lowest PTE with a cached value; NULL when nothing is cached */
+ pte_t *end_pte; /* one past the highest PTE with a cached value */
+ pte_t cache[PTRS_PER_PTE]; /* pending PTE values; PTE_POISON where none is pending */
+};
+
+static DEFINE_PER_CPU(struct ipte_range *, ipte_range); /* set by lazy_mmu_online_cpu() */
+
+/*
+ * Count the PTEs, beginning at @start and stopping before @end, that share
+ * the _PAGE_INVALID state of the first one. The page table is read
+ * directly. *@valid is set to true if the counted PTEs are valid.
+ */
+static int count_contiguous(pte_t *start, pte_t *end, bool *valid)
+{
+	unsigned long first_invalid, this_invalid;
+	int nr = 1;
+
+	first_invalid = pte_val(__ptep_get(start)) & _PAGE_INVALID;
+	while (start + nr < end) {
+		this_invalid = pte_val(__ptep_get(start + nr)) & _PAGE_INVALID;
+		if (this_invalid != first_invalid)
+			break;
+		nr++;
+	}
+
+	*valid = !first_invalid;
+	return nr;
+}
+
+/*
+ * Invalidate @nr_ptes consecutive PTEs starting at @ptep / @addr with an
+ * IPTE range operation. The local variant is used when the CPU has
+ * cpu_has_tlb_lc() and this CPU is the only one in the mm's CPU mask;
+ * the global variant otherwise. __ptep_ipte_range() takes the number of
+ * additional entries, hence nr_ptes - 1.
+ */
+static void __invalidate_pte_range(struct mm_struct *mm, unsigned long addr,
+				   int nr_ptes, pte_t *ptep)
+{
+	bool local;
+
+	atomic_inc(&mm->context.flush_count);
+	local = cpu_has_tlb_lc() &&
+		cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()));
+	if (local)
+		__ptep_ipte_range(addr, nr_ptes - 1, ptep, IPTE_LOCAL);
+	else
+		__ptep_ipte_range(addr, nr_ptes - 1, ptep, IPTE_GLOBAL);
+	atomic_dec(&mm->context.flush_count);
+}
+
+/*
+ * Determine the run of PTEs at @start that share one valid/invalid state
+ * and, if they are valid, invalidate them. Returns the length of the run.
+ */
+static int invalidate_pte_range(struct mm_struct *mm, unsigned long addr,
+				pte_t *start, pte_t *end)
+{
+	bool valid;
+	int nr_ptes = count_contiguous(start, end, &valid);
+
+	if (!valid)
+		return nr_ptes;
+	__invalidate_pte_range(mm, addr, nr_ptes, start);
+	return nr_ptes;
+}
+
+/*
+ * Write the cached values @cache[] to the page table entries from @ptep up
+ * to, but not including, @end. The entries are processed in runs of equal
+ * valid/invalid state: a run of valid PTEs is invalidated before the new
+ * values are stored. Each consumed cache slot is reset to PTE_POISON.
+ */
+static void set_pte_range(struct mm_struct *mm, unsigned long addr,
+			  pte_t *ptep, pte_t *end, pte_t *cache)
+{
+	pte_t *run_end;
+
+	while (ptep < end) {
+		run_end = ptep + invalidate_pte_range(mm, addr, ptep, end);
+		addr += (run_end - ptep) * PAGE_SIZE;
+
+		while (ptep < run_end) {
+			__set_pte(ptep, *cache);
+			*cache = __pte(PTE_POISON);
+			ptep++;
+			cache++;
+		}
+	}
+}
+
+/*
+ * Enter lazy MMU mode without recording a PTE range: only the lowcore
+ * lazy_mmu_count is incremented. Does nothing without facility 13.
+ */
+static void enter_ipte_norange(void)
+{
+	struct ipte_range __maybe_unused *range;
+
+	if (test_facility(13)) {
+		/* Paired with put_cpu_var() in leave_ipte_range() */
+		range = get_cpu_var(ipte_range);
+		get_lowcore()->lazy_mmu_count++;
+	}
+}
+
+/*
+ * Enter lazy MMU mode for the PTEs that map [@addr, @end) of @mm, with
+ * @pte being the entry for @addr. The range is recorded in this CPU's
+ * ipte_range. Does nothing without facility 13.
+ */
+static void enter_ipte_range(struct mm_struct *mm,
+			     unsigned long addr, unsigned long end, pte_t *pte)
+{
+	struct ipte_range *range;
+
+	if (test_facility(13)) {
+		/* Paired with put_cpu_var() in leave_ipte_range() */
+		range = get_cpu_var(ipte_range);
+		get_lowcore()->lazy_mmu_count++;
+
+		range->mm = mm;
+		range->base_addr = addr;
+		range->base_end = end;
+		range->base_pte = pte;
+	}
+}
+
+/*
+ * Leave lazy MMU mode on this CPU: apply all cached PTE values of the
+ * recorded range to the page table, forget the range, decrement the
+ * lowcore lazy_mmu_count and drop the per-CPU reference taken on entry.
+ * Does nothing without facility 13.
+ *
+ * The window [start_pte, end_pte) may contain slots that were never
+ * written (PTE_POISON); those split the window into runs, and each run is
+ * handed to set_pte_range() separately.
+ */
+static void leave_ipte_range(void)
+{
+	pte_t *ptep, *start, *start_cache, *cache;
+	unsigned long start_addr, addr;
+	struct ipte_range *range;
+	int start_idx;
+
+	if (!test_facility(13))
+		return;
+
+	lockdep_assert_preemption_disabled();
+	range = this_cpu_read(ipte_range);
+	/* Entered without a range: nothing was cached */
+	if (!range->mm)
+		goto norange;
+	/* Range was set up, but no PTE was modified */
+	if (!range->start_pte)
+		goto done;
+
+	start = range->start_pte;
+	start_idx = range->start_pte - range->base_pte;
+	start_addr = range->base_addr + start_idx * PAGE_SIZE;
+	addr = start_addr;
+	start_cache = &range->cache[start_idx];
+	cache = start_cache;
+	for (ptep = start; ptep < range->end_pte; ptep++, cache++, addr += PAGE_SIZE) {
+		if (pte_val(*cache) == PTE_POISON) {
+			/* A hole ends the current run, if there is one */
+			if (start) {
+				set_pte_range(range->mm, start_addr, start, ptep, start_cache);
+				start = NULL;
+			}
+		} else if (!start) {
+			/* First cached slot after a hole starts a new run */
+			start = ptep;
+			start_addr = addr;
+			start_cache = cache;
+		}
+	}
+	/*
+	 * Flush the trailing run. @start is NULL only if the last slot of the
+	 * window reads as PTE_POISON; never pass a NULL start on, as
+	 * set_pte_range() would then walk from address zero.
+	 */
+	if (start)
+		set_pte_range(range->mm, start_addr, start, ptep, start_cache);
+
+	range->start_pte = NULL;
+	range->end_pte = NULL;
+
+done:
+	range->mm = NULL;
+	range->base_addr = 0;
+	range->base_end = 0;
+	range->base_pte = NULL;
+
+norange:
+	get_lowcore()->lazy_mmu_count--;
+	put_cpu_var(ipte_range);
+}
+
+/*
+ * Apply the cached PTE values without ending lazy MMU mode: if a range is
+ * recorded, it is left and immediately re-entered with the same
+ * parameters. Does nothing without facility 13 or without a range.
+ */
+static void flush_lazy_mmu_mode(void)
+{
+	unsigned long addr, end;
+	struct ipte_range *range;
+	struct mm_struct *mm;
+	pte_t *pte;
+
+	if (!test_facility(13))
+		return;
+
+	range = get_cpu_var(ipte_range);
+	mm = range->mm;
+	if (mm) {
+		/* Save the range: leave_ipte_range() clears it */
+		addr = range->base_addr;
+		end = range->base_end;
+		pte = range->base_pte;
+
+		leave_ipte_range();
+		enter_ipte_range(mm, addr, end, pte);
+	}
+	put_cpu_var(ipte_range);
+}
+
+void arch_enter_lazy_mmu_mode(void)
+{
+ enter_ipte_norange(); /* bumps lazy_mmu_count only; no PTE range is recorded */
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_enter_lazy_mmu_mode);
+
+void arch_enter_lazy_mmu_mode_with_ptes(struct mm_struct *mm,
+ unsigned long addr, unsigned long end,
+ pte_t *pte)
+{
+ enter_ipte_range(mm, addr, end, pte); /* records [addr, end) and its first PTE for batching */
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_enter_lazy_mmu_mode_with_ptes);
+
+void arch_leave_lazy_mmu_mode(void)
+{
+ leave_ipte_range(); /* applies cached PTE values, if any, and drops lazy_mmu_count */
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_leave_lazy_mmu_mode);
+
+void arch_flush_lazy_mmu_mode(void)
+{
+ flush_lazy_mmu_mode(); /* applies cached PTE values but keeps the recorded range */
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_flush_lazy_mmu_mode);
+
+/*
+ * Put @pte into the cache slot that belongs to @ptep and widen the
+ * [start_pte, end_pte) window of modified entries so that it covers @ptep.
+ */
+static void __ipte_range_set_pte(struct ipte_range *range, pte_t *ptep, pte_t pte)
+{
+	unsigned int idx = ptep - range->base_pte;
+
+	lockdep_assert_preemption_disabled();
+	range->cache[idx] = pte;
+
+	/* First cached entry: the window is exactly this PTE */
+	if (!range->start_pte) {
+		range->start_pte = ptep;
+		range->end_pte = ptep + 1;
+		return;
+	}
+
+	if (ptep < range->start_pte)
+		range->start_pte = ptep;
+	else if (ptep >= range->end_pte)
+		range->end_pte = ptep + 1;
+}
+
+/*
+ * Return the current value of @ptep as seen in lazy MMU mode: the cached
+ * value if one is pending, the page table content otherwise.
+ */
+static pte_t __ipte_range_ptep_get(struct ipte_range *range, pte_t *ptep)
+{
+	unsigned int idx = ptep - range->base_pte;
+	pte_t cached;
+
+	lockdep_assert_preemption_disabled();
+	cached = range->cache[idx];
+	return pte_val(cached) == PTE_POISON ? __ptep_get(ptep) : cached;
+}
+
+/*
+ * Return this CPU's ipte_range if @ptep lies within the recorded range,
+ * i.e. within the (base_end - base_addr) / PAGE_SIZE entries that start
+ * at base_pte; NULL otherwise.
+ */
+static struct ipte_range *this_ipte_range(pte_t *ptep)
+{
+	struct ipte_range *range = this_cpu_read(ipte_range);
+	unsigned int nr_ptes;
+
+	nr_ptes = (range->base_end - range->base_addr) / PAGE_SIZE;
+	if (ptep >= range->base_pte && ptep < range->base_pte + nr_ptes)
+		return range;
+
+	return NULL;
+}
+
+/*
+ * Cache @pte for @ptep if @ptep belongs to the range recorded on this CPU.
+ * Returns true if the value was cached, false if the caller has to write
+ * the page table entry itself.
+ */
+bool __lazy_mmu_set_pte(pte_t *ptep, pte_t pte)
+{
+	struct ipte_range *range = this_ipte_range(ptep);
+
+	if (!range)
+		return false;
+	__ipte_range_set_pte(range, ptep, pte);
+	return true;
+}
+
+/*
+ * Look up the lazy MMU view of @ptep. Returns true and stores the value in
+ * *@res if @ptep belongs to the range recorded on this CPU; returns false
+ * if the caller has to read the page table entry itself.
+ */
+bool __lazy_mmu_ptep_get(pte_t *ptep, pte_t *res)
+{
+	struct ipte_range *range = this_ipte_range(ptep);
+
+	if (!range)
+		return false;
+	*res = __ipte_range_ptep_get(range, ptep);
+	return true;
+}
+
+/*
+ * Cached variant of test-and-clear-young: cache the pte_mkold() form of the
+ * PTE and report in *@res whether the PTE was young before. Returns false,
+ * leaving *@res untouched, if @ptep is outside the recorded range.
+ */
+bool __lazy_mmu_ptep_test_and_clear_young(unsigned long addr, pte_t *ptep, int *res)
+{
+	struct ipte_range *range = this_ipte_range(ptep);
+	pte_t old;
+
+	if (!range)
+		return false;
+
+	old = __ipte_range_ptep_get(range, ptep);
+	__ipte_range_set_pte(range, ptep, pte_mkold(old));
+	*res = pte_young(old);
+	return true;
+}
+
+/*
+ * Cached variant of get-and-clear: cache an invalid PTE (_PAGE_INVALID)
+ * for @ptep and return the previous value in *@res. Returns false, leaving
+ * *@res untouched, if @ptep is outside the recorded range.
+ */
+bool __lazy_mmu_ptep_get_and_clear(unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	struct ipte_range *range = this_ipte_range(ptep);
+	pte_t old;
+
+	if (!range)
+		return false;
+
+	old = __ipte_range_ptep_get(range, ptep);
+	__ipte_range_set_pte(range, ptep, __pte(_PAGE_INVALID));
+	*res = old;
+	return true;
+}
+
+bool __lazy_mmu_ptep_modify_prot_start(unsigned long addr, pte_t *ptep, pte_t *res)
+{
+ return __lazy_mmu_ptep_get_and_clear(addr, ptep, res); /* start == cache an invalid PTE, return the old one */
+}
+
+/*
+ * Cached variant of ptep_modify_prot_commit(): cache @pte as the new value
+ * of @ptep. @addr and @old_pte are not used. Returns false if @ptep is
+ * outside the recorded range, in which case the caller has to commit the
+ * PTE itself.
+ */
+bool __lazy_mmu_ptep_modify_prot_commit(unsigned long addr, pte_t *ptep,
+					pte_t old_pte, pte_t pte)
+{
+	struct ipte_range *range;
+
+	range = this_ipte_range(ptep);
+	if (!range)
+		return false;
+
+	/*
+	 * Keep in sync with ___ptep_modify_prot_commit(), which strips
+	 * _PAGE_UNUSED from a present PTE before installing it.
+	 */
+	if (pte_present(pte))
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_UNUSED));
+	__ipte_range_set_pte(range, ptep, pte);
+
+	return true;
+}
+
+/*
+ * Cached variant of ptep_set_wrprotect(): if the PTE is writable, cache its
+ * write-protected form. Returns false if @ptep is outside the recorded
+ * range, true otherwise (also when the PTE was not writable).
+ */
+bool __lazy_mmu_ptep_set_wrprotect(unsigned long addr, pte_t *ptep)
+{
+	struct ipte_range *range = this_ipte_range(ptep);
+	pte_t pte;
+
+	if (!range)
+		return false;
+
+	pte = __ipte_range_ptep_get(range, ptep);
+	if (!pte_write(pte))
+		return true;
+
+	__ipte_range_set_pte(range, ptep, pte_wrprotect(pte));
+	return true;
+}
+
+/*
+ * Allocate the PTE cache of @cpu with @gfp, mark every slot as empty
+ * (PTE_POISON) and publish it in the per-CPU ipte_range pointer.
+ * Returns 0 on success or when facility 13 is absent, -ENOMEM if the
+ * allocation fails.
+ */
+int lazy_mmu_online_cpu(gfp_t gfp, unsigned int cpu)
+{
+	struct ipte_range *range;
+	pte_t *slot;
+
+	if (!test_facility(13))
+		return 0;
+
+	range = kzalloc_obj(*range, gfp);
+	if (!range)
+		return -ENOMEM;
+
+	for (slot = range->cache; slot < range->cache + ARRAY_SIZE(range->cache); slot++)
+		*slot = __pte(PTE_POISON);
+	per_cpu(ipte_range, cpu) = range;
+
+	return 0;
+}
+
+/*
+ * Release the PTE cache of @cpu: the per-CPU pointer is cleared before the
+ * memory is freed. Does nothing without facility 13.
+ */
+void lazy_mmu_offline_cpu(unsigned int cpu)
+{
+	struct ipte_range **slot, *range;
+
+	if (!test_facility(13))
+		return;
+
+	slot = per_cpu_ptr(&ipte_range, cpu);
+	range = *slot;
+	*slot = NULL;
+	kfree(range);
+}
+
+/*
+ * Set up the PTE cache of the boot CPU (CPU 0). There is no way to
+ * continue without it: entering lazy MMU mode dereferences the per-CPU
+ * ipte_range pointer unconditionally, so a failed allocation is fatal.
+ */
+void __init lazy_mmu_online_boot_cpu(void)
+{
+	if (lazy_mmu_online_cpu(GFP_ATOMIC, 0))
+		panic("Failed to allocate lazy MMU PTE cache for the boot CPU\n");
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2acc79383e7d..d18a3263b549 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -170,14 +170,14 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_lazy);

-pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
- pte_t *ptep)
+pte_t ___ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
{
return ptep_flush_lazy(vma->vm_mm, addr, ptep, 1);
}

-void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
- pte_t *ptep, pte_t old_pte, pte_t pte)
+void ___ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
{
if (pte_present(pte))
pte = clear_pte_bit(pte, __pgprot(_PAGE_UNUSED));
--
2.53.0