Re: [PATCH v4 1/4] pkeys: add API to switch to permissive/zero pkey register

From: Mathieu Desnoyers
Date: Mon Feb 24 2025 - 14:06:23 EST


On 2025-02-24 08:20, Dmitry Vyukov wrote:
The API allows to switch to permissive pkey register that allows accesses
to all PKEYs, and to a value that allows access to the 0 (default) PKEY.
This functionality is already used in x86 signal delivery, and will be
needed for rseq.

AFAIU the signal delivery uses the "allow access to all PKEYs"
approach only, not the "allow access to 0-pkey". It would be
good to clarify this in the commit message.


Signed-off-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Aruna Ramakrishna <aruna.ramakrishna@xxxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Fixes: d7822b1e24f2 ("rseq: Introduce restartable sequences system call")

---
Changes in v4:
- Added Fixes tag

Changes in v3:
- Renamed API functions to write_permissive_pkey_val/write_pkey_val
- Added enable_zero_pkey_val for rseq
- Added Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>

Changes in v2:
- Fixed typo in commit description
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pkeys.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/include/asm/pkru.h | 10 +++++++---
include/linux/pkeys.h | 31 +++++++++++++++++++++++++++++++
mm/Kconfig | 2 ++
5 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be2c311f5118d..43af2840d098f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1881,6 +1881,7 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
depends on X86_64 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select ARCH_USES_HIGH_VMA_FLAGS
select ARCH_HAS_PKEYS
+ select ARCH_HAS_PERMISSIVE_PKEY
help
Memory Protection Keys provides a mechanism for enforcing
page-based protections, but without requiring modification of the
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 2e6c04d8a45b4..d6e35ab5c3d22 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
+#include "pkru.h"
+
/*
* If more than 16 keys are ever supported, a thorough audit
* will be necessary to ensure that the types that store key
@@ -123,4 +125,35 @@ static inline int vma_pkey(struct vm_area_struct *vma)
return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
}
+typedef u32 pkey_reg_t;
+
+static inline pkey_reg_t write_permissive_pkey_val(void)
+{
+ return write_pkru(0);
+}
+
+static inline pkey_reg_t enable_zero_pkey_val(void)
+{
+ u32 pkru;
+
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return 0;
+ /*
+ * WRPKRU is relatively expensive compared to RDPKRU,
+ * avoid it if possible.
+ */
+ pkru = rdpkru();
+ if ((pkru & (PKRU_AD_BIT|PKRU_WD_BIT)) != 0)
+ wrpkru(pkru & ~(PKRU_AD_BIT|PKRU_WD_BIT));
+ return pkru;
+
+
+ return write_pkru(0);

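This is dead code: the "return write_pkru(0);" above comes after the
unconditional "return pkru;" and can never be reached. What am I missing?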

With those fixed, please keep my reviewed-by :)

Thanks,

Mathieu

+}
+
+static inline void write_pkey_val(pkey_reg_t val)
+{
+ write_pkru(val);
+}
+
#endif /*_ASM_X86_PKEYS_H */
diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h
index 74f0a2d34ffdd..b9bf9b7f2753b 100644
--- a/arch/x86/include/asm/pkru.h
+++ b/arch/x86/include/asm/pkru.h
@@ -39,16 +39,20 @@ static inline u32 read_pkru(void)
return 0;
}
-static inline void write_pkru(u32 pkru)
+static inline u32 write_pkru(u32 pkru)
{
+ u32 old_pkru;
+
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
- return;
+ return 0;
/*
* WRPKRU is relatively expensive compared to RDPKRU.
* Avoid WRPKRU when it would not change the value.
*/
- if (pkru != rdpkru())
+ old_pkru = rdpkru();
+ if (pkru != old_pkru)
wrpkru(pkru);
+ return old_pkru;
}
static inline void pkru_write_default(void)
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 86be8bf27b41b..262d60f6a15f8 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -48,4 +48,35 @@ static inline bool arch_pkeys_enabled(void)
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
+#ifndef CONFIG_ARCH_HAS_PERMISSIVE_PKEY
+
+/*
+ * Common name for value of the register that controls access to PKEYs
+ * (called differently on different arches: PKRU, POR, AMR).
+ */
+typedef char pkey_reg_t;
+
+/*
+ * Sets PKEY access register to the most permissive value that allows
+ * accesses to all PKEYs. Returns the current value of PKEY register.
+ * Code should generally arrange switching back to the old value
+ * using write_pkey_val(old_value).
+ */
+static inline pkey_reg_t write_permissive_pkey_val(void)
+{
+ return 0;
+}
+
+/*
+ * Sets PKEY access register to a value that allows access to the 0 (default)
+ * PKEY. Returns the current value of PKEY register.
+ */
+static inline pkey_reg_t enable_zero_pkey_val(void)
+{
+ return 0;
+}
+
+static inline void write_pkey_val(pkey_reg_t val) {}
+#endif /* ! CONFIG_ARCH_HAS_PERMISSIVE_PKEY */
+
#endif /* _LINUX_PKEYS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db064172..9e874f7713a2b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1147,6 +1147,8 @@ config ARCH_USES_HIGH_VMA_FLAGS
bool
config ARCH_HAS_PKEYS
bool
+config ARCH_HAS_PERMISSIVE_PKEY
+ bool
config ARCH_USES_PG_ARCH_2
bool


--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com