[RFC PATCH v2 09/15] mm: Introduce kpkeys_hardened_pgtables
From: Kevin Brodsky
Date: Wed Jan 08 2025 - 05:36:36 EST
kpkeys_hardened_pgtables is a hardening feature based on kpkeys. It
aims to prevent the corruption of page tables by: 1. mapping all
page table pages, both kernel and user, with a privileged pkey
(KPKEYS_PKEY_PGTABLES), and 2. granting write access to that pkey
only when running at a higher kpkeys level (KPKEYS_LVL_PGTABLES).
The feature is exposed as CONFIG_KPKEYS_HARDENED_PGTABLES; it
requires explicit architecture opt-in by selecting
ARCH_HAS_KPKEYS_HARDENED_PGTABLES, since much of the page table
handling is arch-specific.
This patch introduces an API to modify the pkey of page table pages (PTPs) and switch
kpkeys level using a guard object. Because this API is going to be
called from low-level pgtable helpers (setters, allocators), it must
be inactive on boot and explicitly switched on if and when kpkeys
become available. A static key is used for that purpose; it is the
responsibility of each architecture supporting
kpkeys_hardened_pgtables to call kpkeys_hardened_pgtables_enable()
as early as possible to switch on that static key. The initial
kernel page tables are also walked to set their pkey, since they
have already been allocated at that point.
The definition of the kpkeys_hardened_pgtables guard class does not
use the static key on the restore path to avoid mismatched
set/restore pairs. Indeed, enabling the static key itself involves
modifying page tables, and it is thus possible that the guard object
is created when the static key appears as false, and destroyed when it
appears as true. To avoid this situation, we reserve an invalid value
for the pkey register and use it to disable the restore path.
Signed-off-by: Kevin Brodsky <kevin.brodsky@xxxxxxx>
---
include/asm-generic/kpkeys.h | 12 +++++++++
include/linux/kpkeys.h | 48 ++++++++++++++++++++++++++++++++++-
mm/Kconfig | 3 +++
mm/Makefile | 1 +
mm/kpkeys_hardened_pgtables.c | 44 ++++++++++++++++++++++++++++++++
security/Kconfig.hardening | 12 +++++++++
6 files changed, 119 insertions(+), 1 deletion(-)
create mode 100644 mm/kpkeys_hardened_pgtables.c
diff --git a/include/asm-generic/kpkeys.h b/include/asm-generic/kpkeys.h
index 3404ce249757..cec92334a9f3 100644
--- a/include/asm-generic/kpkeys.h
+++ b/include/asm-generic/kpkeys.h
@@ -2,8 +2,20 @@
#ifndef __ASM_GENERIC_KPKEYS_H
#define __ASM_GENERIC_KPKEYS_H
+/*
+ * pkey that page table pages are mapped with when kpkeys_hardened_pgtables is
+ * in use. This is only a default: an earlier definition takes precedence.
+ */
+#ifndef KPKEYS_PKEY_PGTABLES
+#define KPKEYS_PKEY_PGTABLES 1
+#endif
+
#ifndef KPKEYS_PKEY_DEFAULT
#define KPKEYS_PKEY_DEFAULT 0
#endif
+/*
+ * Represents a pkey register value that cannot be used, typically disabling
+ * access to all keys.
+ *
+ * The kpkeys_hardened_pgtables guard uses it as a marker meaning "no register
+ * value was saved, nothing to restore". This is only a default: an earlier
+ * definition takes precedence.
+ */
+#ifndef KPKEYS_PKEY_REG_INVAL
+#define KPKEYS_PKEY_REG_INVAL 0
+#endif
+
#endif /* __ASM_GENERIC_KPKEYS_H */
diff --git a/include/linux/kpkeys.h b/include/linux/kpkeys.h
index 70e44b0db150..587cf8b4bd33 100644
--- a/include/linux/kpkeys.h
+++ b/include/linux/kpkeys.h
@@ -4,11 +4,13 @@
#include <linux/bug.h>
#include <linux/cleanup.h>
+#include <linux/jump_label.h>
#define KPKEYS_LVL_DEFAULT 0
+/* Level at which write access to KPKEYS_PKEY_PGTABLES is granted. */
+#define KPKEYS_LVL_PGTABLES 1
#define KPKEYS_LVL_MIN KPKEYS_LVL_DEFAULT
-#define KPKEYS_LVL_MAX KPKEYS_LVL_DEFAULT
+#define KPKEYS_LVL_MAX KPKEYS_LVL_PGTABLES
#define KPKEYS_GUARD(_name, set_level, restore_pkey_reg) \
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
@@ -63,4 +65,48 @@ static inline bool arch_kpkeys_enabled(void)
#endif /* CONFIG_ARCH_HAS_KPKEYS */
+/*
+ * Only pointers to struct folio appear below (in prototypes and in the
+ * compiled-out stubs), so a forward declaration is enough. It keeps this
+ * header self-contained regardless of what the includer pulled in before it.
+ */
+struct folio;
+
+#ifdef CONFIG_KPKEYS_HARDENED_PGTABLES
+
+/*
+ * Runtime switch for the feature; starts out false and is defined in
+ * mm/kpkeys_hardened_pgtables.c.
+ */
+DECLARE_STATIC_KEY_FALSE(kpkeys_hardened_pgtables_enabled);
+
+/*
+ * Use guard(kpkeys_hardened_pgtables)() to temporarily grant write access
+ * to page tables.
+ *
+ * If the static key is off when the guard is created, the set expression
+ * evaluates to KPKEYS_PKEY_REG_INVAL instead of calling kpkeys_set_level(),
+ * and the restore expression then does nothing. The restore path tests the
+ * saved value rather than the static key on purpose: the key may be switched
+ * on between the creation and the destruction of a guard object, which would
+ * otherwise produce a restore without a matching set.
+ */
+KPKEYS_GUARD(kpkeys_hardened_pgtables,
+ static_branch_unlikely(&kpkeys_hardened_pgtables_enabled) ?
+ kpkeys_set_level(KPKEYS_LVL_PGTABLES) :
+ KPKEYS_PKEY_REG_INVAL,
+ _T != KPKEYS_PKEY_REG_INVAL ?
+ kpkeys_restore_pkey_reg(_T) :
+ (void)0)
+
+/*
+ * Set the pkey of the memory backing @folio to KPKEYS_PKEY_PGTABLES (protect)
+ * or back to KPKEYS_PKEY_DEFAULT (unprotect).
+ *
+ * While kpkeys_hardened_pgtables_enabled is off, both return 0 without
+ * calling set_memory_pkey(). Otherwise they return the value returned by
+ * set_memory_pkey() and WARN if it is non-zero.
+ */
+int kpkeys_protect_pgtable_memory(struct folio *folio);
+int kpkeys_unprotect_pgtable_memory(struct folio *folio);
+
+/*
+ * Enables kpkeys_hardened_pgtables and switches existing kernel page tables to
+ * a privileged pkey (KPKEYS_PKEY_PGTABLES).
+ *
+ * Does nothing if arch_kpkeys_enabled() returns false. A failure to switch the
+ * existing kernel page tables is reported with a WARN; no error is returned.
+ *
+ * Should be called as early as possible by architecture code, after (k)pkeys
+ * are initialised and before any user task is spawned.
+ */
+void kpkeys_hardened_pgtables_enable(void);
+
+#else /* CONFIG_KPKEYS_HARDENED_PGTABLES */
+
+/*
+ * Feature compiled out: guard(kpkeys_hardened_pgtables)() can still be used,
+ * but its set expression is the constant 0 and its restore expression only
+ * discards the saved value.
+ */
+KPKEYS_GUARD(kpkeys_hardened_pgtables, 0, (void)_T)
+
+/* Stub for !CONFIG_KPKEYS_HARDENED_PGTABLES: ignores @folio, returns 0. */
+static inline int kpkeys_protect_pgtable_memory(struct folio *folio)
+{
+ return 0;
+}
+/* Stub for !CONFIG_KPKEYS_HARDENED_PGTABLES: ignores @folio, returns 0. */
+static inline int kpkeys_unprotect_pgtable_memory(struct folio *folio)
+{
+ return 0;
+}
+/* Stub for !CONFIG_KPKEYS_HARDENED_PGTABLES: nothing to enable. */
+static inline void kpkeys_hardened_pgtables_enable(void) {}
+
+#endif /* CONFIG_KPKEYS_HARDENED_PGTABLES */
+
#endif /* _LINUX_KPKEYS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 71edc478f111..2a8ebe780e64 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1149,6 +1149,9 @@ config ARCH_HAS_PKEYS
bool
config ARCH_HAS_KPKEYS
bool
+# Selected by architectures that opt in to KPKEYS_HARDENED_PGTABLES.
+# ARCH_HAS_KPKEYS must be selected when selecting this option
+config ARCH_HAS_KPKEYS_HARDENED_PGTABLES
+ bool
config ARCH_USES_PG_ARCH_2
bool
diff --git a/mm/Makefile b/mm/Makefile
index 850386a67b3e..130691364172 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -147,3 +147,4 @@ obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
obj-$(CONFIG_EXECMEM) += execmem.o
obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
+obj-$(CONFIG_KPKEYS_HARDENED_PGTABLES) += kpkeys_hardened_pgtables.o
diff --git a/mm/kpkeys_hardened_pgtables.c b/mm/kpkeys_hardened_pgtables.c
new file mode 100644
index 000000000000..c6eb7fb6ae56
--- /dev/null
+++ b/mm/kpkeys_hardened_pgtables.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/mm.h>
+#include <linux/kpkeys.h>
+#include <linux/set_memory.h>
+
+/*
+ * Starts out false; switched on by kpkeys_hardened_pgtables_enable() when
+ * arch_kpkeys_enabled() returns true.
+ */
+DEFINE_STATIC_KEY_FALSE(kpkeys_hardened_pgtables_enabled);
+
+/*
+ * Tag the memory backing @folio with KPKEYS_PKEY_PGTABLES.
+ *
+ * Returns 0 without calling set_memory_pkey() while
+ * kpkeys_hardened_pgtables_enabled is off. Otherwise returns whatever
+ * set_memory_pkey() returned, after a WARN if that value is non-zero.
+ */
+int kpkeys_protect_pgtable_memory(struct folio *folio)
+{
+	unsigned long start = (unsigned long)folio_address(folio);
+	int nr_pages = 1 << folio_order(folio);
+	int err;
+
+	/* Feature not switched on (yet): leave the folio's pkey alone. */
+	if (!static_branch_unlikely(&kpkeys_hardened_pgtables_enabled))
+		return 0;
+
+	err = set_memory_pkey(start, nr_pages, KPKEYS_PKEY_PGTABLES);
+	WARN_ON(err);
+
+	return err;
+}
+
+/*
+ * Return the memory backing @folio to KPKEYS_PKEY_DEFAULT.
+ *
+ * Returns 0 without calling set_memory_pkey() while
+ * kpkeys_hardened_pgtables_enabled is off. Otherwise returns whatever
+ * set_memory_pkey() returned, after a WARN if that value is non-zero.
+ */
+int kpkeys_unprotect_pgtable_memory(struct folio *folio)
+{
+	unsigned long start = (unsigned long)folio_address(folio);
+	int nr_pages = 1 << folio_order(folio);
+	int err;
+
+	/* Feature not switched on (yet): leave the folio's pkey alone. */
+	if (!static_branch_unlikely(&kpkeys_hardened_pgtables_enabled))
+		return 0;
+
+	err = set_memory_pkey(start, nr_pages, KPKEYS_PKEY_DEFAULT);
+	WARN_ON(err);
+
+	return err;
+}
+
+/*
+ * Switch kpkeys_hardened_pgtables on, provided arch_kpkeys_enabled() returns
+ * true; otherwise do nothing.
+ *
+ * The static key is enabled first, then the kernel page tables that already
+ * exist are given KPKEYS_PKEY_PGTABLES. A failure of that second step is
+ * reported with a WARN only.
+ */
+void __init kpkeys_hardened_pgtables_enable(void)
+{
+	int err;
+
+	if (arch_kpkeys_enabled()) {
+		static_branch_enable(&kpkeys_hardened_pgtables_enabled);
+
+		/* These page tables were allocated before the key was on. */
+		err = kernel_pgtables_set_pkey(KPKEYS_PKEY_PGTABLES);
+		WARN_ON(err);
+	}
+}
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index c9d5ca3d8d08..95f93f1d4055 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -300,6 +300,18 @@ config BUG_ON_DATA_CORRUPTION
If unsure, say N.
+config KPKEYS_HARDENED_PGTABLES
+ bool "Harden page tables using kernel pkeys"
+ depends on ARCH_HAS_KPKEYS_HARDENED_PGTABLES
+ help
+ This option makes all page tables mostly read-only by
+ allocating them with a non-default protection key (pkey) and
+ only enabling write access to that pkey in routines that are
+ expected to write to page table entries.
+
+ This option has no effect if the system does not support
+ kernel pkeys.
+
endmenu
config CC_HAS_RANDSTRUCT
--
2.47.0