[PATCH V2 21/41] x86/entry: Add the C version ist_restore_cr3()

From: Lai Jiangshan
Date: Sun Sep 26 2021 - 11:11:30 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

It implements the C version of RESTORE_CR3().

No functional difference intended, except that the ASM code uses bit test
and clear operations while the C version uses mask check and 'AND'
operations. The resulting asm code of both versions is very similar.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
arch/x86/entry/entry64.c | 46 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)

diff --git a/arch/x86/entry/entry64.c b/arch/x86/entry/entry64.c
index faee44a3d1d8..2db9ae3508f1 100644
--- a/arch/x86/entry/entry64.c
+++ b/arch/x86/entry/entry64.c
@@ -8,6 +8,7 @@
* environments that the GS base is user controlled value, or the CR3
* is PTI user CR3 or both.
*/
+#include <asm/tlbflush.h>
#include <asm/traps.h>

extern unsigned char asm_load_gs_index_gs_change[];
@@ -27,6 +28,26 @@ static __always_inline void pti_switch_to_kernel_cr3(unsigned long user_cr3)
native_write_cr3(cr3);
}

+static __always_inline void pti_switch_to_user_cr3(unsigned long user_cr3)
+{
+#define KERN_PCID_MASK (CR3_PCID_MASK & ~PTI_USER_PCID_MASK)
+
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ int pcid = user_cr3 & KERN_PCID_MASK;
+ unsigned short pcid_mask = 1ull << pcid;
+
+ /*
+ * Check if there's a pending flush for the user ASID we're
+ * about to set.
+ */
+ if (!(this_cpu_read(cpu_tlbstate.user_pcid_flush_mask) & pcid_mask))
+ user_cr3 |= X86_CR3_PCID_NOFLUSH;
+ else
+ this_cpu_and(cpu_tlbstate.user_pcid_flush_mask, ~pcid_mask);
+ }
+ native_write_cr3(user_cr3);
+}
+
static __always_inline void switch_to_kernel_cr3(void)
{
if (static_cpu_has(X86_FEATURE_PTI))
@@ -46,9 +67,34 @@ static __always_inline unsigned long ist_switch_to_kernel_cr3(void)

return cr3;
}
+
+static __always_inline void ist_restore_cr3(unsigned long cr3)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ if (unlikely(cr3 & PTI_USER_PGTABLE_MASK)) {
+ pti_switch_to_user_cr3(cr3);
+ return;
+ }
+
+ /*
+ * KERNEL pages can always resume with NOFLUSH as we do
+ * explicit flushes.
+ */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr3 |= X86_CR3_PCID_NOFLUSH;
+
+ /*
+ * The CR3 write could be avoided when not changing its value,
+ * but would require a CR3 read.
+ */
+ native_write_cr3(cr3);
+}
#else
static __always_inline void switch_to_kernel_cr3(void) {}
static __always_inline unsigned long ist_switch_to_kernel_cr3(void) { return 0; }
+static __always_inline void ist_restore_cr3(unsigned long cr3) {}
#endif

/*
--
2.19.1.6.gb485710b