[PATCH 19/24] x86/entry: Add the C version ist_restore_cr3()

From: Lai Jiangshan
Date: Tue Aug 31 2021 - 13:52:40 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>

It implements the C version of RESTORE_CR3().

No functional difference intended, except that the ASM code uses bit test
and clear operations while the C version uses a mask check and 'AND'
operations. The resulting asm code of both versions is very similar.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
arch/x86/entry/traps.c | 46 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)

diff --git a/arch/x86/entry/traps.c b/arch/x86/entry/traps.c
index 9d6753d0b9fc..e912bfbd2a61 100644
--- a/arch/x86/entry/traps.c
+++ b/arch/x86/entry/traps.c
@@ -65,6 +65,7 @@
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
#include <asm/proto.h>
+#include <asm/tlbflush.h>

extern unsigned char native_irq_return_iret[];
extern unsigned char asm_load_gs_index_gs_change[];
@@ -799,9 +800,54 @@ static __always_inline unsigned long ist_switch_to_kernel_cr3(void)

return cr3;
}
+/*
+ * pti_switch_to_user_cr3() - write @user_cr3 to CR3, honouring a pending
+ * flush for the PCID it carries.
+ * @user_cr3: the CR3 value to load.
+ *
+ * Without X86_FEATURE_PCID the value is written unmodified. With PCID, the
+ * PCID field of @user_cr3 (with the bits in PTI_USER_PCID_MASK masked off)
+ * selects one bit in this CPU's cpu_tlbstate.user_pcid_flush_mask:
+ *  - bit clear: X86_CR3_PCID_NOFLUSH is ORed into the value before the write;
+ *  - bit set:   the bit is cleared and the value is written without NOFLUSH.
+ */
+static __always_inline void pti_switch_to_user_cr3(unsigned long user_cr3)
+{
+#define KERN_PCID_MASK (CR3_PCID_MASK & ~PTI_USER_PCID_MASK)
+
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ int pcid = user_cr3 & KERN_PCID_MASK;
+ unsigned short pcid_mask = 1ull << pcid;
+
+ /*
+ * Check if there's a pending flush for the user ASID we're
+ * about to set.
+ *
+ * NOTE(review): pcid_mask is an unsigned short, so this presumably
+ * relies on pcid being below 16 -- confirm against the ASID
+ * allocation limits.
+ */
+ if (!(this_cpu_read(cpu_tlbstate.user_pcid_flush_mask) & pcid_mask))
+ user_cr3 |= X86_CR3_PCID_NOFLUSH;
+ else
+ this_cpu_and(cpu_tlbstate.user_pcid_flush_mask, ~pcid_mask);
+ }
+ native_write_cr3(user_cr3);
+}
+/*
+ * ist_restore_cr3() - C version of RESTORE_CR3(): write @cr3 back to CR3.
+ * @cr3: the CR3 value to restore (presumably the one saved on entry by
+ *       ist_switch_to_kernel_cr3() -- confirm against the callers).
+ *
+ * Does nothing when the CPU lacks X86_FEATURE_PTI. If @cr3 has
+ * PTI_USER_PGTABLE_MASK set, the write is delegated to
+ * pti_switch_to_user_cr3(). Otherwise @cr3 is written directly, with
+ * X86_CR3_PCID_NOFLUSH set when the CPU has X86_FEATURE_PCID.
+ */
+static __always_inline void ist_restore_cr3(unsigned long cr3)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ if (unlikely(cr3 & PTI_USER_PGTABLE_MASK)) {
+ pti_switch_to_user_cr3(cr3);
+ return;
+ }
+
+ /*
+ * KERNEL pages can always resume with NOFLUSH as we do
+ * explicit flushes.
+ */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr3 |= X86_CR3_PCID_NOFLUSH;
+
+ /*
+ * The CR3 write could be avoided when not changing its value,
+ * but would require a CR3 read.
+ */
+ native_write_cr3(cr3);
+}
#else
static __always_inline void switch_to_kernel_cr3(void) {}
static __always_inline unsigned long ist_switch_to_kernel_cr3(void) { return 0; }
+static __always_inline void ist_restore_cr3(unsigned long cr3) {} /* empty variant for the #else configuration: @cr3 is ignored */
#endif

/*
--
2.19.1.6.gb485710b