[RFC PATCH 66/73] x86/pvm: Use new cpu feature to describe XENPV and PVM

From: Lai Jiangshan
Date: Mon Feb 26 2024 - 10:01:46 EST


From: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>

Some PVOPS are patched as the native version directly if the guest is
not a XENPV guest. However, this approach will not work after
introducing a PVM guest. To address this, use a new CPU feature to
describe XENPV and PVM, and ensure that those PVOPS are patched only
when it is not a paravirtual guest.

Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 5 ++---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/paravirt.h | 14 +++++++-------
arch/x86/kernel/pvm.c | 1 +
arch/x86/xen/enlighten_pv.c | 1 +
5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fe12605b3c05..6b41a1837698 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -127,9 +127,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* In the PVM guest case we must use eretu synthetic instruction.
*/

- ALTERNATIVE_2 "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV, \
- "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_KVM_PVM_GUEST
+ ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
+ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_PV_GUEST

/*
* We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index e17e72f13423..72ef58a2db19 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -238,6 +238,7 @@
#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
#define X86_FEATURE_KVM_PVM_GUEST ( 8*32+23) /* KVM Pagetable-based Virtual Machine guest */
+#define X86_FEATURE_PV_GUEST ( 8*32+24) /* "" Paravirtual guest */

/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index deaee9ec575e..a864ee481ca2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -143,7 +143,7 @@ static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
"mov %%cr2, %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ ALT_NOT(X86_FEATURE_PV_GUEST));
}

static __always_inline void write_cr2(unsigned long x)
@@ -154,13 +154,13 @@ static __always_inline void write_cr2(unsigned long x)
static inline unsigned long __read_cr3(void)
{
return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
- "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_PV_GUEST));
}

static inline void write_cr3(unsigned long x)
{
PVOP_ALT_VCALL1(mmu.write_cr3, x,
- "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_PV_GUEST));
}

static inline void __write_cr4(unsigned long x)
@@ -694,17 +694,17 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ ALT_NOT(X86_FEATURE_PV_GUEST));
}

static __always_inline void arch_local_irq_disable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_PV_GUEST));
}

static __always_inline void arch_local_irq_enable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_PV_GUEST));
}

static __always_inline unsigned long arch_local_irq_save(void)
@@ -776,7 +776,7 @@ void native_pv_lock_init(void) __init;
.endm

#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
- ALT_NOT(X86_FEATURE_XENPV)
+ ALT_NOT(X86_FEATURE_PV_GUEST)
#endif
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index c38e46a96ad3..d39550a8159f 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -300,6 +300,7 @@ void __init pvm_early_setup(void)
return;

setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
+ setup_force_cpu_cap(X86_FEATURE_PV_GUEST);

wrmsrl(MSR_PVM_VCPU_STRUCT, __pa(this_cpu_ptr(&pvm_vcpu_struct)));
wrmsrl(MSR_PVM_EVENT_ENTRY, (unsigned long)(void *)pvm_early_kernel_event_entry - 256);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index aeb33e0a3f76..c56483051528 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -335,6 +335,7 @@ static bool __init xen_check_xsave(void)
static void __init xen_init_capabilities(void)
{
setup_force_cpu_cap(X86_FEATURE_XENPV);
+ setup_force_cpu_cap(X86_FEATURE_PV_GUEST);
setup_clear_cpu_cap(X86_FEATURE_DCA);
setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
setup_clear_cpu_cap(X86_FEATURE_MTRR);
--
2.19.1.6.gb485710b