Re: WARNING: can't access registers at asm_common_interrupt

From: Peter Zijlstra
Date: Mon Nov 16 2020 - 08:04:26 EST


On Mon, Nov 16, 2020 at 12:56:32PM +0100, Jürgen Groß wrote:
> > > > > > > static inline notrace unsigned long arch_local_save_flags(void)
> > > > > > > {
> > > > > > >         PVOP_CALL_ARGS;
> > > > > > >         PVOP_TEST_NULL(irq.save_fl);
> > > > > > >         asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
> > > > > > >                                         "PUSHF; POP _ASM_AX",
> > > > > > >                                         X86_FEATURE_NATIVE)
>
> I am wondering whether we really want a new feature (basically "not
> XENPV"). We could use ~X86_FEATURE_XENPV and teach apply_alternatives()
> to understand negated features (yes, this limits the number of features
> to 32767, but I don't think this is a real problem for quite some time).
>
> Thoughts?

I went with the simple thing for now... If we ever want/need another
negative alternative I suppose we can do as you suggest...
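
For reference, a rough sketch of what that negated-feature scheme could
look like (ALTINSTR_FLAG_INV, ALT_NOT and the exact check below are
illustrative, not part of the patch):

        /*
         * Sketch: steal the top bit of struct alt_instr::cpuid as a
         * "patch when the feature is absent" flag.
         */
        #define ALTINSTR_FLAG_INV       (1 << 15)
        #define ALT_NOT(feature)        ((feature) | ALTINSTR_FLAG_INV)

        /* in apply_alternatives(), for each struct alt_instr *a: */
        u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;

        /*
         * Replace the instruction when the feature state matches the
         * requested polarity: present with the flag clear, or absent
         * with the flag set; otherwise keep the original instruction.
         */
        if (boot_cpu_has(feature) == !!(a->cpuid & ALTINSTR_FLAG_INV))
                continue;

Sites like the one quoted above would then use
ALT_NOT(X86_FEATURE_XENPV) instead of defining a synthetic
X86_FEATURE_NOT_XENPV bit.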

I'm still poking at objtool to make it actually do the right thing
though...

---
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dad350d42ecf..cc88f358bac5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -237,6 +237,7 @@
 #define X86_FEATURE_VMCALL              ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL         ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES              ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_NOT_XENPV           ( 8*32+21) /* "" inverse of X86_FEATURE_XENPV */

 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE            ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d25cc6830e89..e2a9d3e6b7ad 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -645,22 +645,56 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
 #ifdef CONFIG_PARAVIRT_XXL
 static inline notrace unsigned long arch_local_save_flags(void)
 {
-        return PVOP_CALLEE0(unsigned long, irq.save_fl);
+        PVOP_CALL_ARGS;
+        PVOP_TEST_NULL(irq.save_fl);
+        asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+                                        "pushf; pop %%" _ASM_AX ";",
+                                        X86_FEATURE_NOT_XENPV)
+                            : PVOP_CALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+                            : paravirt_type(irq.save_fl.func),
+                              paravirt_clobber(CLBR_RET_REG)
+                            : "memory", "cc");
+        return __eax;
 }

 static inline notrace void arch_local_irq_restore(unsigned long f)
 {
-        PVOP_VCALLEE1(irq.restore_fl, f);
+        PVOP_CALL_ARGS;
+        PVOP_TEST_NULL(irq.restore_fl);
+        asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+                                        "push %%" _ASM_ARG1 "; popf;",
+                                        X86_FEATURE_NOT_XENPV)
+                            : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+                            : paravirt_type(irq.restore_fl.func),
+                              paravirt_clobber(CLBR_RET_REG),
+                              PVOP_CALL_ARG1(f)
+                            : "memory", "cc");
 }

 static inline notrace void arch_local_irq_disable(void)
 {
-        PVOP_VCALLEE0(irq.irq_disable);
+        PVOP_CALL_ARGS;
+        PVOP_TEST_NULL(irq.irq_disable);
+        asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+                                        "cli;",
+                                        X86_FEATURE_NOT_XENPV)
+                            : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+                            : paravirt_type(irq.irq_disable.func),
+                              paravirt_clobber(CLBR_RET_REG)
+                            : "memory", "cc");
 }

 static inline notrace void arch_local_irq_enable(void)
 {
-        PVOP_VCALLEE0(irq.irq_enable);
+        PVOP_CALL_ARGS;
+        PVOP_TEST_NULL(irq.irq_enable);
+        asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
+                                        "sti;",
+                                        X86_FEATURE_NOT_XENPV)
+                            : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
+                            : paravirt_type(irq.irq_enable.func),
+                              paravirt_clobber(CLBR_RET_REG)
+                            : "memory", "cc");
 }

 static inline notrace unsigned long arch_local_irq_save(void)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2400ad62f330..5bd8f5503652 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -723,6 +723,19 @@ void __init alternative_instructions(void)
          * patching.
          */

+        if (!boot_cpu_has(X86_FEATURE_XENPV))
+                setup_force_cpu_cap(X86_FEATURE_NOT_XENPV);
+
+        /*
+         * First patch paravirt functions, such that we overwrite the indirect
+         * call with the direct call.
+         */
+        apply_paravirt(__parainstructions, __parainstructions_end);
+
+        /*
+         * Then patch alternatives, such that those paravirt calls that are in
+         * alternatives can be overwritten by their immediate fragments.
+         */
         apply_alternatives(__alt_instructions, __alt_instructions_end);

 #ifdef CONFIG_SMP
@@ -741,8 +754,6 @@
         }
 #endif

-        apply_paravirt(__parainstructions, __parainstructions_end);
-
         restart_nmi();
         alternatives_patched = 1;
 }
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index ace6e334cb39..867498db53ad 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -33,13 +33,9 @@ struct patch_xxl {
 };

 static const struct patch_xxl patch_data_xxl = {
-        .irq_irq_disable        = { 0xfa },             // cli
-        .irq_irq_enable         = { 0xfb },             // sti
-        .irq_save_fl            = { 0x9c, 0x58 },       // pushf; pop %[re]ax
         .mmu_read_cr2           = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
         .mmu_read_cr3           = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
         .mmu_write_cr3          = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
-        .irq_restore_fl         = { 0x57, 0x9d },       // push %rdi; popfq
         .cpu_wbinvd             = { 0x0f, 0x09 },       // wbinvd
         .cpu_usergs_sysret64    = { 0x0f, 0x01, 0xf8,
                                     0x48, 0x0f, 0x07 }, // swapgs; sysretq
@@ -76,11 +72,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
         switch (type) {

 #ifdef CONFIG_PARAVIRT_XXL
-        PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
-        PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
-        PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
-        PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
-
         PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
         PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
         PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
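
To make the patching order in the alternative.c hunk above concrete,
this is roughly what an irq_disable call site goes through at boot
(illustrative; the actual bytes also involve NOP padding):

        /* as compiled; the ALTERNATIVE's oldinstr is the pv indirect call */
        call    *pv_ops.irq.irq_disable

        /* after apply_paravirt(): the indirect call becomes a direct call */
        call    xen_irq_disable_direct

        /* after apply_alternatives(), if X86_FEATURE_NOT_XENPV was forced
         * on (i.e. not a Xen PV guest), the call is overwritten entirely */
        cli

On Xen PV the NOT_XENPV cap is never set, so the second rewrite is
skipped and the direct call from the paravirt pass is what runs. This
is why apply_paravirt() now has to run before apply_alternatives().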
