Re: [RFC PATCH] x86_64,entry,xen: Do not invoke espfix64 on Xen

From: Boris Ostrovsky
Date: Tue Jul 15 2014 - 13:17:36 EST

On 07/15/2014 12:23 PM, Andy Lutomirski wrote:
This moves the espfix64 logic into native_iret. To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>

I just pushed this to, so the kbuild robot should be pounding
on it soon. It passes all my tests on bare metal, and it fails them
without crashing on Xen.

It's not exactly beautiful, but I think it's an improvement.

arch/x86/include/asm/irqflags.h | 2 +-
arch/x86/kernel/entry_64.S | 28 ++++++++++------------------
arch/x86/kernel/paravirt_patch_64.c | 2 --
3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf8..0a8b519 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
-#define INTERRUPT_RETURN iretq
+#define INTERRUPT_RETURN jmp native_iret
#define USERGS_SYSRET64 \
swapgs; \
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca96..c844f08 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,27 +830,24 @@ restore_args:
* Are we returning to a stack segment from the LDT? Note: in
* 64-bit mode SS:RSP on the exception stack is always valid.
#ifdef CONFIG_X86_ESPFIX64
testb $4,(SS-RIP)(%rsp)
- jnz irq_return_ldt
+ jnz native_irq_return_ldt
- _ASM_EXTABLE(irq_return_iret, bad_iret)
- _ASM_EXTABLE(native_iret, bad_iret)
+ _ASM_EXTABLE(native_irq_return_iret, bad_iret)
#ifdef CONFIG_X86_ESPFIX64
pushq_cfi %rax
pushq_cfi %rdi
@@ -872,7 +869,7 @@ irq_return_ldt:
movq %rax,%rsp
popq_cfi %rax
- jmp irq_return_iret
+ jmp native_irq_return_iret
.section .fixup,"ax"
@@ -956,13 +953,8 @@ __do_double_fault:
cmpl $__KERNEL_CS,CS(%rdi)
jne do_double_fault
movq RIP(%rdi),%rax
- cmpq $irq_return_iret,%rax
- je 1f
- cmpq $native_iret,%rax
+ cmpq $native_irq_return_iret,%rax
jne do_double_fault /* This shouldn't happen... */
movq PER_CPU_VAR(kernel_stack),%rax
subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
movq %rax,RSP(%rdi)
@@ -1428,7 +1420,7 @@ error_sti:
incl %ebx
- leaq irq_return_iret(%rip),%rcx
+ leaq native_irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
movl %ecx,%eax /* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34..a1da673 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, iret);

Does this mean that we are no longer patching IRET with a jump to a hypercall?


PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);

To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at
Please read the FAQ at