[PATCH v2 5/6] x86/xen: Add a Xen-specific sync_core() implementation

From: Andy Lutomirski
Date: Thu Dec 01 2016 - 19:35:29 EST


On Xen PV, CPUID is likely to trap, and Xen hypercalls aren't
guaranteed to serialize. (Even CPUID isn't *really* guaranteed to
serialize on Xen PV, but, in practice, any trap it generates will
serialize.)

On my laptop, CPUID(eax=1, ecx=0) is ~83ns and IRET-to-self is
~110ns. But Xen PV will trap CPUID if possible, so IRET-to-self
should end up being a nice speedup.

Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
arch/x86/xen/enlighten.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bdd855685403..1f765b41eee7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -311,6 +311,39 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;

+/* Serialize the CPU with an IRET-to-self; on Xen PV, CPUID is likely to trap. */
+static void xen_sync_core(void)
+{
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_32
+ /* A same-privilege 32-bit IRET pops only EIP, CS and EFLAGS, not SS:ESP. */
+ asm volatile (
+ "pushfl\n\t"
+ "pushl %%cs\n\t"
+ "pushl $1f\n\t"
+ "iret\n\t"
+ "1:"
+ : "+r" (__sp) : : "cc", "memory");
+#else
+ unsigned long tmp;
+
+ /* 64-bit IRET always pops SS:RSP; addq fixes saved RSP to its pre-push value. */
+ asm volatile (
+ "movq %%ss, %0\n\t"
+ "pushq %0\n\t"
+ "pushq %%rsp\n\t"
+ "addq $8, (%%rsp)\n\t"
+ "pushfq\n\t"
+ "movq %%cs, %0\n\t"
+ "pushq %0\n\t"
+ "pushq $1f\n\t"
+ "iretq\n\t"
+ "1:"
+ : "=r" (tmp), "+r" (__sp) : : "cc", "memory");
+#endif
+}
+
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
@@ -1289,6 +1322,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {

.start_context_switch = paravirt_start_context_switch,
.end_context_switch = xen_end_context_switch,
+
+ .sync_core = xen_sync_core,
};

static void xen_reboot(int reason)
--
2.9.3