Re: [PATCH RFC dontapply 5/5] kvm_para: guest side for eoi avoidance

From: Gleb Natapov
Date: Tue Apr 24 2012 - 02:50:17 EST


On Mon, Apr 23, 2012 at 05:04:40PM +0300, Michael S. Tsirkin wrote:
> The idea is simple: there's a bit, per APIC, in guest memory,
> that tells the guest that it does not need EOI.
> Guest tests it using a single est and clear operation - this is
> necessary so that host can detect interrupt nesting - and if set, it can
> skip the EOI MSR.
>
> This patch is not final yet, pls don't apply
> until host side is also finalized. Including
> it here for completeness to show another user
> of the new eoi_write interface.
>
> Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
> ---
> arch/x86/include/asm/bitops.h | 6 +++-
> arch/x86/include/asm/kvm_para.h | 2 +
> arch/x86/kernel/kvm.c | 51 ++++++++++++++++++++++++++++++++++++--
> 3 files changed, 54 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index b97596e..c9c70ea 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -26,11 +26,13 @@
> #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
> /* Technically wrong, but this avoids compilation errors on some gcc
> versions. */
> -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
> +#define BITOP_ADDR_CONSTRAINT "=m"
> #else
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define BITOP_ADDR_CONSTRAINT "+m"
> #endif
>
> +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x))
> +
> #define ADDR BITOP_ADDR(addr)
>
> /*
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 734c376..195840d 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -22,6 +22,7 @@
> #define KVM_FEATURE_CLOCKSOURCE2 3
> #define KVM_FEATURE_ASYNC_PF 4
> #define KVM_FEATURE_STEAL_TIME 5
> +#define KVM_FEATURE_EOI 6
>
> /* The last 8 bits are used to indicate how to interpret the flags field
> * in pvclock structure. If no bits are set, all flags are ignored.
> @@ -37,6 +38,7 @@
> #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> #define MSR_KVM_STEAL_TIME 0x4b564d03
> +#define MSR_KVM_EOI_EN 0x4b564d04
>
> struct kvm_steal_time {
> __u64 steal;
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index b8ba6e4..ad66cdd 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -39,6 +39,8 @@
> #include <asm/desc.h>
> #include <asm/tlbflush.h>
> #include <asm/idle.h>
> +#include <asm/apic.h>
> +#include <asm/apicdef.h>
>
> static int kvmapf = 1;
>
> @@ -290,6 +292,27 @@ static void kvm_register_steal_time(void)
> cpu, __pa(st));
> }
>
> +static DEFINE_PER_CPU(u16, kvm_apic_eoi) __aligned(2) = 0;
> +
> +/* Our own copy of __test_and_clear_bit to make sure
> + * it is done with a single instruction */
> +static inline int kvm_test_and_clear_bit(int nr, volatile u16* addr)
> +{
> + int oldbit;
> +
> + asm volatile("btr %2,%1\n\t"
> + "sbb %0,%0"
> + : "=r" (oldbit), BITOP_ADDR_CONSTRAINT(*addr) : "Ir" (nr));
> + return oldbit;
> +}
> +
> +static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
> +{
> + if (kvm_test_and_clear_bit(0, &__get_cpu_var(kvm_apic_eoi)))
> + return;
> + apic->write(APIC_EOI, APIC_EOI_ACK);
> +}
> +
> void __cpuinit kvm_guest_cpu_init(void)
> {
> if (!kvm_para_available())
> @@ -307,11 +330,17 @@ void __cpuinit kvm_guest_cpu_init(void)
> smp_processor_id());
> }
>
> + if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> + __get_cpu_var(kvm_apic_eoi) = 0;
> + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(kvm_apic_eoi)) |
> + KVM_MSR_ENABLED);
> + }
> +
> if (has_steal_clock)
> kvm_register_steal_time();
> }
>
> -static void kvm_pv_disable_apf(void *unused)
> +static void kvm_pv_disable_apf(void)
> {
> if (!__get_cpu_var(apf_reason).enabled)
> return;
> @@ -323,11 +352,18 @@ static void kvm_pv_disable_apf(void *unused)
> smp_processor_id());
> }
>
> +static void kvm_pv_guest_cpu_reboot(void *unused)
> +{
> + if (kvm_para_has_feature(KVM_FEATURE_EOI))
> + wrmsrl(MSR_KVM_EOI_EN, 0);
> + kvm_pv_disable_apf();
> +}
> +
> static int kvm_pv_reboot_notify(struct notifier_block *nb,
> unsigned long code, void *unused)
> {
> if (code == SYS_RESTART)
> - on_each_cpu(kvm_pv_disable_apf, NULL, 1);
> + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
> return NOTIFY_DONE;
> }
>
> @@ -378,7 +414,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
> static void kvm_guest_cpu_offline(void *dummy)
> {
> kvm_disable_steal_time();
> - kvm_pv_disable_apf(NULL);
> + if (kvm_para_has_feature(KVM_FEATURE_EOI))
> + wrmsrl(MSR_KVM_EOI_EN, 0);
> + kvm_pv_disable_apf();
> apf_task_wake_all();
> }
>
> @@ -431,6 +469,13 @@ void __init kvm_guest_init(void)
> pv_time_ops.steal_clock = kvm_steal_clock;
> }
>
> + if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> + /* Should happen once after apic is initialized */
> + BUG_ON(!apic);
What if kernel was started with noapic?

> + BUG_ON(apic->eoi_write == kvm_guest_apic_eoi_write);
> + apic->eoi_write = kvm_guest_apic_eoi_write;
> + }
> +
> #ifdef CONFIG_SMP
> smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
> register_cpu_notifier(&kvm_cpu_notifier);
> --
> 1.7.9.111.gf3fb0

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/