Re: [v6] kvm/fpu: Enable fully eager restore kvm FPU

From: Wanpeng Li
Date: Thu Apr 23 2015 - 19:35:45 EST


On Fri, Apr 24, 2015 at 05:13:03AM +0800, Liang Li wrote:
>Romove lazy FPU logic and use eager FPU entirely. Eager FPU does
>not have performance regression, and it can simplify the code.
>
>When compiling kernel on westmere, the performance of eager FPU
>is about 0.4% faster than lazy FPU.
>
>Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx>
>Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>
>---
> arch/x86/include/asm/kvm_host.h | 1 -
> arch/x86/kvm/svm.c | 22 ++----------
> arch/x86/kvm/vmx.c | 74 +++--------------------------------------
> arch/x86/kvm/x86.c | 8 +----
> include/linux/kvm_host.h | 2 --
> 5 files changed, 9 insertions(+), 98 deletions(-)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index dea2e7e..5d84cc9 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -743,7 +743,6 @@ struct kvm_x86_ops {
> void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
> unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
> void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
>- void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
>
> void (*tlb_flush)(struct kvm_vcpu *vcpu);
>
>diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>index ce741b8..1b3b29b 100644
>--- a/arch/x86/kvm/svm.c
>+++ b/arch/x86/kvm/svm.c
>@@ -1087,7 +1087,6 @@ static void init_vmcb(struct vcpu_svm *svm)
> struct vmcb_control_area *control = &svm->vmcb->control;
> struct vmcb_save_area *save = &svm->vmcb->save;
>
>- svm->vcpu.fpu_active = 1;
> svm->vcpu.arch.hflags = 0;
>
> set_cr_intercept(svm, INTERCEPT_CR0_READ);
>@@ -1529,15 +1528,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
> ulong gcr0 = svm->vcpu.arch.cr0;
> u64 *hcr0 = &svm->vmcb->save.cr0;
>
>- if (!svm->vcpu.fpu_active)
>- *hcr0 |= SVM_CR0_SELECTIVE_MASK;
>- else
>- *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>- | (gcr0 & SVM_CR0_SELECTIVE_MASK);
>+ *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>+ | (gcr0 & SVM_CR0_SELECTIVE_MASK);
>
> mark_dirty(svm->vmcb, VMCB_CR);
>
>- if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
>+ if (gcr0 == *hcr0) {
> clr_cr_intercept(svm, INTERCEPT_CR0_READ);
> clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
> } else {
>@@ -1568,8 +1564,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> if (!npt_enabled)
> cr0 |= X86_CR0_PG | X86_CR0_WP;
>
>- if (!vcpu->fpu_active)
>- cr0 |= X86_CR0_TS;
> /*
> * re-enable caching here because the QEMU bios
> * does not do it - this results in some delay at
>@@ -1795,7 +1789,6 @@ static void svm_fpu_activate(struct kvm_vcpu *vcpu)
>
> clr_exception_intercept(svm, NM_VECTOR);
>
>- svm->vcpu.fpu_active = 1;
> update_cr0_intercept(svm);
> }
>
>@@ -4139,14 +4132,6 @@ static bool svm_has_wbinvd_exit(void)
> return true;
> }
>
>-static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
>-{
>- struct vcpu_svm *svm = to_svm(vcpu);
>-
>- set_exception_intercept(svm, NM_VECTOR);
>- update_cr0_intercept(svm);
>-}

Do you test it on AMD cpu? What's the performance you get?

Regards,
Wanpeng Li

>-
> #define PRE_EX(exit) { .exit_code = (exit), \
> .stage = X86_ICPT_PRE_EXCEPT, }
> #define POST_EX(exit) { .exit_code = (exit), \
>@@ -4381,7 +4366,6 @@ static struct kvm_x86_ops svm_x86_ops = {
> .cache_reg = svm_cache_reg,
> .get_rflags = svm_get_rflags,
> .set_rflags = svm_set_rflags,
>- .fpu_deactivate = svm_fpu_deactivate,
>
> .tlb_flush = svm_flush_tlb,
>
>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>index f5e8dce..811a666 100644
>--- a/arch/x86/kvm/vmx.c
>+++ b/arch/x86/kvm/vmx.c
>@@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
> u32 eb;
>
> eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
>- (1u << NM_VECTOR) | (1u << DB_VECTOR);
>+ (1u << DB_VECTOR);
> if ((vcpu->guest_debug &
> (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
> (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
>@@ -1576,8 +1576,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
> eb = ~0;
> if (enable_ept)
> eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
>- if (vcpu->fpu_active)
>- eb &= ~(1u << NM_VECTOR);
>
> /* When we are running a nested L2 guest and L1 specified for it a
> * certain exception bitmap, we must trap the same exceptions and pass
>@@ -1961,9 +1959,6 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
> {
> ulong cr0;
>
>- if (vcpu->fpu_active)
>- return;
>- vcpu->fpu_active = 1;
> cr0 = vmcs_readl(GUEST_CR0);
> cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
> cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
>@@ -1994,33 +1989,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
> (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
> }
>
>-static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
>-{
>- /* Note that there is no vcpu->fpu_active = 0 here. The caller must
>- * set this *before* calling this function.
>- */
>- vmx_decache_cr0_guest_bits(vcpu);
>- vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
>- update_exception_bitmap(vcpu);
>- vcpu->arch.cr0_guest_owned_bits = 0;
>- vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>- if (is_guest_mode(vcpu)) {
>- /*
>- * L1's specified read shadow might not contain the TS bit,
>- * so now that we turned on shadowing of this bit, we need to
>- * set this bit of the shadow. Like in nested_vmx_run we need
>- * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
>- * up-to-date here because we just decached cr0.TS (and we'll
>- * only update vmcs12->guest_cr0 on nested exit).
>- */
>- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>- vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
>- (vcpu->arch.cr0 & X86_CR0_TS);
>- vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
>- } else
>- vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
>-}
>-
> static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
> {
> unsigned long rflags, save_rflags;
>@@ -3575,9 +3543,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> if (enable_ept)
> ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
>
>- if (!vcpu->fpu_active)
>- hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
>-
> vmcs_writel(CR0_READ_SHADOW, cr0);
> vmcs_writel(GUEST_CR0, hw_cr0);
> vcpu->arch.cr0 = cr0;
>@@ -5066,11 +5031,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
> if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
> return 1; /* already handled by vmx_vcpu_run() */
>
>- if (is_no_device(intr_info)) {
>- vmx_fpu_activate(vcpu);
>- return 1;
>- }
>-
> if (is_invalid_opcode(intr_info)) {
> if (is_guest_mode(vcpu)) {
> kvm_queue_exception(vcpu, UD_VECTOR);
>@@ -5263,22 +5223,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
> return kvm_set_cr4(vcpu, val);
> }
>
>-/* called to set cr0 as approriate for clts instruction exit. */
>-static void handle_clts(struct kvm_vcpu *vcpu)
>-{
>- if (is_guest_mode(vcpu)) {
>- /*
>- * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS
>- * but we did (!fpu_active). We need to keep GUEST_CR0.TS on,
>- * just pretend it's off (also in arch.cr0 for fpu_activate).
>- */
>- vmcs_writel(CR0_READ_SHADOW,
>- vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS);
>- vcpu->arch.cr0 &= ~X86_CR0_TS;
>- } else
>- vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
>-}
>-
> static int handle_cr(struct kvm_vcpu *vcpu)
> {
> unsigned long exit_qualification, val;
>@@ -5321,10 +5265,6 @@ static int handle_cr(struct kvm_vcpu *vcpu)
> }
> break;
> case 2: /* clts */
>- handle_clts(vcpu);
>- trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
>- skip_emulated_instruction(vcpu);
>- vmx_fpu_activate(vcpu);
> return 1;
> case 1: /*mov from cr*/
> switch (cr) {
>@@ -9856,19 +9796,16 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
> vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
> /*
>- * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
>- * actually changed, because it depends on the current state of
>- * fpu_active (which may have changed).
> * Note that vmx_set_cr0 refers to efer set above.
> */
> vmx_set_cr0(vcpu, vmcs12->host_cr0);
> /*
>- * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
>- * to apply the same changes to L1's vmcs. We just set cr0 correctly,
>- * but we also need to update cr0_guest_host_mask and exception_bitmap.
>+ * If we did fpu_activate() during L2's run, we need to apply the same
>+ * changes to L1's vmcs. We just set cr0 correctly, but we also need
>+ * to update cr0_guest_host_mask and exception_bitmap.
> */
> update_exception_bitmap(vcpu);
>- vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
>+ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
> vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>
> /*
>@@ -10177,7 +10114,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .cache_reg = vmx_cache_reg,
> .get_rflags = vmx_get_rflags,
> .set_rflags = vmx_set_rflags,
>- .fpu_deactivate = vmx_fpu_deactivate,
>
> .tlb_flush = vmx_flush_tlb,
>
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index e1a8126..f7597fe 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -6236,10 +6236,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> r = 0;
> goto out;
> }
>- if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
>- vcpu->fpu_active = 0;
>- kvm_x86_ops->fpu_deactivate(vcpu);
>- }
> if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
> /* Page is swapped out. Do synthetic halt */
> vcpu->arch.apf.halted = true;
>@@ -6296,8 +6292,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> preempt_disable();
>
> kvm_x86_ops->prepare_guest_switch(vcpu);
>- if (vcpu->fpu_active)
>- kvm_load_guest_fpu(vcpu);
>+ kvm_load_guest_fpu(vcpu);
> kvm_load_guest_xcr0(vcpu);
>
> vcpu->mode = IN_GUEST_MODE;
>@@ -7038,7 +7033,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
> fpu_save_init(&vcpu->arch.guest_fpu);
> __kernel_fpu_end();
> ++vcpu->stat.fpu_reload;
>- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
> trace_kvm_fpu(0);
> }
>
>diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>index 82af5d0..c82f798 100644
>--- a/include/linux/kvm_host.h
>+++ b/include/linux/kvm_host.h
>@@ -118,7 +118,6 @@ static inline bool is_error_page(struct page *page)
> #define KVM_REQ_MMU_SYNC 7
> #define KVM_REQ_CLOCK_UPDATE 8
> #define KVM_REQ_KICK 9
>-#define KVM_REQ_DEACTIVATE_FPU 10
> #define KVM_REQ_EVENT 11
> #define KVM_REQ_APF_HALT 12
> #define KVM_REQ_STEAL_UPDATE 13
>@@ -228,7 +227,6 @@ struct kvm_vcpu {
> struct mutex mutex;
> struct kvm_run *run;
>
>- int fpu_active;
> int guest_fpu_loaded, guest_xcr0_loaded;
> wait_queue_head_t wq;
> struct pid *pid;
>--
>1.9.1
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@xxxxxxxxxxxxxxx
>More majordomo info at http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/