Re: [PATCH] KVM: x86: Use get_cpl directly in case of vcpu_load to improve accuracy

From: Sean Christopherson
Date: Mon Nov 27 2023 - 20:30:53 EST


On Thu, Nov 23, 2023, Like Xu wrote:
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> ---
> arch/x86/kvm/x86.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2c924075f6f1..c454df904a45 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13031,7 +13031,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
> if (vcpu->arch.guest_state_protected)
> return true;
>
> - return vcpu->arch.preempted_in_kernel;
> + if (vcpu != kvm_get_running_vcpu())
> + return vcpu->arch.preempted_in_kernel;

Eww, KVM really shouldn't be reading vcpu->arch.preempted_in_kernel in a generic
vcpu_in_kernel() API.

Rather than fudge around that ugliness with a kvm_get_running_vcpu() check, what
if we instead repurpose kvm_arch_dy_has_pending_interrupt(), which is effectively
x86-specific, to deal with not being able to read the current CPL for a vCPU
that is (possibly) not "loaded"? AFAICT, that limitation is also x86-specific
(or rather, Intel/VMX-specific).
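
For context on why that's VMX-specific: vmx_get_cpl() derives the CPL from
SS.DPL via the segment cache, which may VMREAD the current VMCS, and a VMREAD
is valid only while the vCPU is loaded on the current pCPU. Paraphrased
sketch, not verbatim kernel code:

        int vmx_get_cpl(struct kvm_vcpu *vcpu)
        {
                struct vcpu_vmx *vmx = to_vmx(vcpu);

                /* CPL is reported as 0 when emulating Real Mode via vm86. */
                if (unlikely(vmx->rmode.vm86_active))
                        return 0;

                /*
                 * Reading SS's access rights goes through the segment cache
                 * and may VMREAD the current VMCS, i.e. the vCPU must be
                 * loaded on this pCPU for the result to be valid.
                 */
                return VMX_AR_DPL(vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS));
        }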

And if getting the CPL for a vCPU that may not be loaded is problematic for other
architectures, then I think the correct fix is to move preempted_in_kernel into
common code and check it directly in kvm_vcpu_on_spin().
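
E.g. something like the below (hypothetical sketch only; the field placement
and helper name are assumptions, not from an actual patch). preempted_in_kernel
would move from vcpu->arch into the common struct kvm_vcpu, recorded at
preemption time while the vCPU is still loaded and its CPL is readable:

        /* virt/kvm/kvm_main.c */
        static bool vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
        {
                /* Set when the vCPU is preempted, e.g. from kvm_sched_out(). */
                return READ_ONCE(vcpu->preempted_in_kernel);
        }

and kvm_vcpu_on_spin() would then check it directly:

        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
            !vcpu_preempted_in_kernel(vcpu))
                continue;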

This is what I'm thinking:

---
arch/x86/kvm/x86.c | 22 +++++++++++++++-------
include/linux/kvm_host.h | 2 +-
virt/kvm/kvm_main.c | 7 +++----
3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d0772b47041..5c1a75c0dafe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13022,13 +13022,21 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}

-bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+static bool kvm_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_apicv_active(vcpu) &&
- static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
- return true;
+ return kvm_vcpu_apicv_active(vcpu) &&
+ static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu);
+}

- return false;
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Treat the vCPU as being in-kernel if it has a pending interrupt, as
+ * the vCPU trying to yield may be spinning on IPI delivery, i.e. the
+ * target vCPU is in-kernel for the purposes of directed yield.
+ */
+ return vcpu->arch.preempted_in_kernel ||
+ kvm_dy_has_pending_interrupt(vcpu);
}

bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
@@ -13043,7 +13051,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;

- return kvm_arch_dy_has_pending_interrupt(vcpu);
+ return kvm_dy_has_pending_interrupt(vcpu);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
@@ -13051,7 +13059,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
return true;

- return vcpu->arch.preempted_in_kernel;
+ return static_call(kvm_x86_get_cpl)(vcpu) == 0;
}

unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea1523a7b83a..820c5b64230f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1505,7 +1505,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
-bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_post_init_vm(struct kvm *kvm);
void kvm_arch_pre_destroy_vm(struct kvm *kvm);
int kvm_arch_create_vm_debugfs(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8758cb799e18..e84be7e2e05e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4049,9 +4049,9 @@ static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
return false;
}

-bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+bool __weak kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
{
- return false;
+ return kvm_arch_vcpu_in_kernel(vcpu);
}

void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
@@ -4086,8 +4086,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
- !kvm_arch_dy_has_pending_interrupt(vcpu) &&
- !kvm_arch_vcpu_in_kernel(vcpu))
+ !kvm_arch_vcpu_preempted_in_kernel(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;

base-commit: e9e60c82fe391d04db55a91c733df4a017c28b2f
--