Re: disabling halt polling broken? (was Re: [PATCH 00/14] KVM: Halt-polling fixes, cleanups and a new stat)

From: Sean Christopherson
Date: Mon Sep 27 2021 - 11:00:00 EST


On Mon, Sep 27, 2021, Christian Borntraeger wrote:
> While looking into this series,
>
> I realized that Davids patch
>
> commit acd05785e48c01edb2c4f4d014d28478b5f19fb5
> Author: David Matlack <dmatlack@xxxxxxxxxx>
> AuthorDate: Fri Apr 17 15:14:46 2020 -0700
> Commit: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> CommitDate: Fri Apr 24 12:53:17 2020 -0400
>
> kvm: add capability for halt polling
>
> broke the possibility for an admin to disable halt polling for already running KVM guests.
> In past times doing
> echo 0 > /sys/module/kvm/parameters/halt_poll_ns
>
> stopped polling system wide.
> Now all KVM guests will use the halt_poll_ns value that was active during
> startup - even those that do not use KVM_CAP_HALT_POLL.
>
> I guess this was not intended?

Ouch. I would go so far as to say that halt_poll_ns should be a hard limit on
the capability. What about having the per-VM variable track only the capability,
and then use the module param to cap the max when doing adjustments? E.g. add
a variant of this early in the series?

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 80f78daa6b8d..f50e4e31a0cf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1078,8 +1078,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err_no_arch_destroy_vm;
}

- kvm->max_halt_poll_ns = halt_poll_ns;
-
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -3136,7 +3134,8 @@ void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
sigemptyset(&current->real_blocked);
}

-static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu,
+ unsigned int max_halt_poll_ns)
{
unsigned int old, val, grow, grow_start;

@@ -3150,8 +3149,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
if (val < grow_start)
val = grow_start;

- if (val > vcpu->kvm->max_halt_poll_ns)
- val = vcpu->kvm->max_halt_poll_ns;
+ if (val > max_halt_poll_ns)
+ val = max_halt_poll_ns;

vcpu->halt_poll_ns = val;
out:
@@ -3261,6 +3260,7 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
+ unsigned int max_halt_poll_ns;
ktime_t start, cur, poll_end;
bool waited = false;
u64 halt_ns;
@@ -3304,19 +3304,25 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
update_halt_poll_stats(vcpu, start, poll_end, !waited);

if (halt_poll_allowed) {
+ max_halt_poll_ns = vcpu->kvm->max_halt_poll_ns;
+ if (max_halt_poll_ns)
+ max_halt_poll_ns = min(max_halt_poll_ns, halt_poll_ns);
+ else
+ max_halt_poll_ns = halt_poll_ns;
+
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- } else if (vcpu->kvm->max_halt_poll_ns) {
+ } else if (max_halt_poll_ns) {
if (halt_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
else if (vcpu->halt_poll_ns &&
- halt_ns > vcpu->kvm->max_halt_poll_ns)
+ halt_ns > max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
- halt_ns < vcpu->kvm->max_halt_poll_ns)
- grow_halt_poll_ns(vcpu);
+ else if (vcpu->halt_poll_ns < max_halt_poll_ns &&
+ halt_ns < max_halt_poll_ns)
+ grow_halt_poll_ns(vcpu, max_halt_poll_ns);
} else {
vcpu->halt_poll_ns = 0;
}