[PATCH v4 4/8] KVM: Optimize kvm_make_vcpus_request_mask() a bit

From: Vitaly Kuznetsov
Date: Fri Aug 27 2021 - 05:25:48 EST


Iterating over set bits in 'vcpu_bitmap' should be faster than going
through all vCPUs, especially when just a few bits are set.

Drop kvm_make_vcpus_request_mask() call from kvm_make_all_cpus_request_except()
to avoid handling the special case when 'vcpu_bitmap' is NULL, move the
code to kvm_make_all_cpus_request_except() itself.

Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
---
virt/kvm/kvm_main.c | 88 +++++++++++++++++++++++++++------------------
1 file changed, 53 insertions(+), 35 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2082aceffbf6..e32ba210025f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -261,50 +261,57 @@ static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
return true;
}

+static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int req, cpumask_var_t tmp,
+ int current_cpu)
+{
+ int cpu = vcpu->cpu;
+
+ kvm_make_request(req, vcpu);
+
+ if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ return;
+
+ /*
+ * tmp can be "unavailable" if cpumasks are allocated off stack as
+ * allocation of the mask is deliberately not fatal and is handled by
+ * falling back to kicking all online CPUs.
+ */
+ if (!cpumask_available(tmp))
+ return;
+
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point
+ * after kvm_request_needs_ipi(), which could result in sending an IPI
+ * to the previous pCPU. But, that's OK because the purpose of the IPI
+ * is to ensure the vCPU returns to OUTSIDE_GUEST_MODE, which is
+ * satisfied if the vCPU migrates. Entering READING_SHADOW_PAGE_TABLES
+ * after this point is also OK, as the requirement is only that KVM wait
+ * for vCPUs that were reading SPTEs _before_ any changes were
+ * finalized. See kvm_vcpu_kick() for more details on handling requests.
+ */
+ if (kvm_request_needs_ipi(vcpu, req)) {
+ cpu = READ_ONCE(vcpu->cpu);
+ if (cpu != -1 && cpu != current_cpu)
+ __cpumask_set_cpu(cpu, tmp);
+ }
+}
+
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except,
unsigned long *vcpu_bitmap, cpumask_var_t tmp)
{
- int i, cpu, me;
struct kvm_vcpu *vcpu;
+ int i, me;
bool called;

me = get_cpu();

- kvm_for_each_vcpu(i, vcpu, kvm) {
- if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
- vcpu == except)
- continue;
-
- kvm_make_request(req, vcpu);
-
- if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
+ for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
+ vcpu = kvm_get_vcpu(kvm, i);
+ if (!vcpu || vcpu == except)
continue;
-
- /*
- * tmp can be "unavailable" if cpumasks are allocated off stack
- * as allocation of the mask is deliberately not fatal and is
- * handled by falling back to kicking all online CPUs.
- */
- if (!cpumask_available(tmp))
- continue;
-
- /*
- * Note, the vCPU could get migrated to a different pCPU at any
- * point after kvm_request_needs_ipi(), which could result in
- * sending an IPI to the previous pCPU. But, that's ok because
- * the purpose of the IPI is to ensure the vCPU returns to
- * OUTSIDE_GUEST_MODE, which is satisfied if the vCPU migrates.
- * Entering READING_SHADOW_PAGE_TABLES after this point is also
- * ok, as the requirement is only that KVM wait for vCPUs that
- * were reading SPTEs _before_ any changes were finalized. See
- * kvm_vcpu_kick() for more details on handling requests.
- */
- if (kvm_request_needs_ipi(vcpu, req)) {
- cpu = READ_ONCE(vcpu->cpu);
- if (cpu != -1 && cpu != me)
- __cpumask_set_cpu(cpu, tmp);
- }
+ kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
}

called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
@@ -316,12 +323,23 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except)
{
+ struct kvm_vcpu *vcpu;
cpumask_var_t cpus;
bool called;
+ int i, me;

zalloc_cpumask_var(&cpus, GFP_ATOMIC);

- called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
+ me = get_cpu();
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu == except)
+ continue;
+ kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
+ }
+
+ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
+ put_cpu();

free_cpumask_var(cpus);
return called;
--
2.31.1