[PATCH v1 7/8] KVM/x86/vPMU: save/restore guest perf counters on vCPU switching

From: Wei Wang
Date: Thu Nov 01 2018 - 06:36:42 EST


When the vCPU is scheduled in, restore the assigned perf counter states
and register the guest PMI callback. When the vCPU is scheduled out,
save the assigned perf counter states and unregister the guest PMI
callback.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/pmu.c | 12 ++++++++++++
arch/x86/kvm/pmu.h | 4 ++++
arch/x86/kvm/pmu_intel.c | 38 ++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 6 ++++++
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 3 +++
7 files changed, 65 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b66d164..cb1c0bf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -474,6 +474,7 @@ struct kvm_pmu {
u64 counter_bitmask[2];
u64 global_ctrl_mask;
u64 assigned_pmc_bitmap;
+ u64 restore_pmc_bitmap;
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7f2f63e..4448a88 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -340,6 +340,18 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
kvm_x86_ops->pmu_ops->reset(vcpu);
}

+void kvm_pmu_sched_in(struct kvm_vcpu *vcpu)
+{ /* vCPU sched-in: forward to the pmu_ops sched_in hook, if one is set. */
+ if (kvm_x86_ops->pmu_ops->sched_in) /* hook is optional; NULL is skipped */
+ kvm_x86_ops->pmu_ops->sched_in(vcpu);
+}
+
+void kvm_pmu_sched_out(struct kvm_vcpu *vcpu)
+{ /* vCPU sched-out: forward to the pmu_ops sched_out hook, if one is set. */
+ if (kvm_x86_ops->pmu_ops->sched_out) /* hook is optional; NULL is skipped */
+ kvm_x86_ops->pmu_ops->sched_out(vcpu);
+}
+
void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7ab85bf..77fc973 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -34,6 +34,8 @@ struct kvm_pmu_ops {
void (*refresh)(struct kvm_vcpu *vcpu);
void (*init)(struct kvm_vcpu *vcpu);
void (*reset)(struct kvm_vcpu *vcpu);
+ void (*sched_in)(struct kvm_vcpu *vcpu);
+ void (*sched_out)(struct kvm_vcpu *vcpu);
};

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
@@ -118,6 +120,8 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+void kvm_pmu_sched_in(struct kvm_vcpu *vcpu);
+void kvm_pmu_sched_out(struct kvm_vcpu *vcpu);

bool is_vmware_backdoor_pmc(u32 pmc_idx);
struct kvm_perf_switch_msr *intel_pmu_get_switch_msrs(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 66d7c09..9eb5230 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -427,6 +427,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->global_status = 0;
pmu->global_ovf_ctrl = 0;
pmu->assigned_pmc_bitmap = 0;
+ pmu->restore_pmc_bitmap = 0;
}

struct kvm_perf_switch_msr *intel_pmu_get_switch_msrs(struct kvm_vcpu *vcpu,
@@ -448,6 +449,41 @@ struct kvm_perf_switch_msr *intel_pmu_get_switch_msrs(struct kvm_vcpu *vcpu,
return arr;
}

+static void intel_pmu_inject_guest_pmi(void *opaque, u64 status)
+{ /* PMI callback: merge status into the vPMU and raise KVM_REQ_PMI. */
+ struct kvm_vcpu *vcpu = opaque; /* the vcpu passed at registration */
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ pmu->global_status |= status; /* bits are OR-ed in; none cleared here */
+ kvm_make_request(KVM_REQ_PMI, vcpu);
+}
+
+static void intel_pmu_sched_out(struct kvm_vcpu *vcpu)
+{ /* vCPU sched-out: put each assigned PMC, then drop the PMI callback. */
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ u32 bit;
+
+ pmu->restore_pmc_bitmap = pmu->assigned_pmc_bitmap; /* saved for sched-in */
+ for_each_set_bit(bit, (unsigned long *)&pmu->restore_pmc_bitmap,
+ X86_PMC_IDX_MAX) /* NOTE(review): u64 read as unsigned long[]; confirm */
+ intel_pmu_put_pmc(pmu, bit);
+
+ x86_perf_unregister_pmi_callback();
+}
+
+static void intel_pmu_sched_in(struct kvm_vcpu *vcpu)
+{ /* vCPU sched-in: re-get the PMCs saved at sched-out, add PMI callback. */
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ u32 bit;
+
+ for_each_set_bit(bit, (unsigned long *)&pmu->restore_pmc_bitmap,
+ X86_PMC_IDX_MAX)
+ intel_pmu_get_pmc(pmu, bit);
+ pmu->restore_pmc_bitmap = 0; /* consumed; the next sched-out refills it */
+
+ x86_perf_register_pmi_callback(intel_pmu_inject_guest_pmi, vcpu); /* vcpu is the callback's opaque */
+}
+
struct kvm_pmu_ops intel_pmu_ops = {
.is_valid_msr_idx = intel_is_valid_msr_idx,
.is_valid_msr = intel_is_valid_msr,
@@ -457,4 +493,6 @@ struct kvm_pmu_ops intel_pmu_ops = {
.refresh = intel_pmu_refresh,
.init = intel_pmu_init,
.reset = intel_pmu_reset,
+ .sched_out = intel_pmu_sched_out,
+ .sched_in = intel_pmu_sched_in,
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 66d66d7..47308bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8986,9 +8986,15 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->arch.l1tf_flush_l1d = true;
+ kvm_pmu_sched_in(vcpu); /* PMU hook runs before the kvm_x86_ops sched_in */
kvm_x86_ops->sched_in(vcpu, cpu);
}

+void kvm_arch_sched_out(struct kvm_vcpu *vcpu)
+{ /* NOTE(review): generic kvm_sched_out() calls this; confirm other arches define it. */
+ kvm_pmu_sched_out(vcpu); /* x86 only needs to notify the vPMU here */
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c926698..478d602 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -795,6 +795,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_sched_out(struct kvm_vcpu *vcpu);

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2679e47..24c2be2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3990,6 +3990,9 @@ static void kvm_sched_out(struct preempt_notifier *pn,

if (current->state == TASK_RUNNING)
vcpu->preempted = true;
+
+ kvm_arch_sched_out(vcpu);
+
kvm_arch_vcpu_put(vcpu);
}

--
2.7.4