[PATCH] KVM: arm64: Properly restore PMU state during live-migration
From: Jinank Jain
Date: Thu Jun 03 2021 - 07:06:46 EST
Currently if a guest is live-migrated while it is actively using perf
counters, then after live-migrate it will notice that all counters would
suddenly start reporting 0s. This is due to the fact we are not
re-creating the relevant perf events inside the kernel.
Usually on live-migration guest state is restored using KVM_SET_ONE_REG
ioctl interface, which simply restores the value of PMU registers
values but does not re-program the perf events so that the guest can seamlessly
use these counters even after live-migration like it was doing before
live-migration.
Instead there are two completely different code path between guest
accessing PMU registers and VMM restoring counters on
live-migration.
In case of KVM_SET_ONE_REG:
kvm_arm_set_reg()
...... kvm_arm_sys_reg_set_reg()
........... reg_from_user()
but in case when guest tries to access these counters:
handle_exit()
..... kvm_handle_sys_reg()
..........perform_access()
...............access_pmu_evcntr()
...................kvm_pmu_set_counter_value()
.......................kvm_pmu_create_perf_event()
The drawback of using the KVM_SET_ONE_REG interface is that the host pmu
events which were registered for the source instance and not present for
the destination instance. Thus passively restoring PMCR_EL0 using
KVM_SET_ONE_REG interface would not create the necessary host pmu events
which are crucial for seamless guest experience across live migration.
In ordet to fix the situation, on first vcpu load we should restore
PMCR_EL0 in the same exact way like the guest was trying to access
these counters. And then we will also recreate the relevant host pmu
events.
Signed-off-by: Jinank Jain <jinankj@xxxxxxxxx>
Cc: Alexander Graf (AWS) <graf@xxxxxxxxx>
Cc: Marc Zyngier <maz@xxxxxxxxxx>
Cc: James Morse <james.morse@xxxxxxx>
Cc: Alexandru Elisei <alexandru.elisei@xxxxxxx>
Cc: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
---
arch/arm64/include/asm/kvm_host.h | 1 +
arch/arm64/kvm/arm.c | 1 +
arch/arm64/kvm/pmu-emul.c | 10 ++++++++--
arch/arm64/kvm/pmu.c | 15 +++++++++++++++
include/kvm/arm_pmu.h | 3 +++
5 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc..2376ad3c2fc2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -745,6 +745,7 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
+void kvm_vcpu_pmu_restore(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
#else
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a0..c66f6d16ec06 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -408,6 +408,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (has_vhe())
kvm_vcpu_load_sysregs_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
+ kvm_vcpu_pmu_restore(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fd167d4f4215..12a40f4b5f0d 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -574,10 +574,16 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
kvm_pmu_disable_counter_mask(vcpu, mask);
}
- if (val & ARMV8_PMU_PMCR_C)
+ /*
+ * Cycle counter needs to reset in case of first vcpu load.
+ */
+ if (val & ARMV8_PMU_PMCR_C || !kvm_arm_pmu_v3_restored(vcpu))
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
- if (val & ARMV8_PMU_PMCR_P) {
+ /*
+ * All the counters needs to reset in case of first vcpu load.
+ */
+ if (val & ARMV8_PMU_PMCR_P || !kvm_arm_pmu_v3_restored(vcpu)) {
for_each_set_bit(i, &mask, 32)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 03a6c1f4a09a..574daeeaa4e4 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -161,6 +161,21 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
}
}
+/*
+ * Restore PMU events on first vcpu load.
+ */
+void kvm_vcpu_pmu_restore(struct kvm_vcpu *vcpu)
+{
+ if (kvm_arm_pmu_v3_restored(vcpu))
+ return;
+
+ u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+
+ kvm_pmu_handle_pmcr(vcpu, val);
+
+ vcpu->arch.pmu.restored = true;
+}
+
/*
* On VHE ensure that only guest events have EL0 counting enabled.
* This is called from both vcpu_{load,put} and the sysreg handling.
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 864b9997efb2..7444fd894cf3 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -32,10 +32,12 @@ struct kvm_pmu {
struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
bool created;
+ bool restored;
bool irq_level;
struct irq_work overflow_work;
};
+#define kvm_arm_pmu_v3_restored(v) ((v)->arch.pmu.restored)
#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
@@ -67,6 +69,7 @@ struct kvm_pmu {
};
#define kvm_arm_pmu_irq_initialized(v) (false)
+#define kvm_arm_pmu_v3_restored(v) (false)
static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
u64 select_idx)
{
--
2.31.1
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879