[PATCH 06/15] KVM: x86: Register perf callbacks only when actively handling interrupt

From: Sean Christopherson
Date: Thu Aug 26 2021 - 20:58:00 EST


Register KVM's perf callback only when handling an interrupt that may be
a PMI (sadly this includes IRQs), and unregister the callback immediately
after handling the interrupt (or closing the window). Registering the
callback on a per-CPU basis (with preemption disabled!) fixes a mostly
theoretical bug where perf could dereference a NULL pointer due to KVM
unloading and unregistering the callbacks in between perf queries of the
callback functions. The precise registration will also allow for future
cleanups and optimizations, e.g. the existence of the callbacks can serve
as the "in guest" check.

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
arch/x86/kvm/x86.c | 27 +++++++++++++++++----------
arch/x86/kvm/x86.h | 10 ++++++++++
include/linux/perf_event.h | 2 ++
kernel/events/core.c | 12 ++++++++++++
4 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bae951344e28..bc4ee6ea7752 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8274,28 +8274,31 @@ int kvm_is_in_guest(void)

static int kvm_is_user_mode(void)
{
- int user_mode = 3;
+ struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);

- if (__this_cpu_read(current_vcpu))
- user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
+ if (WARN_ON_ONCE(!vcpu))
+ return 0;

- return user_mode != 0;
+ return static_call(kvm_x86_get_cpl)(vcpu) != 0;
}

static unsigned long kvm_get_guest_ip(void)
{
- unsigned long ip = 0;
+ struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);

- if (__this_cpu_read(current_vcpu))
- ip = kvm_rip_read(__this_cpu_read(current_vcpu));
+ if (WARN_ON_ONCE(!vcpu))
+ return 0;

- return ip;
+ return kvm_rip_read(vcpu);
}

static void kvm_handle_intel_pt_intr(void)
{
struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);

+ if (WARN_ON_ONCE(!vcpu))
+ return;
+
kvm_make_request(KVM_REQ_PMI, vcpu);
__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
(unsigned long *)&vcpu->arch.pmu.global_status);
@@ -8308,6 +8311,12 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.handle_intel_pt_intr = NULL,
};

+void kvm_register_perf_callbacks(void)
+{
+ __perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+EXPORT_SYMBOL_GPL(kvm_register_perf_callbacks);
+
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@@ -11063,7 +11072,6 @@ int kvm_arch_hardware_setup(void *opaque)

if (ops->intel_pt_intr_in_guest && ops->intel_pt_intr_in_guest())
kvm_guest_cbs.handle_intel_pt_intr = kvm_handle_intel_pt_intr;
- perf_register_guest_info_callbacks(&kvm_guest_cbs);

if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
@@ -11092,7 +11100,6 @@ int kvm_arch_hardware_setup(void *opaque)

void kvm_arch_hardware_unsetup(void)
{
- perf_unregister_guest_info_callbacks();
kvm_guest_cbs.handle_intel_pt_intr = NULL;

static_call(kvm_x86_hardware_unsetup)();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7d66d63dc55a..5cedc0e8a5d5 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -387,15 +387,25 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
return kvm->arch.cstate_in_guest;
}

+void kvm_register_perf_callbacks(void);
+static inline void kvm_unregister_perf_callbacks(void)
+{
+ __perf_unregister_guest_info_callbacks();
+}
+
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);

static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, vcpu);
+
+ kvm_register_perf_callbacks();
}

static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
+ kvm_unregister_perf_callbacks();
+
__this_cpu_write(current_vcpu, NULL);
}

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c98253dae037..7a367bf1b78d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1238,6 +1238,8 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,

#ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
DECLARE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
+extern void __perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
+extern void __perf_unregister_guest_info_callbacks(void);
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern void perf_unregister_guest_info_callbacks(void);
#endif /* CONFIG_HAVE_GUEST_PERF_EVENTS */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9bc1375d6ed9..2f28d9d8dc94 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,6 +6485,18 @@ static void perf_pending_event(struct irq_work *entry)
#ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
DEFINE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);

+void __perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+ __this_cpu_write(perf_guest_cbs, cbs);
+}
+EXPORT_SYMBOL_GPL(__perf_register_guest_info_callbacks);
+
+void __perf_unregister_guest_info_callbacks(void)
+{
+ __this_cpu_write(perf_guest_cbs, NULL);
+}
+EXPORT_SYMBOL_GPL(__perf_unregister_guest_info_callbacks);
+
void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
int cpu;
--
2.33.0.259.gc128427fd7-goog