[PATCH 2/2] [DEBUG] preempt timer debug test

From: Yao Yuan
Date: Wed Jul 03 2024 - 02:08:02 EST


A kselftest-based program that allows the VMX preemption timer
value to be set directly from the VM.

Introduce two hypercalls, 0x87650001 and 0x87650002: 01 sets the
preempt timer value, 02 polls whether the preempt timer has expired
(it returns 1 while the timer is still pending and 0 once it has fired).

Usage:
Reload the kvm modules with this change applied, then run:
$KERNEL_SRC_ROOT/tools/testing/selftests/kvm/x86_64/preempt_test -p 'preempt_timer_value'

'preempt_timer_value' is the preempt timer value in decimal format; hexadecimal is not supported.

For example:

perf record -e "kvm:*" tools/testing/selftests/kvm/x86_64/preempt_test -p 2281718445

The above sets the preempt timer value to 2281718445 (0x880042AD)
and captures the trace; then check the kvm_vmx_debug events in the
trace to see how the preempt timer behaved.

Signed-off-by: Yao Yuan <yuan.yao@xxxxxxxxx>
---
tools/testing/selftests/kvm/Makefile | 1 +
arch/x86/kvm/vmx/vmx.h | 5 +
arch/x86/kvm/vmx/vmx.c | 113 +++++++++++++++++-
.../selftests/kvm/x86_64/preempt_test.c | 82 +++++++++++++
4 files changed, 198 insertions(+), 3 deletions(-)
create mode 100644 tools/testing/selftests/kvm/x86_64/preempt_test.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ad8b5d15f2bd..957509957f80 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -129,6 +129,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/amx_test
TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
+TEST_GEN_PROGS_x86_64 += x86_64/preempt_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 42498fa63abb..82ea0ccc7a63 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -368,6 +368,11 @@ struct vcpu_vmx {

/* ve_info must be page aligned. */
struct vmx_ve_information *ve_info;
+
+ volatile bool debug_timer;
+ bool debug_timer_set_to_hardware;
+ u32 debug_timer_val;
+ u64 debug_timer_deadline_tsc;
};

struct kvm_vmx {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f18c2d8c7476..73f084c29f9a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4431,8 +4431,9 @@ static u32 vmx_vmexit_ctrl(void)
* Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
* nested virtualization and thus allowed to be set in vmcs12.
*/
- vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER |
- VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
+ vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER);
+ pr_info("Set VM_EXIT_SAVE_VMX_PREEMPTION_TIMER forcedly for preempt timer debug\n");
+

if (vmx_pt_mode_is_system())
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
@@ -5993,11 +5994,41 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}

+/*
+ * Fastpath handler used for a preemption timer VM-Exit while the debug timer
+ * is armed: trace the exit reason, the timer value read back from the VMCS and
+ * the time still left until the recorded TSC deadline, then disarm the debug
+ * timer and ask for the guest to be re-entered.
+ */
+static fastpath_t handle_fastpath_debug_timer(struct kvm_vcpu *vcpu,
+					      bool force_immediate_exit)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 now = rdtsc();
+	u32 remaining = 0;
+
+	/* Distance to the deadline, scaled down by the preemption timer shift. */
+	if (now < vmx->debug_timer_deadline_tsc)
+		remaining = (u32)((vmx->debug_timer_deadline_tsc - now) >>
+				  cpu_preemption_timer_multi);
+
+	trace_kvm_vmx_debug(2UL,
+			    (unsigned long)vmcs_read32(VM_EXIT_REASON),
+			    (unsigned long)vmcs_read32(VMX_PREEMPTION_TIMER_VALUE),
+			    (unsigned long)remaining, now);
+
+	/* The guest polls this flag via hypercall to learn the timer fired. */
+	vmx->debug_timer = false;
+
+	return EXIT_FASTPATH_REENTER_GUEST;
+}
+
static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

+ WARN_ON(vmx->debug_timer && force_immediate_exit);
+ if (vmx->debug_timer)
+ return handle_fastpath_debug_timer(vcpu,
+ force_immediate_exit);
+
/*
* In the *extremely* unlikely scenario that this is a spurious VM-Exit
* due to the timer expiring while it was "soft" disabled, just eat the
@@ -6096,6 +6127,60 @@ static int handle_notify(struct kvm_vcpu *vcpu)
return 1;
}

+/*
+ * Debug hypercall: arm the debug preemption timer.
+ *
+ * @a0 carries the requested timer value.  The value is stored in a 32-bit
+ * field, so only the low 32 bits of @a0 are honoured; @a1..@a3 are unused.
+ *
+ * Returns 0 unconditionally.
+ */
+static unsigned long vmx_debug_set_preempt_timer(struct kvm_vcpu *vcpu,
+						 unsigned long a0,
+						 unsigned long a1,
+						 unsigned long a2,
+						 unsigned long a3)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 timer_val = (u32)a0;
+
+	vmx->debug_timer = true;
+	vmx->debug_timer_set_to_hardware = false;
+	vmx->debug_timer_val = timer_val;
+	/*
+	 * Derive the TSC deadline from the same truncated value that is stored
+	 * in debug_timer_val, so the two can never disagree when the guest
+	 * passes a value wider than 32 bits.
+	 */
+	vmx->debug_timer_deadline_tsc = rdtsc() +
+		((u64)timer_val << cpu_preemption_timer_multi);
+	pr_info("debug_timer = %u\n", timer_val);
+
+	return 0;
+}
+
+
+/*
+ * Debug hypercall: report whether the debug preemption timer is still armed.
+ *
+ * Returns 1 while vmx->debug_timer is set and 0 once it has been cleared.
+ * @a0..@a3 are unused.
+ */
+static unsigned long vmx_debug_get_preempt_timer_result(struct kvm_vcpu *vcpu,
+							unsigned long a0,
+							unsigned long a1,
+							unsigned long a2,
+							unsigned long a3)
+{
+	return to_vmx(vcpu)->debug_timer ? 1 : 0;
+}
+
+/*
+ * VMCALL exit handler.  Hypercall numbers 0x87650001 (set debug timer) and
+ * 0x87650002 (query debug timer) are handled here, with arguments taken from
+ * RBX, RCX, RDX and RSI and the result written back to RAX; every other
+ * hypercall number is forwarded to kvm_emulate_hypercall().
+ */
+static int vmx_emulate_hypercall(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr = kvm_rax_read(vcpu);
+	bool is_set = (nr == 0x87650001);
+	unsigned long arg0, arg1, arg2, arg3;
+	unsigned long result;
+
+	/* Not one of the debug hypercalls: take the regular path. */
+	if (!is_set && nr != 0x87650002)
+		return kvm_emulate_hypercall(vcpu);
+
+	arg0 = kvm_rbx_read(vcpu);
+	arg1 = kvm_rcx_read(vcpu);
+	arg2 = kvm_rdx_read(vcpu);
+	arg3 = kvm_rsi_read(vcpu);
+
+	result = is_set ?
+		vmx_debug_set_preempt_timer(vcpu, arg0, arg1, arg2, arg3) :
+		vmx_debug_get_preempt_timer_result(vcpu, arg0, arg1, arg2, arg3);
+
+	kvm_rax_write(vcpu, result);
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6117,7 +6202,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_INVD] = kvm_emulate_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_RDPMC] = kvm_emulate_rdpmc,
- [EXIT_REASON_VMCALL] = kvm_emulate_hypercall,
+ [EXIT_REASON_VMCALL] = vmx_emulate_hypercall,
[EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
[EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
[EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
@@ -7199,6 +7284,28 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit
if (force_immediate_exit) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
+ } else if (vmx->debug_timer) {
+ u32 old;
+
+ tscl = rdtsc();
+
+ if (!vmx->debug_timer_set_to_hardware) {
+ delta_tsc = vmx->debug_timer_val;
+ vmx->debug_timer_set_to_hardware = true;
+ } else {
+ if (vmx->debug_timer_deadline_tsc > tscl)
+ delta_tsc = (u32)((vmx->debug_timer_deadline_tsc - tscl)
+ >> cpu_preemption_timer_multi);
+ else
+ delta_tsc = 0;
+ }
+
+ old = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+ trace_kvm_vmx_debug(3UL, old,
+ vmcs_read32(VMX_PREEMPTION_TIMER_VALUE),
+ delta_tsc, tscl);
+ vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
tscl = rdtsc();
if (vmx->hv_deadline_tsc > tscl)
diff --git a/tools/testing/selftests/kvm/x86_64/preempt_test.c b/tools/testing/selftests/kvm/x86_64/preempt_test.c
new file mode 100644
index 000000000000..2e58cfee61d0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/preempt_test.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Debug the preemption timer behavior
+ */
+
+#include "test_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+uint32_t preempt_timer_val = 0x1000000;
+/*
+ * Guest entry point: request the debug preempt timer with preempt_timer_val
+ * via hypercall 0x87650001, then poll hypercall 0x87650002 every 100us until
+ * it returns 0, and finally signal completion to the host.
+ */
+static void guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+	volatile unsigned long pending;
+
+	kvm_hypercall(0x87650001, preempt_timer_val, 0, 0, 0);
+
+	for (;;) {
+		udelay(100);
+		pending = kvm_hypercall(0x87650002, 0, 0, 0, 0);
+		if (pending == 0)
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+/*
+ * Run the vCPU repeatedly until the guest reports UCALL_DONE.  A UCALL_ABORT
+ * is reported through REPORT_GUEST_ASSERT(); any other exit simply resumes
+ * the guest.
+ */
+static void do_test(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	for (;;) {
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			return;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Create a VM, copy the host's preempt_timer_val into the guest, add a vCPU
+ * that starts in guest_code(), drive it to completion and free the VM.
+ */
+static void run_test(void)
+{
+	struct kvm_vm *vm = vm_create(1);
+	struct kvm_vcpu *vcpu;
+
+	/* Make the value chosen on the command line visible to guest_code(). */
+	sync_global_to_guest(vm, preempt_timer_val);
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+	do_test(vcpu);
+
+	kvm_vm_free(vm);
+}
+
+
+/*
+ * Parse the command line and run the preempt timer debug test.
+ *
+ * -p <value>  preempt timer value, decimal, in the range 0..UINT32_MAX
+ *
+ * Any other option exits with KSFT_SKIP; a malformed or out-of-range -p value
+ * exits with KSFT_FAIL.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned long long val;
+	char *end;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "p:h")) != -1) {
+		switch (opt) {
+		case 'p':
+			/*
+			 * atoi() cannot report errors and is undefined for
+			 * values above INT_MAX, which valid 32-bit timer
+			 * values can exceed, so parse with strtoull() and
+			 * validate the result explicitly.
+			 */
+			val = strtoull(optarg, &end, 10);
+			if (*optarg == '-' || end == optarg || *end != '\0' ||
+			    val > UINT32_MAX) {
+				fprintf(stderr,
+					"Invalid preempt timer value: '%s'\n",
+					optarg);
+				exit(KSFT_FAIL);
+			}
+			preempt_timer_val = (uint32_t)val;
+			break;
+		default:
+			exit(KSFT_SKIP);
+		}
+	}
+
+	printf("preempt timer value:%u(0x%x)\n",
+	       preempt_timer_val, preempt_timer_val);
+
+	run_test();
+
+	return 0;
+}
--
2.27.0