Re: [RFC PATCH] kvm: Make KVM DF intercept configurable

From: Paolo Bonzini
Date: Tue Mar 01 2016 - 16:12:07 EST




On 01/03/2016 20:28, Borislav Petkov wrote:
> Hi Paolo,
>
> so I've been hacking at early code recently and found this to be pretty
> useful. Thoughts?

I just use QEMU's binary translation mode to debug this kind of code (-d
in_asm is useful and relatively compact (because it only shows loops
once), or alternatively ept=0. But perhaps... why not. :) It's not
like it adds overhead.

Paolo

> This is AMD-only now but I'll extend it to Intel too, if there's
> interest.
>
> ---
> From: Borislav Petkov <bp@xxxxxxx>
> Date: Tue, 1 Mar 2016 14:34:28 +0100
> Subject: [RFC PATCH] kvm: Make KVM DF intercept configurable
>
> Sometimes it is useful when testing kernels in guests to know why the
> guest triple-faults, especially if one is poking at early kernel code
> where there's no debugging output yet.
>
> So add a module parameter which makes the guest exit on a #DF and log
> error info into a #DF tracepoint. Which turns out to be very useful:
>
> 1. Early PF causing a DF:
> -------------------------
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 22fbf9df61bb..57d7d7a68c7d 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -64,6 +64,8 @@ startup_64:
> * tables and then reload them.
> */
>
> + movq stack_start, %rsp
> ---
>
> We can't read that early from stack_start because we haven't enabled
> paging yet. KVM logs:
>
> qemu-system-x86-9523 [001] .... 1941.666236: kvm_df: Guest DF! rIP: 0x1000000, prev exception: 0xe, error_code: 0x0
>
> and the exception causing the #DF is a #PF (vector 0xe).
>
> 2. #GP
> -------------------------
>
> diff --git a/init/main.c b/init/main.c
> index 7c27de4577ed..869eb6110765 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -967,6 +969,9 @@ static int __ref kernel_init(void *unused)
> ramdisk_execute_command, ret);
> }
>
> + load_idt(&no_idt);
> + __asm__ __volatile__("int3");
> ---
>
> Loading a NULL IDT and causing a debug interrupt, causes a #GP.
>
> KVM logs:
>
> qemu-system-x86-12853 [000] .... 2463.500732: kvm_df: Guest DF! rIP: 0xffffffff815ba8ba, prev exception: 0xd, error_code: 0x1a
>
> and the previous exception was a #GP (vector 0xd).
>
> Signed-off-by: Borislav Petkov <bp@xxxxxxx>
> ---
> arch/x86/kvm/svm.c | 15 +++++++++++++++
> arch/x86/kvm/trace.h | 20 ++++++++++++++++++++
> arch/x86/kvm/x86.c | 1 +
> 3 files changed, 36 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c13a64b7d789..09f78bbd8b47 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -205,6 +205,9 @@ module_param(npt, int, S_IRUGO);
> static int nested = true;
> module_param(nested, int, S_IRUGO);
>
> +static int intercept_df = 0;
> +module_param(intercept_df, int, S_IRUGO);
> +
> static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
> static void svm_flush_tlb(struct kvm_vcpu *vcpu);
> static void svm_complete_interrupts(struct vcpu_svm *svm);
> @@ -1023,6 +1026,8 @@ static void init_vmcb(struct vcpu_svm *svm)
> set_exception_intercept(svm, MC_VECTOR);
> set_exception_intercept(svm, AC_VECTOR);
> set_exception_intercept(svm, DB_VECTOR);
> + if (intercept_df)
> + set_exception_intercept(svm, DF_VECTOR);
>
> set_intercept(svm, INTERCEPT_INTR);
> set_intercept(svm, INTERCEPT_NMI);
> @@ -1728,6 +1733,15 @@ static int nm_interception(struct vcpu_svm *svm)
> return 1;
> }
>
> +static int df_interception(struct vcpu_svm *svm)
> +{
> + trace_kvm_df(svm->vmcb->save.rip,
> + svm->vcpu.arch.exception.nr,
> + svm->vcpu.arch.exception.has_error_code ?
> + svm->vcpu.arch.exception.error_code : 0);
> + return 1;
> +}
> +
> static bool is_erratum_383(void)
> {
> int err, i;
> @@ -3308,6 +3322,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
> [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
> [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
> [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
> + [SVM_EXIT_EXCP_BASE + DF_VECTOR] = df_interception,
> [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
> [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
> [SVM_EXIT_INTR] = intr_interception,
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index ad9f6a23f139..6143c53f7bde 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -89,6 +89,26 @@ TRACE_EVENT(kvm_hv_hypercall,
> __entry->outgpa)
> );
>
> +TRACE_EVENT(kvm_df,
> + TP_PROTO(unsigned long rip, u8 nr, u32 error_code),
> + TP_ARGS(rip, nr, error_code),
> +
> + TP_STRUCT__entry(
> + __field( unsigned long, rip )
> + __field( u8, nr )
> + __field( u32, error_code )
> + ),
> +
> + TP_fast_assign(
> + __entry->rip = rip;
> + __entry->nr = nr;
> + __entry->error_code = error_code;
> + ),
> +
> + TP_printk("Guest DF! rIP: 0x%lx, prev exception: 0x%x, error_code: 0x%x",
> + __entry->rip, __entry->nr, __entry->error_code)
> +);
> +
> /*
> * Tracepoint for PIO.
> */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f18a08281015..49f1bd3279f2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8388,3 +8388,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_df);
>