Re: [PATCH] KVM: x86: use positive error values for msr emulation that causes #GP
From: Pankaj Gupta
Date: Thu Nov 05 2020 - 01:15:12 EST
> Recent introduction of the userspace msr filtering added code that uses
> negative error codes for cases that result in either #GP delivery to
> the guest, or handled by the userspace msr filtering.
>
> This breaks an assumption that a negative error code returned from the
> msr emulation code is a semi-fatal error which should be returned
> to userspace via KVM_RUN ioctl and usually kill the guest.
>
> Fix this by reusing the already existing KVM_MSR_RET_INVALID error code,
> and by adding a new KVM_MSR_RET_FILTERED error code for the
> userspace filtered msrs.
>
> Fixes: 291f35fb2c1d1 ("KVM: x86: report negative values from wrmsr emulation to userspace")
> Reported-by: Qian Cai <cai@xxxxxxxxxx>
> Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
> ---
> arch/x86/kvm/x86.c | 29 +++++++++++++++--------------
> arch/x86/kvm/x86.h | 8 +++++++-
> 2 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 397f599b20e5a..537130d78b2af 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache;
>
> /*
> * When called, it means the previous get/set msr reached an invalid msr.
> - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
> - * to fail the caller.
> + * Return true if we want to ignore/silent this failed msr access.
> */
> -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
> - u64 data, bool write)
> +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
> + u64 data, bool write)
> {
> const char *op = write ? "wrmsr" : "rdmsr";
>
> @@ -267,12 +266,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
> if (report_ignored_msrs)
> vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n",
> op, msr, data);
> - /* Mask the error */
> - return 0;
> + return true;
> } else {
> vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n",
> op, msr, data);
> - return -ENOENT;
> + return false;
> }
> }
>
> @@ -1416,7 +1414,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
> if (r == KVM_MSR_RET_INVALID) {
> /* Unconditionally clear the output for simplicity */
> *data = 0;
> - r = kvm_msr_ignored_check(vcpu, index, 0, false);
> + if (kvm_msr_ignored_check(vcpu, index, 0, false))
> + r = 0;
> }
>
> if (r)
> @@ -1540,7 +1539,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> struct msr_data msr;
>
> if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
> - return -EPERM;
> + return KVM_MSR_RET_FILTERED;
>
> switch (index) {
> case MSR_FS_BASE:
> @@ -1581,7 +1580,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
> int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
>
> if (ret == KVM_MSR_RET_INVALID)
> - ret = kvm_msr_ignored_check(vcpu, index, data, true);
> + if (kvm_msr_ignored_check(vcpu, index, data, true))
> + ret = 0;
>
> return ret;
> }
> @@ -1599,7 +1599,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
> int ret;
>
> if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
> - return -EPERM;
> + return KVM_MSR_RET_FILTERED;
>
> msr.index = index;
> msr.host_initiated = host_initiated;
> @@ -1618,7 +1618,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
> if (ret == KVM_MSR_RET_INVALID) {
> /* Unconditionally clear *data for simplicity */
> *data = 0;
> - ret = kvm_msr_ignored_check(vcpu, index, 0, false);
> + if (kvm_msr_ignored_check(vcpu, index, 0, false))
> + ret = 0;
> }
>
> return ret;
> @@ -1662,9 +1663,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
> static u64 kvm_msr_reason(int r)
> {
> switch (r) {
> - case -ENOENT:
> + case KVM_MSR_RET_INVALID:
> return KVM_MSR_EXIT_REASON_UNKNOWN;
> - case -EPERM:
> + case KVM_MSR_RET_FILTERED:
> return KVM_MSR_EXIT_REASON_FILTER;
> default:
> return KVM_MSR_EXIT_REASON_INVAL;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 3900ab0c6004d..e7ca622a468f5 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
> int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
> bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
>
> -#define KVM_MSR_RET_INVALID 2
> +/*
> + * Internal error codes that are used to indicate that MSR emulation encountered
> + * an error that should result in #GP in the guest, unless userspace
> + * handles it.
> + */
> +#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */
> +#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */
>
> #define __cr4_reserved_bits(__cpu_has, __c) \
> ({ \
This looks good to me. This should solve "-EPERM" return by "__kvm_set_msr" .
A question I have, In the case of "kvm_emulate_rdmsr()", for "r" we
are injecting #GP.
Is there any possibility of this check to be hit and still result in #GP?
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
....
r = kvm_get_msr(vcpu, ecx, &data);
/* MSR read failed? See if we should ask user space */
if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
/* Bounce to user space */
return 0;
}
/* MSR read failed? Inject a #GP */
if (r) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
}
....
}
Apart from the question above, feel free to add:
Reviewed-by: Pankaj Gupta <pankaj.gupta@xxxxxxxxxxxxxxx>