Re: [PATCH v5 07/17] KVM: x86: add fields to struct kvm_arch for CoCo features

From: Isaku Yamahata
Date: Thu Apr 04 2024 - 17:39:52 EST


On Thu, Apr 04, 2024 at 08:13:17AM -0400,
Paolo Bonzini <pbonzini@xxxxxxxxxx> wrote:

> Some VM types have characteristics in common; in fact, the only use
> of VM types right now is kvm_arch_has_private_mem and it assumes that
> _all_ nonzero VM types have private memory.
>
> We will soon introduce a VM type for SEV and SEV-ES VMs, and at that
> point we will have two special characteristics of confidential VMs
> that depend on the VM type: not just if memory is private, but
> also whether guest state is protected. For the latter we have
> vcpu->arch.guest_state_protected, which is only set on a fully initialized
> VM.
>
> For VM types with protected guest state, we can actually fix a problem in
> the SEV-ES implementation, where ioctls to set registers do not cause an
> error even if the VM has been initialized and the guest state encrypted.
> Make sure that, for VM types with protected guest state, these ioctls
> return an error instead.
>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Message-Id: <20240209183743.22030-7-pbonzini@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 7 ++-
> arch/x86/kvm/x86.c | 93 ++++++++++++++++++++++++++-------
> 2 files changed, 79 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 04c430eb25cf..3d56b5bb10e9 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1279,12 +1279,14 @@ enum kvm_apicv_inhibit {
> };
>
> struct kvm_arch {
> - unsigned long vm_type;
> unsigned long n_used_mmu_pages;
> unsigned long n_requested_mmu_pages;
> unsigned long n_max_mmu_pages;
> unsigned int indirect_shadow_pages;
> u8 mmu_valid_gen;
> + u8 vm_type;
> + bool has_private_mem;
> + bool has_protected_state;
> struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
> struct list_head active_mmu_pages;
> struct list_head zapped_obsolete_pages;
> @@ -2153,8 +2155,9 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
> void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
> int tdp_max_root_level, int tdp_huge_page_level);
>
> +
> #ifdef CONFIG_KVM_PRIVATE_MEM
> -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
> +#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
> #else
> #define kvm_arch_has_private_mem(kvm) false
> #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3934e7682734..d4a8d896798f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5555,11 +5555,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
> - struct kvm_debugregs *dbgregs)
> +static int kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
> + struct kvm_debugregs *dbgregs)
> {
> unsigned int i;
>
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> memset(dbgregs, 0, sizeof(*dbgregs));
>
> BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
> @@ -5568,6 +5572,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
>
> dbgregs->dr6 = vcpu->arch.dr6;
> dbgregs->dr7 = vcpu->arch.dr7;
> + return 0;
> }
>
> static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
> @@ -5575,6 +5580,10 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
> {
> unsigned int i;
>
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> if (dbgregs->flags)
> return -EINVAL;
>
> @@ -5595,8 +5604,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
> }
>
>
> -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
> - u8 *state, unsigned int size)
> +static int kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
> + u8 *state, unsigned int size)
> {
> /*
> * Only copy state for features that are enabled for the guest. The
> @@ -5614,24 +5623,25 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
> XFEATURE_MASK_FPSSE;
>
> if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
> - return;
> + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
>
> fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size,
> supported_xcr0, vcpu->arch.pkru);
> + return 0;
> }
>
> -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
> - struct kvm_xsave *guest_xsave)
> +static int kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
> + struct kvm_xsave *guest_xsave)
> {
> - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
> - sizeof(guest_xsave->region));
> + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region,
> + sizeof(guest_xsave->region));
> }
>
> static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
> struct kvm_xsave *guest_xsave)
> {
> if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
> - return 0;
> + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
>
> return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
> guest_xsave->region,
> @@ -5639,18 +5649,23 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
> &vcpu->arch.pkru);
> }
>
> -static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
> - struct kvm_xcrs *guest_xcrs)
> +static int kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
> + struct kvm_xcrs *guest_xcrs)
> {
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
> guest_xcrs->nr_xcrs = 0;
> - return;
> + return 0;
> }
>
> guest_xcrs->nr_xcrs = 1;
> guest_xcrs->flags = 0;
> guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
> guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
> + return 0;
> }
>
> static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
> @@ -5658,6 +5673,10 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
> {
> int i, r = 0;
>
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> if (!boot_cpu_has(X86_FEATURE_XSAVE))
> return -EINVAL;
>
> @@ -6040,7 +6059,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> case KVM_GET_DEBUGREGS: {
> struct kvm_debugregs dbgregs;
>
> - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
> + r = kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
> + if (r < 0)
> + break;
>
> r = -EFAULT;
> if (copy_to_user(argp, &dbgregs,
> @@ -6070,7 +6091,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> if (!u.xsave)
> break;
>
> - kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
> + r = kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
> + if (r < 0)
> + break;
>
> r = -EFAULT;
> if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
> @@ -6099,7 +6122,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> if (!u.xsave)
> break;
>
> - kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
> + r = kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
> + if (r < 0)
> + break;
>
> r = -EFAULT;
> if (copy_to_user(argp, u.xsave, size))
> @@ -6115,7 +6140,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> if (!u.xcrs)
> break;
>
> - kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
> + r = kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
> + if (r < 0)
> + break;
>
> r = -EFAULT;
> if (copy_to_user(argp, u.xcrs,
> @@ -6259,6 +6286,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> }
> #endif
> case KVM_GET_SREGS2: {
> + r = -EINVAL;
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + goto out;
> +
> u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
> r = -ENOMEM;
> if (!u.sregs2)
> @@ -6271,6 +6303,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> break;
> }
> case KVM_SET_SREGS2: {
> + r = -EINVAL;
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + goto out;
> +
> u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
> if (IS_ERR(u.sregs2)) {
> r = PTR_ERR(u.sregs2);
> @@ -11478,6 +11515,10 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>
> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> vcpu_load(vcpu);
> __get_regs(vcpu, regs);
> vcpu_put(vcpu);
> @@ -11519,6 +11560,10 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>
> int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> vcpu_load(vcpu);
> __set_regs(vcpu, regs);
> vcpu_put(vcpu);
> @@ -11591,6 +11636,10 @@ static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
> int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
> struct kvm_sregs *sregs)
> {
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> vcpu_load(vcpu);
> __get_sregs(vcpu, sregs);
> vcpu_put(vcpu);
> @@ -11858,6 +11907,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
> {
> int ret;
>
> + if (vcpu->kvm->arch.has_protected_state &&
> + vcpu->arch.guest_state_protected)
> + return -EINVAL;
> +
> vcpu_load(vcpu);
> ret = __set_sregs(vcpu, sregs);
> vcpu_put(vcpu);
> @@ -11975,7 +12028,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> struct fxregs_state *fxsave;
>
> if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
> - return 0;
> + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
>
> vcpu_load(vcpu);
>
> @@ -11998,7 +12051,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> struct fxregs_state *fxsave;
>
> if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
> - return 0;
> + return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
>
> vcpu_load(vcpu);
>
> @@ -12524,6 +12577,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> return -EINVAL;
>
> kvm->arch.vm_type = type;
> + kvm->arch.has_private_mem =
> + (type == KVM_X86_SW_PROTECTED_VM);
>
> ret = kvm_page_track_init(kvm);
> if (ret)
> --
> 2.43.0

This works well with the TDX KVM patch series.

Reviewed-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>