Re: [PATCH 21/25] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions

From: Kai Huang
Date: Tue Mar 02 2021 - 03:16:22 EST


On Mon, 1 Mar 2021 09:20:15 -0800 Sean Christopherson wrote:
> On Mon, Mar 01, 2021, Kai Huang wrote:
> > +static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
> > +{
> > + struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
> > + gva_t pageinfo_gva, secs_gva;
> > + gva_t metadata_gva, contents_gva;
> > + gpa_t metadata_gpa, contents_gpa, secs_gpa;
> > + unsigned long metadata_hva, contents_hva, secs_hva;
> > + struct sgx_pageinfo pageinfo;
> > + struct sgx_secs *contents;
> > + u64 attributes, xfrm, size;
> > + u32 miscselect;
> > + struct x86_exception ex;
> > + u8 max_size_log2;
> > + int trapnr, r;
> > +
>
> (see below)
>
> --- cut here --- >8
>
> > + sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> > + sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> > + if (!sgx_12_0 || !sgx_12_1) {
> > + kvm_inject_gp(vcpu, 0);
>
> This should probably be an emulation failure. This code is reached iff SGX1 is
> enabled in the guest, userspace done messed up if they enabled SGX1 without
> defining CPUID.0x12.1. That also makes it more obvious that burying this in a
> helper after a bunch of other checks isn't wrong, i.e. KVM has already verified
> that SGX1 is enabled in the guest.
>
> > + return 1;
> > + }
>
> ---
>
> > +
> > + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
> > + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
> > + return 1;
> > +
> > + /*
> > + * Copy the PAGEINFO to local memory, its pointers need to be
> > + * translated, i.e. we need to do a deep copy/translate.
> > + */
> > + r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
> > + sizeof(pageinfo), &ex);
> > + if (r == X86EMUL_PROPAGATE_FAULT) {
> > + kvm_inject_emulated_page_fault(vcpu, &ex);
> > + return 1;
> > + } else if (r != X86EMUL_CONTINUE) {
> > + sgx_handle_emulation_failure(vcpu, pageinfo_gva, size);
> > + return 0;
> > + }
> > +
> > + if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
> > + sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
> > + &contents_gva))
> > + return 1;
> > +
> > + /*
> > + * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
> > + * Resume the guest on failure to inject a #PF.
> > + */
> > + if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
> > + sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
> > + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
> > + return 1;
> > +
> > + /*
> > + * ...and then to HVA. The order of accesses isn't architectural, i.e.
> > + * KVM doesn't have to fully process one address at a time. Exit to
> > + * userspace if a GPA is invalid.
> > + */
> > + if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
> > + sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
> > + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
> > + return 0;
> > + /*
> > + * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
> > + * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
> > + * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
> > + * enforce restriction of access to the PROVISIONKEY.
> > + */
> > + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
> > + if (!contents)
> > + return -ENOMEM;
>
> --- cut here --- >8
>
> > +
> > + /* Exit to userspace if copying from a host userspace address fails. */
> > + if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE))
>
> This, and every failure path below, will leak 'contents'. The easiest thing is
> probably to wrap everything in "cut here" in a separate helper. The CPUID
> lookups can be , e.g.
>
> contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
> if (!contents)
> return -ENOMEM;
>
> r = __handle_encls_ecreate(vcpu, &pageinfo, secs);
>
> free_page((unsigned long)contents);
> return r;
>
> And then the helper can be everything below, plus the CPUID lookup:
>
> sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
> sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
> if (!sgx_12_0 || !sgx_12_1) {
> vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> vcpu->run->internal.ndata = 0;
> return 0;
> }
>
>

Hi Sean,

I ended up with below:

1) Copy contents is out of __handle_encls_ecreate() since from my perspective
it is a more reasonable split logically.
2) In __handle_encls_ecreate(), finding sgx_12_0 and sgx_12_1 is at first since
provisionkey bit check requires sgx_12_1->eax.
3) __handle_encls_ecreate() needs secs_gva since sgx_inject_fault() requires it.

Is it OK to you?

+static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
+ struct sgx_pageinfo *pageinfo,
+ unsigned long secs_hva,
+ gva_t secs_gva)
+{
+ struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
+ struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
+ u64 attributes, xfrm, size;
+ u32 miscselect;
+ u8 max_size_log2;
+ int trapnr;
+
+ sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
+ sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
+ if (!sgx_12_0 || !sgx_12_1) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
+ }
+
+ miscselect = contents->miscselect;
+ attributes = contents->attributes;
+ xfrm = contents->xfrm;
+ size = contents->size;
+
+ /* Enforce restriction of access to the PROVISIONKEY. */
+ if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
+ (attributes & SGX_ATTR_PROVISIONKEY)) {
+ if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
+ pr_warn_once("KVM: SGX PROVISIONKEY advertised but not
allowed\n");
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
+ if ((u32)miscselect & ~sgx_12_0->ebx ||
+ (u32)attributes & ~sgx_12_1->eax ||
+ (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
+ (u32)xfrm & ~sgx_12_1->ecx ||
+ (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ /* Enforce CPUID restriction on max enclave size. */
+ max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
+ sgx_12_0->edx;
+ if (size >= BIT_ULL(max_size_log2))
+ kvm_inject_gp(vcpu, 0);
+
+ if (sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr))
+ return sgx_inject_fault(vcpu, secs_gva, trapnr);
+
+ return kvm_skip_emulated_instruction(vcpu);
+}
+
+static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
+{
+ gva_t pageinfo_gva, secs_gva;
+ gva_t metadata_gva, contents_gva;
+ gpa_t metadata_gpa, contents_gpa, secs_gpa;
+ unsigned long metadata_hva, contents_hva, secs_hva;
+ struct sgx_pageinfo pageinfo;
+ struct sgx_secs *contents;
+ struct x86_exception ex;
+ int r;
+
+ if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva)
||
+ sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
+ return 1;
+
+ /*
+ * Copy the PAGEINFO to local memory, its pointers need to be
+ * translated, i.e. we need to do a deep copy/translate.
+ */
+ r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
+ sizeof(pageinfo), &ex);
+ if (r == X86EMUL_PROPAGATE_FAULT) {
+ kvm_inject_emulated_page_fault(vcpu, &ex);
+ return 1;
+ } else if (r != X86EMUL_CONTINUE) {
+ sgx_handle_emulation_failure(vcpu, pageinfo_gva,
+ sizeof(pageinfo));
+ return 0;
+ }
+
+ if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
+ sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
+ &contents_gva))
+ return 1;
+
+ /*
+ * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
+ * Resume the guest on failure to inject a #PF.
+ */
+ if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
+ sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
+ sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
+ return 1;
+
+ /*
+ * ...and then to HVA. The order of accesses isn't architectural, i.e.
+ * KVM doesn't have to fully process one address at a time. Exit to
+ * userspace if a GPA is invalid.
+ */
+ if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
+ sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
+ sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
+ return 0;
+
+ /*
+ * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
+ * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
+ * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
+ * enforce restriction of access to the PROVISIONKEY.
+ */
+ contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL);
+ if (!contents)
+ return -ENOMEM;
+
+ /* Exit to userspace if copying from a host userspace address fails. */
+ if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
+ free_page((unsigned long)contents);
+ return 0;
+ }
+
+ pageinfo.metadata = metadata_hva;
+ pageinfo.contents = (u64)contents;
+
+ r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
+
+ free_page((unsigned long)contents);
+
+ return r;
+}