Re: [RESEND PATCH v10 06/10] vmx: spp: Set up SPP paging table at vmentry/vmexit

From: Sean Christopherson
Date: Fri Jan 10 2020 - 13:05:01 EST


On Thu, Jan 02, 2020 at 02:13:15PM +0800, Yang Weijiang wrote:
> @@ -3585,7 +3602,30 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
> if ((error_code & PFERR_WRITE_MASK) &&
> spte_can_locklessly_be_made_writable(spte))
> {
> - new_spte |= PT_WRITABLE_MASK;
> + /*
> + * Record write protect fault caused by
> + * Sub-page Protection, let VMI decide
> + * the next step.
> + */
> + if (spte & PT_SPP_MASK) {
> + int len = kvm_x86_ops->get_inst_len(vcpu);

There's got to be a better way to handle SPP exits than adding a helper
to retrieve the instruction length.

> +
> + fault_handled = true;
> + vcpu->run->exit_reason = KVM_EXIT_SPP;
> + vcpu->run->spp.addr = gva;
> + vcpu->run->spp.ins_len = len;

s/ins_len/insn_len to be consistent with other KVM nomenclature.

> + trace_kvm_spp_induced_page_fault(vcpu,
> + gva,
> + len);
> + break;
> + }
> +
> + if (was_spp_armed(new_spte)) {
> + restore_spp_bit(&new_spte);
> + spp_protected = true;
> + } else {
> + new_spte |= PT_WRITABLE_MASK;
> + }
>
> /*
> * Do not fix write-permission on the large spte. Since

...

> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 24e4e1c47f42..97d862c79124 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -200,7 +200,6 @@ static const struct {
> [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
> [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
> };
> -

Spurious whitespace.

> #define L1D_CACHE_ORDER 4
> static void *vmx_l1d_flush_pages;
>
> @@ -2999,6 +2998,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> bool update_guest_cr3 = true;
> unsigned long guest_cr3;
> u64 eptp;
> + u64 spptp;
>
> guest_cr3 = cr3;
> if (enable_ept) {
> @@ -3027,6 +3027,12 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>
> if (update_guest_cr3)
> vmcs_writel(GUEST_CR3, guest_cr3);
> +
> + if (kvm->arch.spp_active && VALID_PAGE(vcpu->kvm->arch.sppt_root)) {
> + spptp = construct_spptp(vcpu->kvm->arch.sppt_root);
> + vmcs_write64(SPPT_POINTER, spptp);
> + vmx_flush_tlb(vcpu, true);

Why is SPP so special that it gets to force TLB flushes?

> + }
> }
>
> int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> @@ -5361,6 +5367,74 @@ static int handle_monitor_trap(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +int handle_spp(struct kvm_vcpu *vcpu)

Can be static.

> +{
> + unsigned long exit_qualification;
> + struct kvm_memory_slot *slot;
> + gpa_t gpa;
> + gfn_t gfn;
> +
> + exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +
> + /*
> + * SPP VM exit happened while executing iret from NMI,
> + * "blocked by NMI" bit has to be set before next VM entry.
> + * There are errata that may cause this bit to not be set:
> + * AAK134, BY25.
> + */
> + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> + (exit_qualification & SPPT_INTR_INFO_UNBLOCK_NMI))
> + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> + GUEST_INTR_STATE_NMI);
> +
> + vcpu->arch.exit_qualification = exit_qualification;

if (WARN_ON(!(exit_qualification & SPPT_INDUCED_EXIT_TYPE)))
goto out_err;

<handle spp exit>

return 1;

out_err:
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = EXIT_REASON_SPP;
return 0;

> + if (exit_qualification & SPPT_INDUCED_EXIT_TYPE) {
> + int page_num = KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL);

The compiler is probably clever enough to make these constants, but if
this logic is a fundamental property of SPP then it should be defined as
a macro somewhere.

> + u32 *access;
> + gfn_t gfn_max;
> +
> + /*
> + * SPPT missing
> + * We don't set SPP write access for the corresponding
> + * GPA, if we haven't setup, we need to construct
> + * SPP table here.
> + */
> + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> + gfn = gpa >> PAGE_SHIFT;

gpa_to_gfn()

> + trace_kvm_spp_induced_exit(vcpu, gpa, exit_qualification);
> + /*
> + * In level 1 of SPPT, there's no PRESENT bit, all data is
> + * regarded as permission vector, so need to check from
> + * level 2 to set up the vector if target page is protected.
> + */
> + spin_lock(&vcpu->kvm->mmu_lock);
> + gfn &= ~(page_num - 1);



> + gfn_max = gfn + page_num - 1;

s/gfn_max/gfn_end

> + for (; gfn <= gfn_max; gfn++) {

My preference would be to do:
gfn_end = gfn + page_num;

for ( ; gfn < gfn_end; gfn++)

> + slot = gfn_to_memslot(vcpu->kvm, gfn);
> + if (!slot)
> + continue;
> + access = gfn_to_subpage_wp_info(slot, gfn);
> + if (access && *access != FULL_SPP_ACCESS)
> + kvm_spp_setup_structure(vcpu,
> + *access,
> + gfn);
> + }
> + spin_unlock(&vcpu->kvm->mmu_lock);
> + return 1;
> + }
> + /*
> + * SPPT Misconfig
> + * This is probably caused by some mis-configuration in SPPT
> + * entries, cannot handle it here, escalate the fault to
> + * emulator.
> + */
> + WARN_ON(1);
> + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
> + vcpu->run->hw.hardware_exit_reason = EXIT_REASON_SPP;
> + return 0;
> +}
> +
> static int handle_monitor(struct kvm_vcpu *vcpu)
> {
> printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> @@ -5575,6 +5649,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
> [EXIT_REASON_INVVPID] = handle_vmx_instruction,
> [EXIT_REASON_RDRAND] = handle_invalid_op,
> [EXIT_REASON_RDSEED] = handle_invalid_op,
> + [EXIT_REASON_SPP] = handle_spp,
> [EXIT_REASON_PML_FULL] = handle_pml_full,
> [EXIT_REASON_INVPCID] = handle_invpcid,
> [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
> @@ -5807,6 +5882,9 @@ void dump_vmcs(void)
> pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
> if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
> pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
> + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_SPP))
> + pr_err("SPPT pointer = 0x%016llx\n", vmcs_read64(SPPT_POINTER));
> +
> n = vmcs_read32(CR3_TARGET_COUNT);
> for (i = 0; i + 1 < n; i += 4)
> pr_err("CR3 target%u=%016lx target%u=%016lx\n",
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fb7da000ceaf..a9d7fc21dad6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9782,6 +9782,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
> }
>
> kvm_page_track_free_memslot(free, dont);
> + kvm_spp_free_memslot(free, dont);
> }
>
> int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
> @@ -10406,3 +10407,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
> EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_spp_set_subpages);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_spp_induced_exit);
> +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_spp_induced_page_fault);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 09e5e8e6e6dd..c0f3162ee46a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -244,6 +244,7 @@ struct kvm_hyperv_exit {
> #define KVM_EXIT_IOAPIC_EOI 26
> #define KVM_EXIT_HYPERV 27
> #define KVM_EXIT_ARM_NISV 28
> +#define KVM_EXIT_SPP 29
>
> /* For KVM_EXIT_INTERNAL_ERROR */
> /* Emulate instruction failed. */
> @@ -401,6 +402,11 @@ struct kvm_run {
> struct {
> __u8 vector;
> } eoi;
> + /* KVM_EXIT_SPP */
> + struct {
> + __u64 addr;
> + __u8 ins_len;
> + } spp;
> /* KVM_EXIT_HYPERV */
> struct kvm_hyperv_exit hyperv;
> /* KVM_EXIT_ARM_NISV */
> --
> 2.17.2
>