Re: [RESEND PATCH v10 06/10] vmx: spp: Set up SPP paging table at vmentry/vmexit

From: Yang Weijiang
Date: Mon Jan 13 2020 - 03:06:34 EST


On Fri, Jan 10, 2020 at 10:04:59AM -0800, Sean Christopherson wrote:
> On Thu, Jan 02, 2020 at 02:13:15PM +0800, Yang Weijiang wrote:
> > @@ -3585,7 +3602,30 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
> > if ((error_code & PFERR_WRITE_MASK) &&
> > spte_can_locklessly_be_made_writable(spte)) {
> > - new_spte |= PT_WRITABLE_MASK;
> > + /*
> > + * Record write protect fault caused by
> > + * Sub-page Protection, let VMI decide
> > + * the next step.
> > + */
> > + if (spte & PT_SPP_MASK) {
> > + int len = kvm_x86_ops->get_inst_len(vcpu);
>
> There's got to be a better way to handle SPP exits than adding a helper
> to retrieve the instruction length.
>
The faulting instruction used to be skipped via
kvm_skip_emulated_instruction(), but Paolo suggested leaving the re-do
or skip decision to user-space, to keep it flexible for both write
protection and write tracking, so the instruction length is returned to
user-space.

> > +
> > + fault_handled = true;
> > + vcpu->run->exit_reason = KVM_EXIT_SPP;
> > + vcpu->run->spp.addr = gva;
> > + vcpu->run->spp.ins_len = len;
>
> s/ins_len/insn_len to be consistent with other KVM nomenclature.
>
OK.

> > + trace_kvm_spp_induced_page_fault(vcpu,
> > + gva,
> > + len);
> > + break;
> > + }
> > +
> > + if (was_spp_armed(new_spte)) {
> > + restore_spp_bit(&new_spte);
> > + spp_protected = true;
> > + } else {
> > + new_spte |= PT_WRITABLE_MASK;
> > + }
> >
> > /*
> > * Do not fix write-permission on the large spte. Since
>
> ...
>
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 24e4e1c47f42..97d862c79124 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -200,7 +200,6 @@ static const struct {
> > [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
> > [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
> > };
> > -
>
> Spurious whitespace.
>
OK.

> > #define L1D_CACHE_ORDER 4
> > static void *vmx_l1d_flush_pages;
> >
> > @@ -2999,6 +2998,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> > bool update_guest_cr3 = true;
> > unsigned long guest_cr3;
> > u64 eptp;
> > + u64 spptp;
> >
> > guest_cr3 = cr3;
> > if (enable_ept) {
> > @@ -3027,6 +3027,12 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> >
> > if (update_guest_cr3)
> > vmcs_writel(GUEST_CR3, guest_cr3);
> > +
> > + if (kvm->arch.spp_active && VALID_PAGE(vcpu->kvm->arch.sppt_root)) {
> > + spptp = construct_spptp(vcpu->kvm->arch.sppt_root);
> > + vmcs_write64(SPPT_POINTER, spptp);
> > + vmx_flush_tlb(vcpu, true);
>
> Why is SPP so special that it gets to force TLB flushes?
I double-checked the code; there's already a call to vmx_flush_tlb() in
mmu_load(), so the flush is unnecessary here, thank you!
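With the flush dropped, the hunk reduces to just:

	if (kvm->arch.spp_active && VALID_PAGE(vcpu->kvm->arch.sppt_root)) {
		spptp = construct_spptp(vcpu->kvm->arch.sppt_root);
		vmcs_write64(SPPT_POINTER, spptp);
	}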

> > + }
> > }
> >
> > int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> > @@ -5361,6 +5367,74 @@ static int handle_monitor_trap(struct kvm_vcpu *vcpu)
> > return 1;
> > }
> >
> > +int handle_spp(struct kvm_vcpu *vcpu)
>
> Can be static.
>
Thanks!

> > +{
> > + unsigned long exit_qualification;
> > + struct kvm_memory_slot *slot;
> > + gpa_t gpa;
> > + gfn_t gfn;
> > +
> > + exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > +
> > + /*
> > + * SPP VM exit happened while executing iret from NMI,
> > + * "blocked by NMI" bit has to be set before next VM entry.
> > + * There are errata that may cause this bit to not be set:
> > + * AAK134, BY25.
> > + */
> > + if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> > + (exit_qualification & SPPT_INTR_INFO_UNBLOCK_NMI))
> > + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> > + GUEST_INTR_STATE_NMI);
> > +
> > + vcpu->arch.exit_qualification = exit_qualification;
>
> if (WARN_ON(!(exit_qualification & SPPT_INDUCED_EXIT_TYPE)))
> goto out_err;
>
> <handle spp exit>
>
> return 1;
>
> out_err:
> vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
> vcpu->run->hw.hardware_exit_reason = EXIT_REASON_SPP;
> return 0;
>
Sure, will change it.
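Roughly, the restructured handler would then look like this (sketch
only, the SPPT-miss body from the current patch is elided):

	static int handle_spp(struct kvm_vcpu *vcpu)
	{
		unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);

		/* Errata AAK134/BY25: restore "blocked by NMI" if needed. */
		if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
		    (exit_qualification & SPPT_INTR_INFO_UNBLOCK_NMI))
			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
				      GUEST_INTR_STATE_NMI);

		vcpu->arch.exit_qualification = exit_qualification;

		if (WARN_ON(!(exit_qualification & SPPT_INDUCED_EXIT_TYPE)))
			goto out_err;

		/* ... set up the SPPT entries / permission vectors ... */
		return 1;

	out_err:
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_SPP;
		return 0;
	}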

> > + if (exit_qualification & SPPT_INDUCED_EXIT_TYPE) {
> > + int page_num = KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL);
>
> The compiler is probably clever enough to make these constants, but if
> this logic is a fundamental property of SPP then it should be defined as
> a macro somewhere.
>
OK, will change it.
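Something along these lines (the macro name is made up):

	/* GFNs covered by one SPPT level-2 entry, i.e. one 2M region. */
	#define SPPT_L2E_PAGE_NUM	KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL)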

> > + u32 *access;
> > + gfn_t gfn_max;
> > +
> > + /*
> > + * SPPT missing:
> > + * SPP write access hasn't been set up for the
> > + * corresponding GPA yet, so construct the SPP table
> > + * entries for it here.
> > + */
> > + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> > + gfn = gpa >> PAGE_SHIFT;
>
> gpa_to_gfn()
>
OK.

> > + trace_kvm_spp_induced_exit(vcpu, gpa, exit_qualification);
> > + /*
> > + * SPPT level 1 entries have no PRESENT bit; their contents
> > + * are treated entirely as the permission vector, so walk from
> > + * level 2 to set up the vector if the target page is protected.
> > + */
> > + spin_lock(&vcpu->kvm->mmu_lock);
> > + gfn &= ~(page_num - 1);
>
> > + gfn_max = gfn + page_num - 1;
>
> s/gfn_max/gfn_end
OK.

>
> > + for (; gfn <= gfn_max; gfn++) {
>
> My preference would be to do:
> gfn_end = gfn + page_num;
>
> for ( ; gfn < gfn_end; gfn++)
>
Thank you!
> > + slot = gfn_to_memslot(vcpu->kvm, gfn);