Re: [PATCH v3 07/28] drm/i915/gvt: Don't rely on KVM's gfn_to_pfn() to query possible 2M GTT

From: Yan Zhao
Date: Tue May 16 2023 - 21:22:50 EST


Reviewed-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
Tested-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>

On Fri, May 12, 2023 at 05:35:39PM -0700, Sean Christopherson wrote:
> Now that gvt_pin_guest_page() explicitly verifies the pinned PFN is a
> transparent hugepage page, don't use KVM's gfn_to_pfn() to pre-check if a
> 2MiB GTT entry is possible and instead just try to map the GFN with a 2MiB
> entry. Using KVM to query pfn that is ultimately managed through VFIO is
> odd, and KVM's gfn_to_pfn() is not intended for non-KVM consumption; it's
> exported only because of KVM vendor modules (x86 and PPC).
>
> Open code the check on 2MiB support instead of keeping
> is_2MB_gtt_possible() around for a single line of code.
>
> Move the call to intel_gvt_dma_map_guest_page() for a 4KiB entry into its
> case statement, i.e. fork the common path into the 4KiB and 2MiB "direct"
> shadow paths. Keeping the call in the "common" path is arguably more in
> the spirit of "one change per patch", but retaining the local "page_size"
> variable is silly, i.e. the call site will be changed either way, and
> jumping around the no-longer-common code is more subtle and rather odd,
> i.e. would just need to be immediately cleaned up.
>
> Drop the error message from gvt_pin_guest_page() when KVMGT attempts to
> shadow a 2MiB guest page that isn't backed by a compatible hugepage in the
> host. Dropping the pre-check on a THP makes it much more likely that the
> "error" will be encountered in normal operation.
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> drivers/gpu/drm/i915/gvt/gtt.c | 49 ++++++--------------------------
> drivers/gpu/drm/i915/gvt/kvmgt.c | 1 -
> 2 files changed, 8 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
> index 61e38acee2d5..f505be9e647a 100644
> --- a/drivers/gpu/drm/i915/gvt/gtt.c
> +++ b/drivers/gpu/drm/i915/gvt/gtt.c
> @@ -1145,36 +1145,6 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
> ops->set_pfn(se, s->shadow_page.mfn);
> }
>
> -/*
> - * Check if can do 2M page
> - * @vgpu: target vgpu
> - * @entry: target pfn's gtt entry
> - *
> - * Return 1 if 2MB huge gtt shadowing is possible, 0 if miscondition,
> - * negative if found err.
> - */
> -static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
> - struct intel_gvt_gtt_entry *entry)
> -{
> - const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
> - kvm_pfn_t pfn;
> - int ret;
> -
> - if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
> - return 0;
> -
> - pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
> - if (is_error_noslot_pfn(pfn))
> - return -EINVAL;
> -
> - if (!pfn_valid(pfn))
> - return -EINVAL;
> -
> - ret = PageTransHuge(pfn_to_page(pfn));
> - kvm_release_pfn_clean(pfn);
> - return ret;
> -}
> -
> static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
> struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
> struct intel_gvt_gtt_entry *se)
> @@ -1268,7 +1238,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
> {
> const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
> struct intel_gvt_gtt_entry se = *ge;
> - unsigned long gfn, page_size = PAGE_SIZE;
> + unsigned long gfn;
> dma_addr_t dma_addr;
> int ret;
>
> @@ -1283,6 +1253,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
> switch (ge->type) {
> case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
> gvt_vdbg_mm("shadow 4K gtt entry\n");
> + ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
> + if (ret)
> + return -ENXIO;
> break;
> case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
> gvt_vdbg_mm("shadow 64K gtt entry\n");
> @@ -1294,12 +1267,10 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
> return split_64KB_gtt_entry(vgpu, spt, index, &se);
> case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
> gvt_vdbg_mm("shadow 2M gtt entry\n");
> - ret = is_2MB_gtt_possible(vgpu, ge);
> - if (ret == 0)
> + if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
> + intel_gvt_dma_map_guest_page(vgpu, gfn,
> + I915_GTT_PAGE_SIZE_2M, &dma_addr))
> return split_2MB_gtt_entry(vgpu, spt, index, &se);
> - else if (ret < 0)
> - return ret;
> - page_size = I915_GTT_PAGE_SIZE_2M;
> break;
> case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
> gvt_vgpu_err("GVT doesn't support 1GB entry\n");
> @@ -1309,11 +1280,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
> return -EINVAL;
> }
>
> - /* direct shadow */
> - ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
> - if (ret)
> - return -ENXIO;
> -
> + /* Successfully shadowed a 4K or 2M page (without splitting). */
> pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
> ppgtt_set_shadow_entry(spt, &se, index);
> return 0;
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 429f0f993a13..92ceefe1e6fb 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -162,7 +162,6 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
> if (npage == 0)
> base_page = cur_page;
> else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
> - gvt_vgpu_err("The pages are not continuous\n");
> ret = -EINVAL;
> npage++;
> goto err;
> --
> 2.40.1.606.ga4b1b128d6-goog
>