Re: [PATCH v6 1/2] iommu/io-pgtable-arm: Add way to debug pgtable walk

From: Mostafa Saleh
Date: Mon Jul 29 2024 - 07:45:48 EST


Hi Rob,

On Wed, Jul 17, 2024 at 09:36:21AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@xxxxxxxxxxxx>
>
> Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> would be traversed for a given iova access.
>
> Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
> Acked-by: Joerg Roedel <jroedel@xxxxxxx>
>
> ---
> drivers/iommu/io-pgtable-arm.c | 36 +++++++++++++++++++++++++---------
> include/linux/io-pgtable.h | 17 ++++++++++++++++
> 2 files changed, 44 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 3d23b924cec1..e70803940b46 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -690,9 +690,11 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> data->start_level, ptep);
> }
>
> -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> - unsigned long iova)
> +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops,
> + unsigned long iova,
> + void *_wd)
> {
> + struct arm_lpae_io_pgtable_walk_data *wd = _wd;
> struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> arm_lpae_iopte pte, *ptep = data->pgd;
> int lvl = data->start_level;
> @@ -700,7 +702,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> do {
> /* Valid IOPTE pointer? */
> if (!ptep)
> - return 0;
> + return -ENOENT;
>
> /* Grab the IOPTE we're interested in */
> ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> @@ -708,22 +710,37 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>
> /* Valid entry? */
> if (!pte)
> - return 0;
> + return -ENOENT;
>
> - /* Leaf entry? */
> + wd->ptes[wd->level++] = pte;
> +
> + /* Leaf entry? If so, we've found the translation */
> if (iopte_leaf(pte, lvl, data->iop.fmt))
> - goto found_translation;
> + return 0;
>
> /* Take it to the next level */
> ptep = iopte_deref(pte, data);
> } while (++lvl < ARM_LPAE_MAX_LEVELS);
>
> /* Ran out of page tables to walk */
> - return 0;
> + return -ENOENT;
> +}
> +
> +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> + unsigned long iova)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + struct arm_lpae_io_pgtable_walk_data wd = {};
> + int ret, lvl;
> +
> + ret = arm_lpae_pgtable_walk(ops, iova, &wd);
> + if (ret)
> + return 0;
> +
> + lvl = wd.level + data->start_level;
>
> -found_translation:
> iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
> - return iopte_to_paddr(pte, data) | iova;
> + return iopte_to_paddr(wd.ptes[wd.level - 1], data) | iova;
> }
>
> static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
> @@ -804,6 +821,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
> .map_pages = arm_lpae_map_pages,
> .unmap_pages = arm_lpae_unmap_pages,
> .iova_to_phys = arm_lpae_iova_to_phys,
> + .pgtable_walk = arm_lpae_pgtable_walk,
> };
>
> return data;
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..df6f6e58310c 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -171,12 +171,28 @@ struct io_pgtable_cfg {
> };
> };
>
> +/**
> + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk
> + *
> + * @ptes: The recorded PTE values from the walk
> + * @level: The level of the last PTE
> + *
> + * @level also specifies the last valid index in @ptes
> + */
> +struct arm_lpae_io_pgtable_walk_data {
> + u64 ptes[4];

I don’t see a reason to save the whole walk for iova_to_phys,
I see that the DRM driver uses those next, but I am worried that
won’t scale, a callback mechanism sounds better.

Also, there is a page table walker recently added to io-pagtable-arm,
for dirty bit tracking:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/iommu/io-pgtable-arm.c?id=4fe88fd8b4aecb7f9680bf898811db76b94095a9

I’d suggest consolidating those into one walker where each caller
has its own logic in a callback.

Thanks,
Mostafa

> + int level;
> +};
> +
> /**
> * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
> *
> * @map_pages: Map a physically contiguous range of pages of the same size.
> * @unmap_pages: Unmap a range of virtually contiguous pages of the same size.
> * @iova_to_phys: Translate iova to physical address.
> + * @pgtable_walk: (optional) Perform a page table walk for a given iova. The
> + * type for the wd parameter is specific to pgtable type, as
> + * the PTE size and number of levels differs per pgtable type.
> *
> * These functions map directly onto the iommu_ops member functions with
> * the same names.
> @@ -190,6 +206,7 @@ struct io_pgtable_ops {
> struct iommu_iotlb_gather *gather);
> phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
> unsigned long iova);
> + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd);
> int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
> unsigned long iova, size_t size,
> unsigned long flags,
> --
> 2.45.2
>