Re: [PATCH v3 1/2] iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable format

From: Jordan Crouse
Date: Thu Aug 15 2019 - 11:35:29 EST


On Wed, Aug 07, 2019 at 04:21:39PM -0600, Jordan Crouse wrote:
> Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for
> use by the Adreno GPU. This will allow the GPU driver to map global
> buffers in TTBR1 and leave TTBR0 configured but unset and
> free to be changed dynamically by the GPU.

It would take a bit of code rework and making a few functions non-static, but
I'm wondering if it would be cleaner to add the Adreno GPU pagetable format in a
new file, such as io-pgtable-adreno.c.

Jordan

> Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
> ---
>
> drivers/iommu/io-pgtable-arm.c | 214 ++++++++++++++++++++++++++++++++++++++---
> drivers/iommu/io-pgtable.c | 1 +
> include/linux/io-pgtable.h | 2 +
> 3 files changed, 202 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 161a7d5..8eb0dbb 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -112,13 +112,19 @@
> #define ARM_32_LPAE_TCR_EAE (1 << 31)
> #define ARM_64_LPAE_S2_TCR_RES1 (1 << 31)
>
> +#define ARM_LPAE_TCR_EPD0 (1 << 7)
> #define ARM_LPAE_TCR_EPD1 (1 << 23)
>
> #define ARM_LPAE_TCR_TG0_4K (0 << 14)
> #define ARM_LPAE_TCR_TG0_64K (1 << 14)
> #define ARM_LPAE_TCR_TG0_16K (2 << 14)
>
> +#define ARM_LPAE_TCR_TG1_4K (0 << 30)
> +#define ARM_LPAE_TCR_TG1_64K (1 << 30)
> +#define ARM_LPAE_TCR_TG1_16K (2 << 30)
> +
> #define ARM_LPAE_TCR_SH0_SHIFT 12
> +#define ARM_LPAE_TCR_SH1_SHIFT 28
> #define ARM_LPAE_TCR_SH0_MASK 0x3
> #define ARM_LPAE_TCR_SH_NS 0
> #define ARM_LPAE_TCR_SH_OS 2
> @@ -126,6 +132,8 @@
>
> #define ARM_LPAE_TCR_ORGN0_SHIFT 10
> #define ARM_LPAE_TCR_IRGN0_SHIFT 8
> +#define ARM_LPAE_TCR_ORGN1_SHIFT 26
> +#define ARM_LPAE_TCR_IRGN1_SHIFT 24
> #define ARM_LPAE_TCR_RGN_MASK 0x3
> #define ARM_LPAE_TCR_RGN_NC 0
> #define ARM_LPAE_TCR_RGN_WBWA 1
> @@ -136,6 +144,7 @@
> #define ARM_LPAE_TCR_SL0_MASK 0x3
>
> #define ARM_LPAE_TCR_T0SZ_SHIFT 0
> +#define ARM_LPAE_TCR_T1SZ_SHIFT 16
> #define ARM_LPAE_TCR_SZ_MASK 0xf
>
> #define ARM_LPAE_TCR_PS_SHIFT 16
> @@ -152,6 +161,14 @@
> #define ARM_LPAE_TCR_PS_48_BIT 0x5ULL
> #define ARM_LPAE_TCR_PS_52_BIT 0x6ULL
>
> +#define ARM_LPAE_TCR_SEP_SHIFT 47
> +#define ARM_LPAE_TCR_SEP_31 (0x0ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_35 (0x1ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_39 (0x2ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_41 (0x3ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_43 (0x4ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_UPSTREAM (0x7ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +
> #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3)
> #define ARM_LPAE_MAIR_ATTR_MASK 0xff
> #define ARM_LPAE_MAIR_ATTR_DEVICE 0x04
> @@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
> arm_lpae_iopte pte;
>
> if (data->iop.fmt == ARM_64_LPAE_S1 ||
> - data->iop.fmt == ARM_32_LPAE_S1) {
> + data->iop.fmt == ARM_32_LPAE_S1 ||
> + data->iop.fmt == ARM_ADRENO_GPU_LPAE) {
> pte = ARM_LPAE_PTE_nG;
> if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
> pte |= ARM_LPAE_PTE_AP_RDONLY;
> @@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> return ret;
> }
>
> +static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops,
> + unsigned long iova, phys_addr_t paddr, size_t size,
> + int iommu_prot)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> + /* This configuration expects all iova addresses to be in TTBR1 */
> + if (WARN_ON(iova & mask))
> + return -ERANGE;
> +
> + /* Mask off the sign extended bits and map as usual */
> + return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot);
> +}
> +
> static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
> arm_lpae_iopte *ptep)
> {
> @@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
> return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
> }
>
> +static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops,
> + unsigned long iova, size_t size)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + arm_lpae_iopte *ptep = data->pgd;
> + int lvl = ARM_LPAE_START_LVL(data);
> + unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> + /* Make sure the sign extend bit is set in the iova */
> + if (WARN_ON(!(iova & mask)))
> + return 0;
> +
> + /* Mask off the sign extended bits before unmapping */
> + return __arm_lpae_unmap(data, iova & (mask - 1), size, lvl, ptep);
> +}
> +
> static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
> size_t size)
> {
> @@ -692,6 +741,17 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> return iopte_to_paddr(pte, data) | iova;
> }
>
> +
> +static phys_addr_t arm_adreno_gpu_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> + unsigned long iova)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> + /* Mask off the sign extended bits before translating */
> + return arm_lpae_iova_to_phys(ops, iova & (mask - 1));
> +}
> +
> static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
> {
> unsigned long granule, page_sizes;
> @@ -771,17 +831,16 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
> pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1));
> data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte)));
>
> - data->iop.ops = (struct io_pgtable_ops) {
> - .map = arm_lpae_map,
> - .unmap = arm_lpae_unmap,
> - .iova_to_phys = arm_lpae_iova_to_phys,
> - };
>
> return data;
> }
>
> -static struct io_pgtable *
> -arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
> +/*
> + * Common allocation function for S1 pagetables. Set up the TTBR0 region and
> + * allocate a default pagetable
> + */
> +static struct arm_lpae_io_pgtable *
> +_arm_64_lpae_alloc_pgtable_s1_common(struct io_pgtable_cfg *cfg)
> {
> u64 reg;
> struct arm_lpae_io_pgtable *data;
> @@ -845,8 +904,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>
> reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
>
> - /* Disable speculative walks through TTBR1 */
> - reg |= ARM_LPAE_TCR_EPD1;
> cfg->arm_lpae_s1_cfg.tcr = reg;
>
> /* MAIRs */
> @@ -870,16 +927,131 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
> /* Ensure the empty pgd is visible before any actual TTBR write */
> wmb();
>
> - /* TTBRs */
> - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
> - cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
> - return &data->iop;
> -
> + return data;
> out_free_data:
> kfree(data);
> return NULL;
> }
>
> +
> +static struct io_pgtable *
> +arm_adreno_gpu_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
> +{
> + struct arm_lpae_io_pgtable *data;
> + u64 reg;
> +
> + /*
> + * Make sure the ias aligns with the available options for the sign
> + * extension bit
> + */
> + switch (cfg->ias) {
> + case 32:
> + case 36:
> + case 40:
> + case 42:
> + case 44:
> + /*
> + * The SEP will be the highest available bit so adjust the data
> + * size by one to accommodate it
> + */
> + cfg->ias--;
> + break;
> + case 48:
> + /*
> + * IAS of 48 is a special case, it has a dedicated sign
> + * extension bit so we can use the full IAS size
> + */
> + break;
> + default:
> + /* The ias doesn't work for the available SEP options */
> + return NULL;
> + }
> +
> + data = _arm_64_lpae_alloc_pgtable_s1_common(cfg);
> + if (!data)
> + return NULL;
> +
> + reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH1_SHIFT) |
> + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN1_SHIFT) |
> + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN1_SHIFT);
> +
> + switch (ARM_LPAE_GRANULE(data)) {
> + case SZ_4K:
> + reg |= ARM_LPAE_TCR_TG1_4K;
> + break;
> + case SZ_16K:
> + reg |= ARM_LPAE_TCR_TG1_16K;
> + break;
> + case SZ_64K:
> + reg |= ARM_LPAE_TCR_TG1_64K;
> + break;
> + }
> +
> + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT;
> +
> + /* Set the sign extension bit */
> + switch (cfg->ias) {
> + case 31:
> + reg |= ARM_LPAE_TCR_SEP_31;
> + break;
> + case 35:
> + reg |= ARM_LPAE_TCR_SEP_35;
> + break;
> + case 39:
> + reg |= ARM_LPAE_TCR_SEP_39;
> + break;
> + case 41:
> + reg |= ARM_LPAE_TCR_SEP_41;
> + break;
> + case 43:
> + reg |= ARM_LPAE_TCR_SEP_43;
> + break;
> + case 48:
> + reg |= ARM_LPAE_TCR_SEP_UPSTREAM;
> + break;
> + }
> +
> + cfg->arm_lpae_s1_cfg.tcr |= reg;
> +
> + /* Set the allocated pgd to ttbr1 and leave ttbr0 empty */
> + cfg->arm_lpae_s1_cfg.ttbr[0] = 0;
> + cfg->arm_lpae_s1_cfg.ttbr[1] = virt_to_phys(data->pgd);
> +
> + /* Set use case specific pgtable helpers */
> + data->iop.ops = (struct io_pgtable_ops) {
> + .map = arm_adreno_gpu_lpae_map,
> + .unmap = arm_adreno_gpu_lpae_unmap,
> + .iova_to_phys = arm_adreno_gpu_lpae_iova_to_phys,
> + };
> +
> + return &data->iop;
> +}
> +
> +static struct io_pgtable *
> +arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
> +{
> + struct arm_lpae_io_pgtable *data;
> +
> + data = _arm_64_lpae_alloc_pgtable_s1_common(cfg);
> + if (!data)
> + return NULL;
> +
> + /* Disable speculative walks through TTBR1 */
> + cfg->arm_lpae_s1_cfg.tcr |= ARM_LPAE_TCR_EPD1;
> +
> + /* Set the pgd to TTBR0 */
> + cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
> + cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
> +
> + data->iop.ops = (struct io_pgtable_ops) {
> + .map = arm_lpae_map,
> + .unmap = arm_lpae_unmap,
> + .iova_to_phys = arm_lpae_iova_to_phys,
> + };
> +
> + return &data->iop;
> +}
> +
> static struct io_pgtable *
> arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
> {
> @@ -894,6 +1066,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
> if (!data)
> return NULL;
>
> + data->iop.ops = (struct io_pgtable_ops) {
> + .map = arm_lpae_map,
> + .unmap = arm_lpae_unmap,
> + .iova_to_phys = arm_lpae_iova_to_phys,
> + };
> +
> /*
> * Concatenate PGDs at level 1 if possible in order to reduce
> * the depth of the stage-2 walk.
> @@ -1041,6 +1219,11 @@ struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
> .free = arm_lpae_free_pgtable,
> };
>
> +struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns = {
> + .alloc = arm_adreno_gpu_lpae_alloc_pgtable,
> + .free = arm_lpae_free_pgtable,
> +};
> +
> struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
> .alloc = arm_64_lpae_alloc_pgtable_s2,
> .free = arm_lpae_free_pgtable,
> @@ -1112,6 +1295,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
> static const enum io_pgtable_fmt fmts[] = {
> ARM_64_LPAE_S1,
> ARM_64_LPAE_S2,
> + ARM_64_LPAE_TTBR1_S1,
> };
>
> int i, j;
> diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
> index ced53e5..e47ed2d 100644
> --- a/drivers/iommu/io-pgtable.c
> +++ b/drivers/iommu/io-pgtable.c
> @@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
> [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
> [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
> [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
> + [ARM_ADRENO_GPU_LPAE] = &io_pgtable_arm_adreno_gpu_lpae_init_fns,
> #endif
> #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
> [ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index b5a450a..4871e85 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -13,6 +13,7 @@ enum io_pgtable_fmt {
> ARM_64_LPAE_S2,
> ARM_V7S,
> ARM_MALI_LPAE,
> + ARM_ADRENO_GPU_LPAE,
> IO_PGTABLE_NUM_FMTS,
> };
>
> @@ -213,5 +214,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
> extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
> extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
> extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
> +extern struct io_pgtable_init_fns io_pgtable_arm_adreno_gpu_lpae_init_fns;
>
> #endif /* __IO_PGTABLE_H */
> --
> 2.7.4
>

--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project