Re: [PATCH v3 12/14] arm64: realm: Support nonsecure ITS emulation shared

From: Marc Zyngier
Date: Wed Jun 05 2024 - 09:52:11 EST


The subject line is... odd. I'd expect something like:

"irqchip/gic-v3-its: Share ITS tables with a non-trusted hypervisor"

because nothing here should be CCA specific.

On Wed, 05 Jun 2024 10:30:04 +0100,
Steven Price <steven.price@xxxxxxx> wrote:
>
> Within a realm guest the ITS is emulated by the host. This means the
> allocations must have been made available to the host by a call to
> set_memory_decrypted(). Introduce an allocation function which performs
> this extra call.

This doesn't mention that this patch radically changes the allocation
of some tables.

>
> Co-developed-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
> Signed-off-by: Steven Price <steven.price@xxxxxxx>
> ---
> Changes since v2:
> * Drop 'shared' from the new its_xxx function names as they are used
> for non-realm guests too.
> * Don't handle the NUMA_NO_NODE case specially - alloc_pages_node()
> should do the right thing.
> * Drop a pointless (void *) cast.
> ---
> drivers/irqchip/irq-gic-v3-its.c | 90 ++++++++++++++++++++++++--------
> 1 file changed, 67 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 40ebf1726393..ca72f830f4cc 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -18,6 +18,7 @@
> #include <linux/irqdomain.h>
> #include <linux/list.h>
> #include <linux/log2.h>
> +#include <linux/mem_encrypt.h>
> #include <linux/memblock.h>
> #include <linux/mm.h>
> #include <linux/msi.h>
> @@ -27,6 +28,7 @@
> #include <linux/of_pci.h>
> #include <linux/of_platform.h>
> #include <linux/percpu.h>
> +#include <linux/set_memory.h>
> #include <linux/slab.h>
> #include <linux/syscore_ops.h>
>
> @@ -163,6 +165,7 @@ struct its_device {
> struct its_node *its;
> struct event_lpi_map event_map;
> void *itt;
> + u32 itt_order;
> u32 nr_ites;
> u32 device_id;
> bool shared;
> @@ -198,6 +201,30 @@ static DEFINE_IDA(its_vpeid_ida);
> #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
> #define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K)
>
> +static struct page *its_alloc_pages_node(int node, gfp_t gfp,
> + unsigned int order)
> +{
> + struct page *page;
> +
> + page = alloc_pages_node(node, gfp, order);
> +
> + if (page)
> + set_memory_decrypted((unsigned long)page_address(page),
> + 1 << order);

Please use BIT(order).

> + return page;
> +}
> +
> +static struct page *its_alloc_pages(gfp_t gfp, unsigned int order)
> +{
> + return its_alloc_pages_node(NUMA_NO_NODE, gfp, order);
> +}
> +
> +static void its_free_pages(void *addr, unsigned int order)
> +{
> + set_memory_encrypted((unsigned long)addr, 1 << order);
> + free_pages((unsigned long)addr, order);
> +}
> +
> /*
> * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we
> * always have vSGIs mapped.
> @@ -2212,7 +2239,8 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
> {
> struct page *prop_page;
>
> - prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ));
> + prop_page = its_alloc_pages(gfp_flags,
> + get_order(LPI_PROPBASE_SZ));
> if (!prop_page)
> return NULL;
>
> @@ -2223,8 +2251,8 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
>
> static void its_free_prop_table(struct page *prop_page)
> {
> - free_pages((unsigned long)page_address(prop_page),
> - get_order(LPI_PROPBASE_SZ));
> + its_free_pages(page_address(prop_page),
> + get_order(LPI_PROPBASE_SZ));
> }
>
> static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
> @@ -2346,7 +2374,8 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
> order = get_order(GITS_BASER_PAGES_MAX * psz);
> }
>
> - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order);
> + page = its_alloc_pages_node(its->numa_node,
> + GFP_KERNEL | __GFP_ZERO, order);
> if (!page)
> return -ENOMEM;
>
> @@ -2359,7 +2388,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
> /* 52bit PA is supported only when PageSize=64K */
> if (psz != SZ_64K) {
> pr_err("ITS: no 52bit PA support when psz=%d\n", psz);
> - free_pages((unsigned long)base, order);
> + its_free_pages(base, order);
> return -ENXIO;
> }
>
> @@ -2415,7 +2444,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
> pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n",
> &its->phys_base, its_base_type_string[type],
> val, tmp);
> - free_pages((unsigned long)base, order);
> + its_free_pages(base, order);
> return -ENXIO;
> }
>
> @@ -2554,8 +2583,8 @@ static void its_free_tables(struct its_node *its)
>
> for (i = 0; i < GITS_BASER_NR_REGS; i++) {
> if (its->tables[i].base) {
> - free_pages((unsigned long)its->tables[i].base,
> - its->tables[i].order);
> + its_free_pages(its->tables[i].base,
> + its->tables[i].order);
> its->tables[i].base = NULL;
> }
> }
> @@ -2821,7 +2850,8 @@ static bool allocate_vpe_l2_table(int cpu, u32 id)
>
> /* Allocate memory for 2nd level table */
> if (!table[idx]) {
> - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
> + page = its_alloc_pages(GFP_KERNEL | __GFP_ZERO,
> + get_order(psz));
> if (!page)
> return false;
>
> @@ -2940,7 +2970,8 @@ static int allocate_vpe_l1_table(void)
>
> pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
> np, npg, psz, epp, esz);
> - page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
> + page = its_alloc_pages(GFP_ATOMIC | __GFP_ZERO,
> + get_order(np * PAGE_SIZE));
> if (!page)
> return -ENOMEM;
>
> @@ -2986,8 +3017,8 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
> {
> struct page *pend_page;
>
> - pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
> - get_order(LPI_PENDBASE_SZ));
> + pend_page = its_alloc_pages(gfp_flags | __GFP_ZERO,
> + get_order(LPI_PENDBASE_SZ));
> if (!pend_page)
> return NULL;
>
> @@ -2999,7 +3030,7 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
>
> static void its_free_pending_table(struct page *pt)
> {
> - free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
> + its_free_pages(page_address(pt), get_order(LPI_PENDBASE_SZ));
> }
>
> /*
> @@ -3334,8 +3365,9 @@ static bool its_alloc_table_entry(struct its_node *its,
>
> /* Allocate memory for 2nd level table */
> if (!table[idx]) {
> - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO,
> - get_order(baser->psz));
> + page = its_alloc_pages_node(its->numa_node,
> + GFP_KERNEL | __GFP_ZERO,
> + get_order(baser->psz));
> if (!page)
> return false;
>
> @@ -3418,7 +3450,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
> unsigned long *lpi_map = NULL;
> unsigned long flags;
> u16 *col_map = NULL;
> + struct page *page;
> void *itt;
> + int itt_order;
> int lpi_base;
> int nr_lpis;
> int nr_ites;
> @@ -3430,7 +3464,6 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
> if (WARN_ON(!is_power_of_2(nvecs)))
> nvecs = roundup_pow_of_two(nvecs);
>
> - dev = kzalloc(sizeof(*dev), GFP_KERNEL);
> /*
> * Even if the device wants a single LPI, the ITT must be
> * sized as a power of two (and you need at least one bit...).
> @@ -3438,7 +3471,16 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
> nr_ites = max(2, nvecs);
> sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
> sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
> - itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node);
> + itt_order = get_order(sz);
> + page = its_alloc_pages_node(its->numa_node,
> + GFP_KERNEL | __GFP_ZERO,
> + itt_order);

So we go from an allocation that was so far measured in *bytes* to
something that is now at least a page. Per device. This seems a bit
excessive to me, specially when it isn't conditioned on anything and
is now imposed on all platforms, including the non-CCA systems (which
are exactly 100% of the machines).

Another thing is that if we go with page alignment, then the 256 byte
alignment can obviously be removed everywhere (hint: MAPD needs to
change).

Thanks,

M.

--
Without deviation from the norm, progress is not possible.