Re: [RFC 1/9] iommu: Introduce bind_pasid_table API function

From: Alex Williamson
Date: Thu Jun 22 2017 - 18:52:12 EST


On Wed, 14 Jun 2017 15:22:55 -0700
Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> wrote:

> Virtual IOMMU was proposed to support Shared Virtual Memory (SVM) use
> case in the guest:
> https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg05311.html
>
> As part of the proposed architecture, when a SVM capable PCI
> device is assigned to a guest, nested mode is turned on. Guest owns the
> first level page tables (request with PASID) and performs GVA->GPA
> translation. Second level page tables are owned by the host for GPA->HPA
> translation for both request with and without PASID.
>
> A new IOMMU driver interface is therefore needed to perform tasks as
> follows:
> * Enable nested translation and appropriate translation type
> * Assign guest PASID table pointer (in GPA) and size to host IOMMU
>
> This patch introduces new functions called iommu_(un)bind_pasid_table()
> to IOMMU APIs. Architecture specific IOMMU function can be added later
> to perform the specific steps for binding pasid table of assigned devices.
>
> This patch also adds model definition in iommu.h. It would be used to
> check if the bind request is from a compatible entity. e.g. a bind
> request from an intel_iommu emulator may not be supported by an ARM SMMU
> driver.
>
> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Signed-off-by: Liu, Yi L <yi.l.liu@xxxxxxxxxxxxxxx>
> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> ---
> drivers/iommu/iommu.c | 19 +++++++++++++++++++
> include/linux/iommu.h | 23 +++++++++++++++++++++++
> include/uapi/linux/iommu.h | 33 +++++++++++++++++++++++++++++++++
> 3 files changed, 75 insertions(+)
> create mode 100644 include/uapi/linux/iommu.h
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index cf7ca7e..494309b 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1328,6 +1328,25 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
> }
> EXPORT_SYMBOL_GPL(iommu_attach_device);
>
> +int iommu_bind_pasid_table(struct iommu_domain *domain, struct device *dev,
> + struct pasid_table_info *pasidt_binfo)
> +{
> + if (unlikely(!domain->ops->bind_pasid_table))
> + return -EINVAL;
> +
> + return domain->ops->bind_pasid_table(domain, dev, pasidt_binfo);
> +}
> +EXPORT_SYMBOL_GPL(iommu_bind_pasid_table);
> +
> +int iommu_unbind_pasid_table(struct iommu_domain *domain, struct device *dev)
> +{
> + if (unlikely(!domain->ops->unbind_pasid_table))
> + return -EINVAL;
> +
> + return domain->ops->unbind_pasid_table(domain, dev);
> +}
> +EXPORT_SYMBOL_GPL(iommu_unbind_pasid_table);
> +
> static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
> {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 2cb54ad..7122add 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
> #include <linux/errno.h>
> #include <linux/err.h>
> #include <linux/of.h>
> +#include <uapi/linux/iommu.h>
>
> #define IOMMU_READ (1 << 0)
> #define IOMMU_WRITE (1 << 1)
> @@ -183,6 +184,8 @@ struct iommu_resv_region {
> * @domain_get_windows: Return the number of windows for a domain
> * @of_xlate: add OF master IDs to iommu grouping
> * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @bind_pasid_table: bind pasid table pointer for guest SVM
> + * @unbind_pasid_table: unbind pasid table pointer and restore defaults
> */
> struct iommu_ops {
> bool (*capable)(enum iommu_cap);
> @@ -225,6 +228,10 @@ struct iommu_ops {
> u32 (*domain_get_windows)(struct iommu_domain *domain);
>
> int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
> + int (*bind_pasid_table)(struct iommu_domain *domain, struct device *dev,
> + struct pasid_table_info *pasidt_binfo);
> + int (*unbind_pasid_table)(struct iommu_domain *domain,
> + struct device *dev);
>
> unsigned long pgsize_bitmap;
> };
> @@ -282,6 +289,10 @@ extern int iommu_attach_device(struct iommu_domain *domain,
> struct device *dev);
> extern void iommu_detach_device(struct iommu_domain *domain,
> struct device *dev);
> +extern int iommu_bind_pasid_table(struct iommu_domain *domain,
> + struct device *dev, struct pasid_table_info *pasidt_binfo);
> +extern int iommu_unbind_pasid_table(struct iommu_domain *domain,
> + struct device *dev);
> extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
> extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> phys_addr_t paddr, size_t size, int prot);
> @@ -637,6 +648,18 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
> return NULL;
> }
>
> +static inline
> +int iommu_bind_pasid_table(struct iommu_domain *domain, struct device *dev,
> + struct pasid_table_info *pasidt_binfo)
> +{
> + return -EINVAL;
> +}
> +static inline
> +int iommu_unbind_pasid_table(struct iommu_domain *domain, struct device *dev)
> +{
> + return -EINVAL;
> +}
> +
> #endif /* CONFIG_IOMMU_API */
>
> #endif /* __LINUX_IOMMU_H */
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index 0000000..5ef0e7c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,33 @@
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef _UAPI_IOMMU_H
> +#define _UAPI_IOMMU_H
> +
> +/**
> + * PASID table data used to bind guest PASID table to the host IOMMU. This will
> + * enable guest managed first level page tables.
> + * @ptr PASID table pointer in GPA

GPA? I'm confused how the host physical IOMMU needs the guest physical
address of the PASID table here. The first level translation does
GVA to GPA lookup, but doesn't that translation still come from a
table that the IOMMU references via a host physical address? If not,
is the address space of this pointer implementation/architecture
specific? In general I think it's good policy to not make the interface
specific to the VM use case. After all, vfio is a userspace driver
interface and device assignment is just one use case.

> + * @size size of the guest PASID table, must be <= host table size

Presumably in bytes, best to say so.

> + * @model magic number tells vendor apart
> + * @length length of the opaque data

Also in bytes.

> + * @opaque architecture specific IOMMU data

s/architecture/model/?

> + */
> +struct pasid_table_info {
> + __u64 ptr;
> + __u64 size;
> + __u32 model;
> +#define INTEL_IOMMU (1 << 0)
> +#define ARM_SMMU (1 << 1)

Why are we using this as a bit field rather than an enum? Does it make
sense for model to be (INTEL_IOMMU|ARM_SMMU)?

> + __u32 length;
> + __u8 opaque[];
> +};
> +
> +
> +#endif /* _UAPI_IOMMU_H */