Re: [PATCH v4 11/22] iommu: Introduce guest PASID bind function
From: Jean-Philippe Brucker
Date: Tue Jun 18 2019 - 11:42:08 EST
On 09/06/2019 14:44, Jacob Pan wrote:
> Guest shared virtual address (SVA) may require host to shadow guest
> PASID tables. Guest PASID can also be allocated from the host via
> enlightened interfaces. In this case, guest needs to bind the guest
> mm, i.e. cr3 in guest physical address to the actual PASID table in
> the host IOMMU. Nesting will be turned on such that guest virtual
> address can go through a two level translation:
> - 1st level translates GVA to GPA
> - 2nd level translates GPA to HPA
> This patch introduces APIs to bind guest PASID data to the assigned
> device entry in the physical IOMMU. See the diagram below for usage
> explaination.
s/explaination/explanation/
>
> .-------------. .---------------------------.
> | vIOMMU | | Guest process mm, FL only |
> | | '---------------------------'
> .----------------/
> | PASID Entry |--- PASID cache flush -
> '-------------' |
> | | V
> | | GP
> '-------------'
> Guest
> ------| Shadow |----------------------- GP->HP* ---------
> v v |
> Host v
> .-------------. .----------------------.
> | pIOMMU | | Bind FL for GVA-GPA |
> | | '----------------------'
> .----------------/ |
> | PASID Entry | V (Nested xlate)
> '----------------\.---------------------.
> | | |Set SL to GPA-HPA |
> | | '---------------------'
> '-------------'
>
> Where:
> - FL = First level/stage one page tables
> - SL = Second level/stage two page tables
> - GP = Guest PASID
> - HP = Host PASID
> * Conversion needed if non-identity GP-HP mapping option is chosen.
>
> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx>
> ---
> drivers/iommu/iommu.c | 20 ++++++++++++++++
> include/linux/iommu.h | 21 +++++++++++++++++
> include/uapi/linux/iommu.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 99 insertions(+)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 1758b57..d0416f60 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1648,6 +1648,26 @@ int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
> }
> EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
>
> +int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data)
I'm curious about the VFIO side of this. Is the ioctl on the device or
on the container fd? For bind_pasid_table, it's on the container and we
only pass the iommu_domain to the IOMMU driver, not the device (since
devices in a domain share the same PASID table).
> +{
> + if (unlikely(!domain->ops->sva_bind_gpasid))
> + return -ENODEV;
> +
> + return domain->ops->sva_bind_gpasid(domain, dev, data);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid);
> +
> +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
> + ioasid_t pasid)
> +{
> + if (unlikely(!domain->ops->sva_unbind_gpasid))
> + return -ENODEV;
> +
> + return domain->ops->sva_unbind_gpasid(dev, pasid);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
> +
> static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
> {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 8d766a8..560c8c8 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
> #include <linux/errno.h>
> #include <linux/err.h>
> #include <linux/of.h>
> +#include <linux/ioasid.h>
> #include <uapi/linux/iommu.h>
>
> #define IOMMU_READ (1 << 0)
> @@ -267,6 +268,8 @@ struct page_response_msg {
> * @detach_pasid_table: detach the pasid table
> * @cache_invalidate: invalidate translation caches
> * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @sva_bind_gpasid: bind guest pasid and mm
> + * @sva_unbind_gpasid: unbind guest pasid and mm
> */
> struct iommu_ops {
> bool (*capable)(enum iommu_cap);
> @@ -332,6 +335,10 @@ struct iommu_ops {
> int (*page_response)(struct device *dev, struct page_response_msg *msg);
> int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
> struct iommu_cache_invalidate_info *inv_info);
> + int (*sva_bind_gpasid)(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data);
> +
> + int (*sva_unbind_gpasid)(struct device *dev, int pasid);
>
> unsigned long pgsize_bitmap;
> };
> @@ -447,6 +454,10 @@ extern void iommu_detach_pasid_table(struct iommu_domain *domain);
> extern int iommu_cache_invalidate(struct iommu_domain *domain,
> struct device *dev,
> struct iommu_cache_invalidate_info *inv_info);
> +extern int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data);
> +extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
> + struct device *dev, ioasid_t pasid);
> extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
> extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
> extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -998,6 +1009,16 @@ iommu_cache_invalidate(struct iommu_domain *domain,
> {
> return -ENODEV;
> }
> +static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> + struct device *dev, struct gpasid_bind_data *data)
> +{
> + return -ENODEV;
> +}
> +
> +static inline int sva_unbind_gpasid(struct device *dev, int pasid)
The prototype above also has a domain argument
> +{
> + return -ENODEV;
> +}
>
> #endif /* CONFIG_IOMMU_API */
>
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index ca4b753..a9cdc63 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -277,4 +277,62 @@ struct iommu_cache_invalidate_info {
> };
> };
>
> +/**
> + * struct gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
> + * SVA binding.
> + *
> + * @flags: VT-d PASID table entry attributes
> + * @pat: Page attribute table data to compute effective memory type
> + * @emt: Extended memory type
> + *
> + * Only guest vIOMMU selectable and effective options are passed down to
> + * the host IOMMU.
> + */
> +struct gpasid_bind_data_vtd {
> +#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */
> +#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */
> +#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */
> +#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */
> +#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */
> +#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */
> + __u64 flags;
> + __u32 pat;
> + __u32 emt;
> +};
> +
> +/**
> + * struct gpasid_bind_data - Information about device and guest PASID binding
> + * @version: Version of this data structure
> + * @format: PASID table entry format
> + * @flags: Additional information on guest bind request
> + * @gpgd: Guest page directory base of the guest mm to bind
> + * @hpasid: Process address space ID used for the guest mm in host IOMMU
> + * @gpasid: Process address space ID used for the guest mm in guest IOMMU
> + * @addr_width: Guest virtual address width
Maybe add "in bits" to the @addr_width description above.
> + * @vtd: Intel VT-d specific data
> + *
> + * Guest to host PASID mapping can be an identity or non-identity, where guest
> + * has its own PASID space. For non-identity mapping, guest to host PASID lookup
> + * is needed when VM programs guest PASID into an assigned device. VMM may
> + * trap such PASID programming then request host IOMMU driver to convert guest
> + * PASID to host PASID based on this bind data.
> + */
> +struct gpasid_bind_data {
> +#define IOMMU_GPASID_BIND_VERSION_1 1
> + __u32 version;
> +#define IOMMU_PASID_FORMAT_INTEL_VTD 1
> + __u32 format;
> +#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */
> + __u64 flags;
> + __u64 gpgd;
> + __u64 hpasid;
> + __u64 gpasid;
> + __u32 addr_width;
We could use a __u8 for addr_width
Thanks,
Jean
> + __u8 padding[4];
> + /* Vendor specific data */
> + union {
> + struct gpasid_bind_data_vtd vtd;
> + };
> +};
> +
> #endif /* _UAPI_IOMMU_H */
>