RE: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

From: Tian, Kevin
Date: Wed Sep 05 2018 - 22:15:49 EST


> From: Lu Baolu [mailto:baolu.lu@xxxxxxxxxxxxxxx]
> Sent: Thursday, August 30, 2018 9:35 AM
>
> In scalable mode, pasid structure is a two level table with
> a pasid directory table and a pasid table. Any pasid entry
> can be identified by a pasid value in below way.
>
> 1
> 9 6 5 0
> .-----------------------.-------.
> | PASID | |
> '-----------------------'-------' .-------------.
> | | | |
> | | | |
> | | | |
> | .-----------. | .-------------.
> | | | |----->| PASID Entry |
> | | | | '-------------'
> | | | |Plus | |
> | .-----------. | | |
> |---->| DIR Entry |-------->| |
> | '-----------' '-------------'
> .---------. |Plus | |
> | Context | | | |
> | Entry |------->| |
> '---------' '-----------'
>
> This changes the pasid table APIs to support scalable mode
> PASID directory and PASID table. It also adds a helper to
> get the PASID table entry according to the pasid value.
>
> Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
> Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> Cc: Liu Yi L <yi.l.liu@xxxxxxxxx>
> Signed-off-by: Sanjay Kumar <sanjay.k.kumar@xxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> ---
> drivers/iommu/intel-iommu.c | 2 +-
> drivers/iommu/intel-pasid.c | 72 ++++++++++++++++++++++++++++++++----
> -
> drivers/iommu/intel-pasid.h | 10 +++++-
> drivers/iommu/intel-svm.c | 6 +---
> 4 files changed, 74 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 5845edf4dcf9..b0da4f765274 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -2507,7 +2507,7 @@ static struct dmar_domain
> *dmar_insert_one_dev_info(struct intel_iommu *iommu,
> if (dev)
> dev->archdata.iommu = info;
>
> - if (dev && dev_is_pci(dev) && info->pasid_supported) {
> + if (dev && dev_is_pci(dev) && sm_supported(iommu)) {

worthy of a comment here that PASID table now is mandatory in
scalable mode, instead of optional for 1st level usage before.

> ret = intel_pasid_alloc_table(dev);
> if (ret) {
> __dmar_remove_one_dev_info(info);
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index fe95c9bd4d33..d6e90cd5b062 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -127,8 +127,7 @@ int intel_pasid_alloc_table(struct device *dev)
> int ret, order;
>
> info = dev->archdata.iommu;
> - if (WARN_ON(!info || !dev_is_pci(dev) ||
> - !info->pasid_supported || info->pasid_table))
> + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
> return -EINVAL;

following same logic should you check sm_supported here?

>
> /* DMA alias device already has a pasid table, use it: */
> @@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
> return -ENOMEM;
> INIT_LIST_HEAD(&pasid_table->dev);
>
> - size = sizeof(struct pasid_entry);
> + size = sizeof(struct pasid_dir_entry);
> count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
> intel_pasid_max_id);
> + count >>= PASID_PDE_SHIFT;
> order = get_order(size * count);
> pages = alloc_pages_node(info->iommu->node,
> GFP_ATOMIC | __GFP_ZERO,
> @@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)
>
> pasid_table->table = page_address(pages);
> pasid_table->order = order;
> - pasid_table->max_pasid = count;
> + pasid_table->max_pasid = count << PASID_PDE_SHIFT;

are you sure of that count is PDE_SHIFT aligned? otherwise >>
then << would lose some bits. If sure, then better add some check.

>
> attach_out:
> device_attach_pasid_table(info, pasid_table);
> @@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
> return 0;
> }
>
> +/* Get PRESENT bit of a PASID directory entry. */
> +static inline bool
> +pasid_pde_is_present(struct pasid_dir_entry *pde)
> +{
> + return READ_ONCE(pde->val) & PASID_PTE_PRESENT;

curious why adding READ_ONCE specifically for PASID structure,
but not used for any other existing vtd structures? Is it to address
some specific requirement on PASID structure as defined in spec?

> +}
> +
> +/* Get PASID table from a PASID directory entry. */
> +static inline struct pasid_entry *
> +get_pasid_table_from_pde(struct pasid_dir_entry *pde)
> +{
> + if (!pasid_pde_is_present(pde))
> + return NULL;
> +
> + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
> +}
> +
> void intel_pasid_free_table(struct device *dev)
> {
> struct device_domain_info *info;
> struct pasid_table *pasid_table;
> + struct pasid_dir_entry *dir;
> + struct pasid_entry *table;
> + int i, max_pde;
>
> info = dev->archdata.iommu;
> - if (!info || !dev_is_pci(dev) ||
> - !info->pasid_supported || !info->pasid_table)
> + if (!info || !dev_is_pci(dev) || !info->pasid_table)
> return;
>
> pasid_table = info->pasid_table;
> @@ -178,6 +197,14 @@ void intel_pasid_free_table(struct device *dev)
> if (!list_empty(&pasid_table->dev))
> return;
>
> + /* Free scalable mode PASID directory tables: */
> + dir = pasid_table->table;
> + max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
> + for (i = 0; i < max_pde; i++) {
> + table = get_pasid_table_from_pde(&dir[i]);
> + free_pgtable_page(table);
> + }
> +
> free_pages((unsigned long)pasid_table->table, pasid_table->order);
> kfree(pasid_table);
> }
> @@ -206,17 +233,37 @@ int intel_pasid_get_dev_max_id(struct device
> *dev)
>
> struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
> {
> + struct device_domain_info *info;
> struct pasid_table *pasid_table;
> + struct pasid_dir_entry *dir;
> struct pasid_entry *entries;
> + int dir_index, index;
>
> pasid_table = intel_pasid_get_table(dev);
> if (WARN_ON(!pasid_table || pasid < 0 ||
> pasid >= intel_pasid_get_dev_max_id(dev)))
> return NULL;
>
> - entries = pasid_table->table;
> + dir = pasid_table->table;
> + info = dev->archdata.iommu;
> + dir_index = pasid >> PASID_PDE_SHIFT;
> + index = pasid & PASID_PTE_MASK;
> +
> + spin_lock(&pasid_lock);
> + entries = get_pasid_table_from_pde(&dir[dir_index]);
> + if (!entries) {
> + entries = alloc_pgtable_page(info->iommu->node);
> + if (!entries) {
> + spin_unlock(&pasid_lock);
> + return NULL;
> + }
> +
> + WRITE_ONCE(dir[dir_index].val,
> + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
> + }
> + spin_unlock(&pasid_lock);
>
> - return &entries[pasid];
> + return &entries[index];
> }
>
> /*
> @@ -224,7 +271,14 @@ struct pasid_entry *intel_pasid_get_entry(struct
> device *dev, int pasid)
> */
> static inline void pasid_clear_entry(struct pasid_entry *pe)
> {
> - WRITE_ONCE(pe->val, 0);
> + WRITE_ONCE(pe->val[0], 0);
> + WRITE_ONCE(pe->val[1], 0);
> + WRITE_ONCE(pe->val[2], 0);
> + WRITE_ONCE(pe->val[3], 0);
> + WRITE_ONCE(pe->val[4], 0);
> + WRITE_ONCE(pe->val[5], 0);
> + WRITE_ONCE(pe->val[6], 0);
> + WRITE_ONCE(pe->val[7], 0);

memset?

> }
>
> void intel_pasid_clear_entry(struct device *dev, int pasid)
> diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
> index 1c05ed6fc5a5..12f480c2bb8b 100644
> --- a/drivers/iommu/intel-pasid.h
> +++ b/drivers/iommu/intel-pasid.h
> @@ -12,11 +12,19 @@
>
> #define PASID_MIN 0x1
> #define PASID_MAX 0x100000
> +#define PASID_PTE_MASK 0x3F
> +#define PASID_PTE_PRESENT 1
> +#define PDE_PFN_MASK PAGE_MASK
> +#define PASID_PDE_SHIFT 6
>
> -struct pasid_entry {
> +struct pasid_dir_entry {
> u64 val;
> };
>
> +struct pasid_entry {
> + u64 val[8];
> +};
> +
> /* The representative of a PASID table */
> struct pasid_table {
> void *table; /* pasid table pointer */
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 4a03e5090952..6c0bd9ee9602 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu)
>
> order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
> if (ecap_dis(iommu->ecap)) {
> - /* Just making it explicit... */
> - BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct
> pasid_state_entry));
> pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
> if (pages)
> iommu->pasid_state_table = page_address(pages);
> @@ -406,9 +404,7 @@ int intel_svm_bind_mm(struct device *dev, int
> *pasid, int flags, struct svm_dev_
> pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
>
> entry = intel_pasid_get_entry(dev, svm->pasid);
> - entry->val = pasid_entry_val;
> -
> - wmb();
> + WRITE_ONCE(entry->val[0], pasid_entry_val);
>
> /*
> * Flush PASID cache when a PASID table entry becomes
> --
> 2.17.1