Re: [PATCH v2 11/16] iommu/vt-d: preserve PASID table of preserved device
From: Samiullah Khawaja
Date: Wed May 20 2026 - 14:29:45 EST
On Tue, May 19, 2026 at 10:35:26PM +0000, Pranjal Shrivastava wrote:
On Mon, Apr 27, 2026 at 05:56:28PM +0000, Samiullah Khawaja wrote:
In scalable mode the PASID table is used to fetch the io page tables.
Preserve and restore the PASID table of the preserved devices.
Signed-off-by: Samiullah Khawaja <skhawaja@xxxxxxxxxx>
---
drivers/iommu/intel/iommu.c | 5 +-
drivers/iommu/intel/iommu.h | 12 +++
drivers/iommu/intel/liveupdate.c | 141 +++++++++++++++++++++++++++++++
drivers/iommu/intel/pasid.c | 7 +-
drivers/iommu/intel/pasid.h | 9 ++
include/linux/kho/abi/iommu.h | 13 +++
6 files changed, 184 insertions(+), 3 deletions(-)
[snip]
+
+static int pasid_lu_do_op(void *table, enum pasid_lu_op op)
+{
+ int ret = 0;
+
+ switch (op) {
+ case PASID_LU_OP_PRESERVE:
+ ret = iommu_preserve_page(table);
Nit: This makes me consider renaming the helper to
`iommu_preserve_folio`. I almost wondered why we are preserving only a
single page.
Interestingly, the iommu pages API uses the plural "pages" in its
function names, since each iopt_desc can be backed by multiple pages:
iommu_free_pages()
iommu_alloc_pages_*()
So I will rename these to:
iommu_preserve_pages()
iommu_preserve_pages_list(list)
iommu_unpreserve_pages()
iommu_unpreserve_pages_list(list)
iommu_restore_pages()
+ break;
+ case PASID_LU_OP_UNPRESERVE:
+ iommu_unpreserve_page(table);
+ break;
+ case PASID_LU_OP_RESTORE:
+ iommu_restore_page(virt_to_phys(table));
+ break;
+ case PASID_LU_OP_FREE:
+ iommu_free_pages(table);
+ break;
+ }
+
+ return ret;
+}
+
[snip]
+
+void pasid_cleanup_preserved_table(struct device *dev)
+{
+ struct pasid_table *pasid_table;
+ struct pasid_dir_entry *dir;
+ struct pasid_entry *table;
+ size_t dir_size;
+
+ pasid_table = intel_pasid_get_table(dev);
+ if (!pasid_table)
+ return;
+
+ dir = pasid_table->table;
+ table = get_pasid_table_from_pde(&dir[0]);
+ if (!table)
+ return;
+
+ /* Clear everything except the first entry in table. */
+ memset(&table[1], 0, SZ_4K - sizeof(*table));
Nit: Is the first entry always 4K or could it change based on PAGE_SIZE?
VT-d always uses 4K, but for clarity I will change this to
VTD_PAGE_SIZE.
+
+ /* Use the folio order to calculate the size of Pasid Directory */
+ dir_size = (1 << (folio_order(virt_to_folio(dir)) + PAGE_SHIFT));
+
+ /* Clear everything except the first entry in directory */
+ memset(&dir[1], 0, dir_size - sizeof(struct pasid_dir_entry));
+
+ clflush_cache_range(&table[0], SZ_4K);
+ clflush_cache_range(&dir[0], dir_size);
+}
+
[...]
+void *intel_pasid_try_restore_table(struct device *dev, u64 max_pasid)
+{
+ struct iommu_device_ser *ser = dev_iommu_restored_state(dev);
+
+ if (!ser)
+ return NULL;
+
+ BUG_ON(pasid_lu_handle_pd(phys_to_virt(ser->intel.pasid_table),
+ PASID_LU_OP_RESTORE));
+ if (WARN_ON_ONCE(ser->intel.max_pasid != max_pasid)) {
I'm wondering if this could be slightly relaxed to:
if (ser->intel.max_pasid < max_pasid) to ensure it's a minimum
requirement rather than an exact match?
Makes sense. I will update this.
+ pasid_lu_handle_pd(phys_to_virt(ser->intel.pasid_table),
+ PASID_LU_OP_FREE);
+ return NULL;
+ }
+
+ return phys_to_virt(ser->intel.pasid_table);
+}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 89541b74ab8c..5cac8e95f73b 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -60,8 +60,11 @@ int intel_pasid_alloc_table(struct device *dev)
size = max_pasid >> (PASID_PDE_SHIFT - 3);
order = size ? get_order(size) : 0;
- dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
- 1 << (order + PAGE_SHIFT));
+
+ dir = intel_pasid_try_restore_table(dev, 1 << (order + PAGE_SHIFT + 3));
+ if (!dir)
+ dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
+ 1 << (order + PAGE_SHIFT));
if (!dir) {
kfree(pasid_table);
return -ENOMEM;
Thanks,
Praan
Sami