On 2022-05-31 04:02, Baolu Lu wrote:
On 2022/5/30 20:14, Jason Gunthorpe wrote:
On Sun, May 29, 2022 at 01:14:46PM +0800, Baolu Lu wrote:
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index d927ef10641b..e6f4835b8d9f 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -333,25 +333,28 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
 			continue;
 
 		path[level] = pde->val;
-		if (dma_pte_superpage(pde) || level == 1)
+		if (dma_pte_superpage(pde) || level == 1) {
 			dump_page_info(m, start, path);
-		else
-			pgtable_walk_level(m, phys_to_virt(dma_pte_addr(pde)),
+		} else {
+			unsigned long phys_addr;
+
+			phys_addr = (unsigned long)dma_pte_addr(pde);
+			if (!pfn_valid(__phys_to_pfn(phys_addr)))
Given that dma_pte_present(pde) passed just above, it was almost certainly a valid entry, so it seems unlikely that the physical address it pointed to could have disappeared in the meantime. If you're worried about the potential case where we've been preempted during this walk for long enough that the page has already been freed by an unmap, reallocated, and filled with someone else's data that happens to look like valid PTEs, then this check still isn't enough, since that data could just as well happen to look like valid physical addresses too.

I imagine that if you want to safely walk pagetables concurrently with them potentially being freed, you'd probably need to get RCU involved; see the sketch below the hunk for roughly what I mean.
+				break;
+			pgtable_walk_level(m, phys_to_virt(phys_addr),
Also, obligatory reminder that pfn_valid() only means that pfn_to_page() gets you a valid struct page. Whether that page is direct-mapped kernel memory or not is a different matter: a highmem page, for instance, has a perfectly valid struct page, yet phys_to_virt() on its address still hands back a bogus pointer.
 					   level - 1, start, path);
+		}
 		path[level] = 0;
 	}
 }
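To sketch what I mean by getting RCU involved (untested, and free_pgtable_page_rcu is a name I'm making up here): the unmap path would have to defer freeing of page-table pages past a grace period, and the debugfs walker could then pin them for the duration of the dump:

	/* Unmap side: hand table pages to RCU instead of freeing them
	 * directly (free_pgtable_page_rcu being a hypothetical callback).
	 */
	call_rcu(&pg->rcu_head, free_pgtable_page_rcu);

	/* Dump side: a read-side critical section then keeps every table
	 * page reachable from the PGD alive while we walk it.
	 */
	rcu_read_lock();
	pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
	rcu_read_unlock();

That said, holding rcu_read_lock() across a whole multi-level walk is its own can of worms, so this is only the general idea.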
-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	struct dmar_domain *domain = info->domain;
 	struct seq_file *m = data;
 	u64 path[6] = { 0 };
 
-	if (!domain)
-		return 0;
-
 	seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
 		   (u64)virt_to_phys(domain->pgd));
 	seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
@@ -359,20 +362,27 @@ static int show_device_domain_translation(struct device *dev, void *data)
 	pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
 	seq_putc(m, '\n');
 
-	return 0;
+	return 1;
 }
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
 {
-	unsigned long flags;
-	int ret;
+	struct iommu_group *group;
 
-	spin_lock_irqsave(&device_domain_lock, flags);
-	ret = bus_for_each_dev(&pci_bus_type, NULL, m,
-			       show_device_domain_translation);
-	spin_unlock_irqrestore(&device_domain_lock, flags);
+	group = iommu_group_get(dev);
+	if (group) {
+		iommu_group_for_each_dev(group, data,
+					 __show_device_domain_translation);
Why iommu_group_for_each_dev()?

If there *are* multiple devices in the group then by definition they should be attached to the same domain, so dumping that domain's mappings more than once seems pointless, especially given that the outer bus_for_each_dev() iteration will already visit each individual device anyway; the group walk only makes the existing redundancy even worse. Roughly what I'd expect instead is sketched below.
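For the sake of argument, what I mean (sketch only, essentially the shape of the pre-patch callback above, minus the group walk):

	/* Sketch: each device the outer bus_for_each_dev() visits already
	 * carries its domain in its per-device IOMMU data, so the dump can
	 * happen directly, once per device, with no group iteration at all.
	 */
	static int show_device_domain_translation(struct device *dev, void *data)
	{
		struct device_domain_info *info = dev_iommu_priv_get(dev);
		struct dmar_domain *domain = info->domain;
		struct seq_file *m = data;
		u64 path[6] = { 0 };

		if (!domain)
			return 0;

		seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
			   (u64)virt_to_phys(domain->pgd));
		seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
		pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
		seq_putc(m, '\n');

		return 0;
	}

(Whether dumping per device rather than once per domain is worth it at all is a separate question, of course.)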