Cache tag invalidation requests for a domain are accumulated until a
different iommu unit is found while traversing the cache_tags linked
list. But cache tags of the same iommu unit can be scattered throughout
the list, which makes the batched flush less efficient. E.g., a device
backed by iommu0 is attached to a domain in between two devices backed
by iommu1.

Group cache tags of the same iommu unit together in cache_tag_assign()
to maximize the effectiveness of the batched flush.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@xxxxxxxxx>
---
drivers/iommu/intel/cache.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
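For illustration, a minimal standalone userspace sketch of the grouping
insertion follows. The list helpers are simplified stand-ins for the
kernel's <linux/list.h> (list_add() keeps the kernel semantics of
inserting after the given node), and the demo_tag/assign_grouped names
are invented for the example; they are not part of the driver.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for the kernel's struct list_head. */
struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h)
{
	h->prev = h->next = h;
}

/* Insert @n right after @pos, matching the kernel's list_add(). */
static void list_add(struct list_head *n, struct list_head *pos)
{
	n->next = pos->next;
	n->prev = pos;
	pos->next->prev = n;
	pos->next = n;
}

#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical stand-in for struct cache_tag. */
struct demo_tag {
	int iommu_id;			/* plays the role of tag->iommu */
	struct list_head node;
};

/*
 * Mirror of the placement done by the patched cache_tag_assign():
 * start with the list head as the insertion point (so an unmatched
 * tag simply lands at the front) and advance it past every existing
 * tag of the same iommu unit, keeping same-unit tags contiguous.
 */
static void assign_grouped(struct list_head *tags, struct demo_tag *tag)
{
	struct list_head *pos = tags;	/* plays the role of temp2 */
	struct list_head *it;

	for (it = tags->next; it != tags; it = it->next) {
		struct demo_tag *t = list_entry(it, struct demo_tag, node);

		if (t->iommu_id == tag->iommu_id)
			pos = it;	/* last same-unit tag so far */
	}
	list_add(&tag->node, pos);
}

int main(void)
{
	struct list_head tags;
	struct list_head *it;
	int units[] = { 1, 0, 1 };	/* iommu1, iommu0, iommu1 */

	list_init(&tags);
	for (size_t i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		struct demo_tag *t = calloc(1, sizeof(*t));

		if (!t)
			return 1;
		t->iommu_id = units[i];
		assign_grouped(&tags, t);
	}
	/* Prints "0 1 1": the two iommu1 tags are now adjacent. */
	for (it = tags.next; it != tags; it = it->next)
		printf("%d ", list_entry(it, struct demo_tag, node)->iommu_id);
	printf("\n");
	return 0;
}

Note that the list head itself serves as the default insertion point
(as temp2 does in the patch), so a tag with no same-unit predecessor
lands at the front of the list; this is harmless since ordering across
iommu units does not matter for the batched flush.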
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index e5b89f728ad3..726052a841e0 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -48,6 +48,8 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
unsigned long flags;
+ struct cache_tag *temp2 = list_entry(&domain->cache_tags,
+ struct cache_tag, node);
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
if (!tag)
@@ -73,8 +75,15 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
trace_cache_tag_assign(temp);
return 0;
}
+ if (temp->iommu == iommu)
+ temp2 = temp;
}
- list_add_tail(&tag->node, &domain->cache_tags);
+ /*
+ * Link cache tags of the same iommu unit together, so the
+ * corresponding flush ops can be batched per iommu unit.
+ */
+ list_add(&tag->node, &temp2->node);
+
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);