Re: [RFC PATCH 06/11] iommu/arm-smmu-v3: Scan leaf TTD to sync hardware dirty log
From: Keqian Zhu
Date: Sun Feb 07 2021 - 20:26:45 EST
On 2021/2/5 3:52, Robin Murphy wrote:
> On 2021-01-28 15:17, Keqian Zhu wrote:
>> From: jiangkunkun <jiangkunkun@xxxxxxxxxx>
>>
>> During dirty log tracking, user will try to retrieve dirty log from
>> iommu if it supports hardware dirty log. This adds a new interface
[...]
>> static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>> {
>> unsigned long granule, page_sizes;
>> @@ -957,6 +1046,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>> .iova_to_phys = arm_lpae_iova_to_phys,
>> .split_block = arm_lpae_split_block,
>> .merge_page = arm_lpae_merge_page,
>> + .sync_dirty_log = arm_lpae_sync_dirty_log,
>> };
>> return data;
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index f1261da11ea8..69f268069282 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2822,6 +2822,47 @@ size_t iommu_merge_page(struct iommu_domain *domain, unsigned long iova,
>> }
>> EXPORT_SYMBOL_GPL(iommu_merge_page);
>> +int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
>> + size_t size, unsigned long *bitmap,
>> + unsigned long base_iova, unsigned long bitmap_pgshift)
>> +{
>> + const struct iommu_ops *ops = domain->ops;
>> + unsigned int min_pagesz;
>> + size_t pgsize;
>> + int ret;
>> +
>> + min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
>> +
>> + if (!IS_ALIGNED(iova | size, min_pagesz)) {
>> + pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
>> + iova, size, min_pagesz);
>> + return -EINVAL;
>> + }
>> +
>> + if (!ops || !ops->sync_dirty_log) {
>> + pr_err("don't support sync dirty log\n");
>> + return -ENODEV;
>> + }
>> +
>> + while (size) {
>> + pgsize = iommu_pgsize(domain, iova, size);
>> +
>> + ret = ops->sync_dirty_log(domain, iova, pgsize,
>> + bitmap, base_iova, bitmap_pgshift);
>
> Once again, we have a worst-of-both-worlds iteration that doesn't make much sense. iommu_pgsize() essentially tells you the best supported size that an IOVA range *can* be mapped with, but we're iterating a range that's already mapped, so we don't know if it's relevant, and either way it may not bear any relation to the granularity of the bitmap, which is presumably what actually matters.
>
> Logically, either we should iterate at the bitmap granularity here, and the driver just says whether the given iova chunk contains any dirty pages or not, or we just pass everything through to the driver and let it do the whole job itself. Doing a little bit of both is just an overcomplicated mess.
>
> I'm skimming patch #7 and pretty much the same comments apply, so I can't be bothered to repeat them there...
>
> Robin.
Sorry that I missed these comments...
As I clarified in #4, due to unsuitable variable name, the @pgsize actually is the max size that meets alignment acquirement and fits into the pgsize_bitmap.
All iommu interfaces acquire the @size fits into pgsize_bitmap to simplify their implementation. And the logic is very similar to "unmap" here.
Thanks,
Keqian
>
>> + if (ret)
>> + break;
>> +
>> + pr_debug("dirty_log_sync: iova 0x%lx pagesz 0x%zx\n", iova,
>> + pgsize);
>> +
>> + iova += pgsize;
>> + size -= pgsize;
>> + }
>> +
>> + return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_sync_dirty_log);
>> +
>> void iommu_get_resv_regions(struct device *dev, struct list_head *list)
>> {
>> const struct iommu_ops *ops = dev->bus->iommu_ops;
>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
>> index 754b62a1bbaf..f44551e4a454 100644
>> --- a/include/linux/io-pgtable.h
>> +++ b/include/linux/io-pgtable.h
>> @@ -166,6 +166,10 @@ struct io_pgtable_ops {
>> size_t size);
>> size_t (*merge_page)(struct io_pgtable_ops *ops, unsigned long iova,
>> phys_addr_t phys, size_t size, int prot);
>> + int (*sync_dirty_log)(struct io_pgtable_ops *ops,
>> + unsigned long iova, size_t size,
>> + unsigned long *bitmap, unsigned long base_iova,
>> + unsigned long bitmap_pgshift);
>> };
>> /**
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index ac2b0b1bce0f..8069c8375e63 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -262,6 +262,10 @@ struct iommu_ops {
>> size_t size);
>> size_t (*merge_page)(struct iommu_domain *domain, unsigned long iova,
>> phys_addr_t phys, size_t size, int prot);
>> + int (*sync_dirty_log)(struct iommu_domain *domain,
>> + unsigned long iova, size_t size,
>> + unsigned long *bitmap, unsigned long base_iova,
>> + unsigned long bitmap_pgshift);
>> /* Request/Free a list of reserved regions for a device */
>> void (*get_resv_regions)(struct device *dev, struct list_head *list);
>> @@ -517,6 +521,10 @@ extern size_t iommu_split_block(struct iommu_domain *domain, unsigned long iova,
>> size_t size);
>> extern size_t iommu_merge_page(struct iommu_domain *domain, unsigned long iova,
>> size_t size, int prot);
>> +extern int iommu_sync_dirty_log(struct iommu_domain *domain, unsigned long iova,
>> + size_t size, unsigned long *bitmap,
>> + unsigned long base_iova,
>> + unsigned long bitmap_pgshift);
>> /* Window handling function prototypes */
>> extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
>> @@ -923,6 +931,15 @@ static inline size_t iommu_merge_page(struct iommu_domain *domain,
>> return -EINVAL;
>> }
>> +static inline int iommu_sync_dirty_log(struct iommu_domain *domain,
>> + unsigned long iova, size_t size,
>> + unsigned long *bitmap,
>> + unsigned long base_iova,
>> + unsigned long pgshift)
>> +{
>> + return -EINVAL;
>> +}
>> +
>> static inline int iommu_device_register(struct iommu_device *iommu)
>> {
>> return -ENODEV;
>>
> .
>