Re: [RFC v1 3/4] swiotlb: Allow dynamic allocation of bounce buffers

From: Petr Tesarik
Date: Tue Mar 28 2023 - 08:44:11 EST


On 3/28/2023 9:54 AM, Petr Tesarik wrote:
> On 3/28/2023 6:07 AM, Christoph Hellwig wrote:
>> [adding Alex as he has been interested in this in the past]
>>
> [...]
>> I'm a little worried about all that because it causes quite a bit
>> of overhead even for callers that don't end up going into the
>> dynamic range or do not use swiotlb at all. I don't really have a
>> good answer here except for the usual "avoid bounce buffering
>> whenever you can", which might not always be easy to do.
>
> I'm also worried about all this overhead.

Oh, wait! I can do at least something for devices which do not use
swiotlb at all.

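If a device does not use bounce buffers, it cannot pass an address
that belongs to the swiotlb. Keep a per-device count of the buffers
currently mapped from the swiotlb (dma_io_tlb_cnt); while that count
is zero, the potentially expensive check in is_swiotlb_buffer() can
be skipped. This avoids the dynamic lookup penalty for devices which
do not need the swiotlb.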

Note that the dma_io_tlb_cnt counter always remains zero if
dma_io_tlb_mem is NULL, so the explicit NULL check on the pool
pointer in is_swiotlb_buffer() is no longer required.

diff --git a/drivers/base/core.c b/drivers/base/core.c
index a3e14143ec0c..f36638f207b8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2957,6 +2957,7 @@ void device_initialize(struct device *dev)
#endif
#ifdef CONFIG_SWIOTLB
dev->dma_io_tlb_mem = &io_tlb_default_mem;
+ atomic_set(&dev->dma_io_tlb_cnt, 0);
#endif
}
EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/include/linux/device.h b/include/linux/device.h
index 44e3acae7b36..cfdddce4cc30 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -504,6 +504,7 @@ struct device_physical_location {
* @dma_mem: Internal for coherent mem override.
* @cma_area: Contiguous memory area for dma allocations
* @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use.
+ * @dma_io_tlb_cnt: Number of buffers mapped from the swiotlb pool.
* @archdata: For arch-specific additions.
* @of_node: Associated device tree node.
* @fwnode: Associated device node supplied by platform firmware.
@@ -609,6 +610,7 @@ struct device {
#endif
#ifdef CONFIG_SWIOTLB
struct io_tlb_mem *dma_io_tlb_mem;
+ atomic_t dma_io_tlb_cnt;
#endif
/* arch specific additions */
struct dev_archdata archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 628e25ad7db7..7a115f4db49d 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -122,7 +122,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

- return mem &&
+ return atomic_read(&dev->dma_io_tlb_cnt) &&
(is_swiotlb_fixed(mem, paddr) ||
(mem->allow_dyn && is_swiotlb_dyn(mem, paddr)));
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 3efaefebb6af..3dda1d3a39e8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -954,6 +954,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return tlb_addr;
}

+ atomic_inc(&dev->dma_io_tlb_cnt);
+
/*
* When dir == DMA_FROM_DEVICE we could omit the copy from the orig
* to the tlb buffer, if we knew for sure the device will
@@ -1030,6 +1032,7 @@ void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
swiotlb_release_slots(dev, tlb_addr);
else
swiotlb_dyn_unmap(dev, tlb_addr, dir);
+ atomic_dec(&dev->dma_io_tlb_cnt);
}

void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,