[PATCH v3 1/1] swiotlb: Track and report io_tlb_used high water mark in debugfs

From: Michael Kelley
Date: Sat Apr 01 2023 - 00:47:37 EST


swiotlb currently reports the total number of slabs and the instantaneous
in-use slabs in debugfs. But with increased usage of swiotlb for all I/O
in Confidential Computing (coco) VMs, it has become difficult to know
how much memory to allocate for swiotlb bounce buffers, either via the
automatic algorithm in the kernel or by specifying a value on the
kernel boot line. The current automatic algorithm generously allocates
swiotlb bounce buffer memory, and may be wasting significant memory in
many use cases.

To support better understanding of swiotlb usage, add tracking of
the high water mark usage of the default swiotlb bounce buffer memory
pool. Report the high water mark in debugfs along with the other swiotlb
metrics. Allow the high water mark to be reset to zero at runtime by
writing zero to it; any other value is rejected with -EINVAL.

Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
---
Changes in v3:
* Do high water mark accounting only when CONFIG_DEBUG_FS=y. As
a result, add back the mem_used() function for the "swiotlb
buffer is full" error message. [Christoph -- I didn't hear back
whether this approach addresses your concern about one additional
atomic operation when slots are allocated and again when freed. I've
gone ahead with this new version, and we can obviously have further
discussion.]

* Remove unnecessary u64 casts. [Christoph Hellwig]

* Track slot usage and the high water mark only for io_tlb_default_mem.
Previous versions incorrectly included per-device pools. [Petr Tesarik]

Changes in v2:
* Only reset the high water mark to zero when the specified new value
is zero, to prevent confusion about the ability to reset to some
other value [Dexuan Cui]

kernel/dma/swiotlb.c | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d3d6be0..6587a3d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -76,6 +76,9 @@ struct io_tlb_slot {
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;

+static atomic_long_t total_used = ATOMIC_LONG_INIT(0);
+static atomic_long_t used_hiwater = ATOMIC_LONG_INIT(0);
+
/**
* struct io_tlb_area - IO TLB memory area descriptor
*
@@ -594,6 +597,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
unsigned long flags;
unsigned int slot_base;
unsigned int slot_index;
+ unsigned long old_hiwater, new_used;

BUG_ON(!nslots);
BUG_ON(area_index >= mem->nareas);
@@ -663,6 +667,17 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
area->index = 0;
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ if (IS_ENABLED(CONFIG_DEBUG_FS) && mem == &io_tlb_default_mem) {
+ new_used = atomic_long_add_return(nslots, &total_used);
+ old_hiwater = atomic_long_read(&used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&used_hiwater,
+ &old_hiwater, new_used));
+ }
+
return slot_index;
}

@@ -795,6 +810,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ if (IS_ENABLED(CONFIG_DEBUG_FS) && mem == &io_tlb_default_mem)
+ atomic_long_sub(nslots, &total_used);
}

/*
@@ -891,10 +909,29 @@ bool is_swiotlb_active(struct device *dev)

static int io_tlb_used_get(void *data, u64 *val)
{
- *val = mem_used(&io_tlb_default_mem);
+ *val = atomic_long_read(&total_used);
+ return 0;
+}
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ *val = atomic_long_read(&used_hiwater);
+ return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&used_hiwater, val);
return 0;
}
+
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");

static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
@@ -906,6 +943,8 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
&fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, NULL,
+ &fops_io_tlb_hiwater);
}

static int __init __maybe_unused swiotlb_create_default_debugfs(void)
--
1.8.3.1