[PATCH v2 1/1] swiotlb: Track and report io_tlb_used high water mark in debugfs

From: Michael Kelley
Date: Sat Mar 25 2023 - 13:54:32 EST


swiotlb currently reports the total number of slabs and the instantaneous
in-use slabs in debugfs. But with increased usage of swiotlb for all I/O
in Confidential Computing (coco) VMs, it has become difficult to know
how much memory to allocate for swiotlb bounce buffers, either via the
automatic algorithm in the kernel or by specifying a value on the
kernel boot line. The current automatic algorithm generously allocates
swiotlb bounce buffer memory, and may be wasting significant memory in
many use cases.

To support a better understanding of swiotlb usage, add tracking of
the high water mark usage of swiotlb bounce buffer memory. Report the
high water mark in debugfs along with the other swiotlb metrics. Allow
the high water mark to be reset to zero at runtime by writing zero to it.

Since a global in-use slab count is added alongside the existing
per-area in-use count, the mem_used() function that sums across all
areas is no longer needed. Remove it and replace its callers with reads
of the global in-use count.

Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>

Changes in v2:
* Only reset the high water mark to zero when the specified new value
is zero, to prevent confusion about the ability to reset to some
other value [Dexuan Cui]

---
kernel/dma/swiotlb.c | 49 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f9f0279..3e50639 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -76,6 +76,9 @@ struct io_tlb_slot {
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;

+static atomic_long_t total_used = ATOMIC_LONG_INIT(0);
+static atomic_long_t used_hiwater = ATOMIC_LONG_INIT(0);
+
/**
* struct io_tlb_area - IO TLB memory area descriptor
*
@@ -587,6 +590,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
unsigned long flags;
unsigned int slot_base;
unsigned int slot_index;
+ unsigned long old_hiwater, new_used;

BUG_ON(!nslots);
BUG_ON(area_index >= mem->nareas);
@@ -659,6 +663,14 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
area->index = wrap_area_index(mem, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ new_used = atomic_long_add_return(nslots, &total_used);
+ old_hiwater = atomic_long_read(&used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&used_hiwater, &old_hiwater, new_used));
+
return slot_index;
}

@@ -681,16 +693,6 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
return -1;
}

-static unsigned long mem_used(struct io_tlb_mem *mem)
-{
- int i;
- unsigned long used = 0;
-
- for (i = 0; i < mem->nareas; i++)
- used += mem->areas[i].used;
- return used;
-}
-
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
unsigned int alloc_align_mask, enum dma_data_direction dir,
@@ -723,7 +725,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, mem->nslabs, mem_used(mem));
+ alloc_size, mem->nslabs, atomic_long_read(&total_used));
return (phys_addr_t)DMA_MAPPING_ERROR;
}

@@ -791,6 +793,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ atomic_long_sub(nslots, &total_used);
}

/*
@@ -887,10 +891,29 @@ bool is_swiotlb_active(struct device *dev)

static int io_tlb_used_get(void *data, u64 *val)
{
- *val = mem_used(&io_tlb_default_mem);
+ *val = (u64)atomic_long_read(&total_used);
return 0;
}
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ *val = (u64)atomic_long_read(&used_hiwater);
+ return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&used_hiwater, val);
+ return 0;
+}
+
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");

static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
@@ -902,6 +925,8 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
&fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, NULL,
+ &fops_io_tlb_hiwater);
}

static int __init __maybe_unused swiotlb_create_default_debugfs(void)
--
1.8.3.1