[PATCH v2 8/8] dma-debug: Batch dma_debug_entry allocation
From: Robin Murphy
Date: Wed Dec 05 2018 - 14:57:32 EST
DMA debug entries are one of those things which aren't that useful
individually (we will always want some larger quantity of them) and
whose exact number we don't really need to manage (we only care about
having 'enough'). In that regard, the current behaviour of creating
them one-by-one from the slab allocator means an awful lot of function
call overhead and memory wasted on alignment padding.
Now that we don't have to worry about freeing anything via
dma_debug_resize_entries(), we can optimise the allocation behaviour by
grabbing whole pages at once, which will save considerably on the
aforementioned overheads, and probably offer a little more cache/TLB
locality benefit for traversing the lists under normal operation.
Since freeing a whole page of entries at once becomes enough of a
challenge that it's not really worth complicating dma_debug_init(),
we may as well tweak the preallocation behaviour so that as long as we
manage to allocate *some* pages, we can leave debugging enabled on a
best-effort basis rather than otherwise wasting them.
Signed-off-by: Robin Murphy <robin.murphy@xxxxxxx>
---
v2: New
Documentation/DMA-API.txt | 4 +++-
kernel/dma/debug.c | 45 +++++++++++++++++----------------------
2 files changed, 22 insertions(+), 27 deletions(-)
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 7a7d8a415ce8..097c51b79330 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -747,7 +747,9 @@ driver afterwards. This filter can be disabled or changed later using debugfs.
When the code disables itself at runtime this is most likely because it ran
out of dma_debug_entries and was unable to allocate more on-demand. 65536
entries are preallocated at boot - if this is too low for you boot with
-'dma_debug_entries=<your_desired_number>' to overwrite the default. The
+'dma_debug_entries=<your_desired_number>' to overwrite the default. Note
+that the code allocates entries in batches, so the exact number of
+preallocated entries may be greater than the actual number requested. The
code will print to the kernel log each time it has dynamically allocated
as many entries as were initially preallocated. This is to indicate that a
larger preallocation size may be appropriate, or if it happens continually
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 2202402afe9a..a6a603526c8f 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -48,7 +48,7 @@
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
#endif
/* If the pool runs out, add this many new entries at once */
-#define DMA_DEBUG_DYNAMIC_ENTRIES 256
+#define DMA_DEBUG_DYNAMIC_ENTRIES (PAGE_SIZE / sizeof(struct dma_debug_entry))
enum {
dma_debug_single,
@@ -648,34 +648,22 @@ static void add_dma_entry(struct dma_debug_entry *entry)
*/
}
-static int dma_debug_add_entries(u32 num_entries, gfp_t gfp)
+static int dma_debug_add_entries(gfp_t gfp)
{
- struct dma_debug_entry *entry, *next_entry;
- LIST_HEAD(tmp);
+ struct dma_debug_entry *entry;
int i;
- for (i = 0; i < num_entries; ++i) {
- entry = kzalloc(sizeof(*entry), gfp);
- if (!entry)
- goto out_err;
+ entry = (void *)get_zeroed_page(gfp);
+ if (!entry)
+ return -ENOMEM;
- list_add_tail(&entry->list, &tmp);
- }
+ for (i = 0; i < DMA_DEBUG_DYNAMIC_ENTRIES; i++)
+ list_add_tail(&entry[i].list, &free_entries);
- list_splice(&tmp, &free_entries);
- num_free_entries += num_entries;
- nr_total_entries += num_entries;
+ num_free_entries += DMA_DEBUG_DYNAMIC_ENTRIES;
+ nr_total_entries += DMA_DEBUG_DYNAMIC_ENTRIES;
return 0;
-
-out_err:
-
- list_for_each_entry_safe(entry, next_entry, &tmp, list) {
- list_del(&entry->list);
- kfree(entry);
- }
-
- return -ENOMEM;
}
static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -717,7 +705,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
spin_lock_irqsave(&free_entries_lock, flags);
if (num_free_entries == 0) {
- if (dma_debug_add_entries(DMA_DEBUG_DYNAMIC_ENTRIES, GFP_ATOMIC)) {
+ if (dma_debug_add_entries(GFP_ATOMIC)) {
global_disable = true;
spin_unlock_irqrestore(&free_entries_lock, flags);
pr_err("debugging out of memory - disabling\n");
@@ -1008,15 +996,20 @@ static int dma_debug_init(void)
return 0;
}
- if (dma_debug_add_entries(nr_prealloc_entries, GFP_KERNEL) != 0) {
+ for (i = 0; i < DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES); ++i)
+ dma_debug_add_entries(GFP_KERNEL);
+ if (num_free_entries >= nr_prealloc_entries) {
+ pr_info("preallocated %d debug entries\n", nr_total_entries);
+ } else if (num_free_entries > 0) {
+ pr_warn("%d debug entries requested but only %d allocated\n",
+ nr_prealloc_entries, nr_total_entries);
+ } else {
pr_err("debugging out of memory error - disabled\n");
global_disable = true;
return 0;
}
-
min_free_entries = num_free_entries;
- pr_info("preallocated %d debug entries\n", nr_total_entries);
dma_debug_initialized = true;
--
2.19.1.dirty