[PATCH net-next V2 3/3] net/mlx5: use internal dma pools for frag buf alloc

From: Tariq Toukan

Date: Wed Apr 29 2026 - 16:18:29 EST


From: Nimrod Oren <noren@xxxxxxxxxx>

Add mlx5_dma_pool alloc/free paths, and wire mlx5_frag_buf allocation
and free paths to use them.

mlx5_frag_buf_alloc_node() now selects an mlx5_dma_pool to allocate
fragments from, instead of directly allocating full coherent pages.

mlx5_frag_buf_free() returns each fragment to the pool it was allocated from.

mlx5_dma_pool_alloc() keeps allocation fast by maintaining pages with
available indexes at the head of the list, so the common allocation path
can take a free index immediately. New backing pages are allocated only
when no free index is available.

mlx5_dma_pool_free() returns released indexes to the pool and frees a
backing page once all of its indexes become free. This avoids keeping
fully free pages for the lifetime of the pool and reduces coherent DMA
memory footprint.

Signed-off-by: Nimrod Oren <noren@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/alloc.c | 170 ++++++++++++++----
include/linux/mlx5/driver.h | 2 +
2 files changed, 140 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index fcc859c5f810..f19644183828 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -120,6 +120,111 @@ static struct mlx5_dma_pool *mlx5_dma_pool_create(struct mlx5_core_dev *dev,
return pool;
}

+static struct mlx5_dma_pool_page * /* build one PAGE_SIZE backing page with all blocks free; NULL on failure */
+mlx5_dma_pool_page_alloc(struct mlx5_dma_pool *pool)
+{
+ int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift); /* number of 2^block_shift-byte blocks in one page */
+ struct mlx5_dma_pool_page *page;
+
+ page = kzalloc_obj(*page);
+ if (!page)
+ goto err_out;
+
+ page->pool = pool; /* back-pointer; mlx5_frag_buf_free() reads it to find the owning pool */
+ page->bitmap = bitmap_zalloc(blocks_per_page, GFP_KERNEL);
+ if (!page->bitmap)
+ goto err_free_page;
+
+ bitmap_fill(page->bitmap, blocks_per_page); /* set bit == free block, so a new page starts fully free */
+ page->buf = mlx5_dma_zalloc_coherent_node(pool->dev, PAGE_SIZE,
+ &page->dma, pool->node); /* presumably zeroed coherent memory, per the helper's name - confirm */
+ if (!page->buf)
+ goto err_free_bitmap;
+
+ return page;
+
+err_free_bitmap: /* labels unwind in reverse order of acquisition */
+ bitmap_free(page->bitmap);
+err_free_page:
+ kfree(page);
+err_out:
+ return NULL;
+}
+
+static void mlx5_dma_pool_page_free(struct mlx5_core_dev *dev, /* release the page's DMA buffer, its bitmap and the descriptor */
+ struct mlx5_dma_pool_page *page)
+{
+ dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, page->buf,
+ page->dma);
+ bitmap_free(page->bitmap);
+ kfree(page); /* pool_link is not touched here; mlx5_dma_pool_free() unlinks the page before calling */
+}
+
+static int mlx5_dma_pool_alloc_from_page(struct mlx5_dma_pool *pool, /* take the lowest free block of page; 0 on success, -ENOMEM if none is free */
+ struct mlx5_dma_pool_page *page,
+ unsigned long *idx_out)
+{
+ int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift);
+
+ *idx_out = find_first_bit(page->bitmap, blocks_per_page); /* *idx_out is written even when no free block is found */
+ if (*idx_out >= blocks_per_page)
+ return -ENOMEM;
+
+ __clear_bit(*idx_out, page->bitmap); /* cleared bit == block in use; mlx5_dma_pool_alloc() calls this with pool->lock held */
+
+ if (bitmap_empty(page->bitmap, blocks_per_page)) /* that was the page's last free block */
+ list_move_tail(&page->pool_link, &pool->page_list); /* move the exhausted page to the tail of the list */
+
+ return 0;
+}
+
+static struct mlx5_dma_pool_page * /* allocate one block: returns its page and stores the block index in *idx_out, or NULL on failure */
+mlx5_dma_pool_alloc(struct mlx5_dma_pool *pool, unsigned long *idx_out)
+{
+ struct mlx5_dma_pool_page *page;
+
+ mutex_lock(&pool->lock);
+
+ page = list_first_entry_or_null(&pool->page_list, /* only the head page is tried */
+ struct mlx5_dma_pool_page, pool_link);
+ if (page && !mlx5_dma_pool_alloc_from_page(pool, page, idx_out))
+ goto unlock; /* successfully allocated from existing page */
+
+ page = mlx5_dma_pool_page_alloc(pool); /* list empty or head page exhausted: add a new backing page */
+ if (!page)
+ goto unlock; /* page is NULL here, which is what the caller gets */
+
+ list_add(&page->pool_link, &pool->page_list); /* list_add() inserts at the head */
+ mlx5_dma_pool_alloc_from_page(pool, page, idx_out); /* return value ignored: a new page has every bit set, so a free block is found */
+
+unlock:
+ mutex_unlock(&pool->lock);
+ return page;
+}
+
+static void mlx5_dma_pool_free(struct mlx5_dma_pool *pool, /* release block idx of page; frees the page once no block remains in use */
+ struct mlx5_dma_pool_page *page,
+ unsigned long idx)
+{
+ int blocks_per_page = BIT(PAGE_SHIFT - pool->block_shift);
+ bool was_full;
+
+ mutex_lock(&pool->lock);
+ was_full = bitmap_empty(page->bitmap, blocks_per_page); /* "full" == the page had no free block before this release */
+ __set_bit(idx, page->bitmap); /* mark the block free */
+
+ if (bitmap_full(page->bitmap, blocks_per_page)) { /* every block of the page is free again */
+ list_del(&page->pool_link);
+ mlx5_dma_pool_page_free(pool->dev, page); /* page must not be used after this point */
+ } else {
+ memset((u8 *)page->buf + (idx << pool->block_shift), 0, /* zero the released block's bytes */
+ BIT(pool->block_shift));
+ if (was_full)
+ list_move(&page->pool_link, &pool->page_list); /* page has a free block again: move it to the head, where mlx5_dma_pool_alloc() looks */
+ }
+ mutex_unlock(&pool->lock);
+}
+
static void
mlx5_frag_buf_node_pools_destroy(struct mlx5_frag_buf_node_pools *node_pools)
{
@@ -197,56 +302,57 @@ int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
struct mlx5_frag_buf *buf, int node)
{
- int i;
+ struct mlx5_dma_pool *pool; /* single pool from which every fragment of buf is allocated */
+ int pool_idx;
+
+ node = node == NUMA_NO_NODE ? first_online_node : node; /* no node requested: use the first online node */

buf->size = size;
buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
- buf->page_shift = PAGE_SHIFT;
- buf->frags = kzalloc_objs(struct mlx5_buf_list, buf->npages);
+ buf->page_shift = clamp_t(int, order_base_2(size), /* log2 of size rounded up, bounded to [min pool block shift, PAGE_SHIFT] */
+ MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT,
+ PAGE_SHIFT);
+ buf->frags = kcalloc_node(buf->npages, sizeof(*buf->frags), /* zeroed, so unfilled entries have buf == NULL */
+ GFP_KERNEL, node);
if (!buf->frags)
- goto err_out;
+ return -ENOMEM;

- for (i = 0; i < buf->npages; i++) {
+ pool_idx = buf->page_shift - MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT; /* pools[] is indexed by block shift relative to the minimum shift */
+ pool = dev->priv.frag_buf_node_pools[node]->pools[pool_idx]; /* NOTE(review): assumes pools exist for this node and pool->block_shift == buf->page_shift - confirm */
+ for (int i = 0; i < buf->npages; i++) {
struct mlx5_buf_list *frag = &buf->frags[i];
- int frag_sz = min_t(int, size, PAGE_SIZE);
+ struct mlx5_dma_pool_page *page;
+ unsigned long idx;

- frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
- &frag->map, node);
- if (!frag->buf)
- goto err_free_buf;
- if (frag->map & ((1 << buf->page_shift) - 1)) {
- dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz,
- buf->frags[i].buf, buf->frags[i].map);
- mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
- &frag->map, buf->page_shift);
- goto err_free_buf;
+ page = mlx5_dma_pool_alloc(pool, &idx);
+ if (!page) {
+ mlx5_frag_buf_free(dev, buf); /* returns the fragments allocated so far, skips the NULL ones, and kfree()s buf->frags */
+ return -ENOMEM;
}
- size -= frag_sz;
+ frag->buf = (u8 *)page->buf + (idx << pool->block_shift); /* CPU address of block idx inside the page */
+ frag->map = page->dma + (idx << pool->block_shift); /* DMA address at the same offset */
+ frag->frag_page = page; /* kept so mlx5_frag_buf_free() can find the page and its pool */
}

return 0;
-
-err_free_buf:
- while (i--)
- dma_free_coherent(mlx5_core_dma_dev(dev), PAGE_SIZE, buf->frags[i].buf,
- buf->frags[i].map);
- kfree(buf->frags);
-err_out:
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);

void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
{
- int size = buf->size;
- int i;
+ for (int i = 0; i < buf->npages; i++) { /* return every allocated fragment to its pool */
+ struct mlx5_buf_list *frag = &buf->frags[i];
+ struct mlx5_dma_pool_page *page;
+ struct mlx5_dma_pool *pool;
+ unsigned long idx;

- for (i = 0; i < buf->npages; i++) {
- int frag_sz = min_t(int, size, PAGE_SIZE);
+ if (!frag->buf) /* entry never filled, e.g. when called from mlx5_frag_buf_alloc_node()'s failure path */
+ continue;

- dma_free_coherent(mlx5_core_dma_dev(dev), frag_sz, buf->frags[i].buf,
- buf->frags[i].map);
- size -= frag_sz;
+ page = frag->frag_page;
+ pool = page->pool;
+ idx = (frag->map - page->dma) >> pool->block_shift; /* recover the block index from the DMA offset within the page */
+ mlx5_dma_pool_free(pool, page, idx); /* may free page itself; it is not used afterwards */
}
kfree(buf->frags);
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 71f7615ab553..531ce66fc8ef 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -343,9 +343,11 @@ struct mlx5_cmd_mailbox {
struct mlx5_cmd_mailbox *next;
};

+struct mlx5_dma_pool_page; /* forward declaration: only a pointer to it is stored below */
struct mlx5_buf_list {
void *buf;
dma_addr_t map;
+ struct mlx5_dma_pool_page *frag_page; /* pool page holding this fragment; set by mlx5_frag_buf_alloc_node(), read by mlx5_frag_buf_free() */
};

struct mlx5_frag_buf {
--
2.44.0