[PATCH net-next 2/3] net/mlx5: add frag buf pools create/destroy paths

From: Tariq Toukan

Date: Tue Apr 28 2026 - 01:31:38 EST


From: Nimrod Oren <noren@xxxxxxxxxx>

Introduce mlx5 DMA pool and pool-page data structures, and add the
creation and teardown paths.

Each NUMA node owns a set of mlx5_dma_pool instances, each one with a
different block size. The block sizes are all the powers of two from
BIT(MLX5_ADAPTER_PAGE_SHIFT) up to and including PAGE_SIZE, i.e. one
pool per block shift in [MLX5_ADAPTER_PAGE_SHIFT, PAGE_SHIFT]. Since
mlx5_frag_bufs are used to back objects whose sizes are encoded relative
to MLX5_ADAPTER_PAGE_SHIFT, a smaller block_shift value cannot be used.
Requests larger than PAGE_SIZE continue to be handled as page-sized
fragments, as in the existing frag-buf allocation model.

Signed-off-by: Nimrod Oren <noren@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/alloc.c | 136 +++++++++++++++++-
include/linux/mlx5/driver.h | 7 +-
2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index cebb3559d2c9..918cf027bcbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -37,10 +37,15 @@
#include <linux/bitmap.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
+#include <linux/nodemask.h>
#include <linux/mlx5/driver.h>

#include "mlx5_core.h"

+/* Smallest block shift served by a frag-buf pool. */
+#define MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT MLX5_ADAPTER_PAGE_SHIFT
+/* Number of block shifts from the minimum through PAGE_SHIFT, inclusive. */
+#define MLX5_FRAG_BUF_POOLS_NUM \
+ (PAGE_SHIFT - MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + 1)
+
struct mlx5_db_pgdir {
struct list_head list;
unsigned long *bitmap;
@@ -48,6 +53,27 @@ struct mlx5_db_pgdir {
dma_addr_t db_dma;
};

+/* A DMA pool: a list of pool pages that share one block shift. */
+struct mlx5_dma_pool {
+ /* Protects page_list and per-page allocation bitmaps. */
+ struct mutex lock;
+ /* mlx5_dma_pool_page entries, linked through their pool_link. */
+ struct list_head page_list;
+ /* Device handed to mlx5_dma_pool_page_free() when the pool is torn down. */
+ struct mlx5_core_dev *dev;
+ /* NUMA node id this pool was created for. */
+ int node;
+ /* Block-size shift; creation rejects values above PAGE_SHIFT. */
+ u8 block_shift;
+};
+
+/* One page tracked on a mlx5_dma_pool's page_list. */
+struct mlx5_dma_pool_page {
+ /* Presumably the owning pool; not assigned anywhere in this patch. */
+ struct mlx5_dma_pool *pool;
+ /* Entry in a pool's page_list. */
+ struct list_head pool_link;
+ /* Per-page allocation bitmap, protected by the pool's lock. */
+ unsigned long *bitmap;
+ /*
+ * Presumably the CPU address and DMA address of the page's memory;
+ * neither is populated in this patch - confirm against the
+ * allocation path.
+ */
+ void *buf;
+ dma_addr_t dma;
+};
+
+/* The set of DMA pools created for one NUMA node. */
+struct mlx5_frag_buf_node_pools {
+ /* pools[i] is created with block_shift MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + i. */
+ struct mlx5_dma_pool *pools[MLX5_FRAG_BUF_POOLS_NUM];
+};
+
/* Handling for queue buffers -- we allocate a bunch of memory and
* register it in a memory region at HCA virtual address 0.
*/
@@ -72,13 +98,121 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
}

/* Implemented later in the series */
+static void mlx5_dma_pool_page_free(struct mlx5_core_dev *dev,
+ struct mlx5_dma_pool_page *page)
+{
+ /*
+ * Empty placeholder for now: pages handed over by
+ * mlx5_dma_pool_destroy() are unlinked but not released here.
+ */
+}
+
+/*
+ * Tear down a DMA pool and free the pool object.
+ *
+ * The pool is expected to be empty by now. Leftover pages trigger a
+ * warning and are then unlinked and passed to mlx5_dma_pool_page_free()
+ * before the lock is destroyed and the pool memory is released.
+ */
+static void mlx5_dma_pool_destroy(struct mlx5_dma_pool *pool)
+{
+	struct mlx5_dma_pool_page *cur, *next;
+
+	WARN(!list_empty(&pool->page_list),
+	     "mlx5 dma pool destroy with non-empty pool: block_shift=%u\n",
+	     pool->block_shift);
+
+	/* A no-op on an empty list, so it need not be keyed off the WARN. */
+	list_for_each_entry_safe(cur, next, &pool->page_list, pool_link) {
+		list_del(&cur->pool_link);
+		mlx5_dma_pool_page_free(pool->dev, cur);
+	}
+
+	mutex_destroy(&pool->lock);
+	kfree(pool);
+}
+
+/*
+ * Allocate and initialise an empty DMA pool for @dev on @node using
+ * @block_shift.
+ *
+ * Returns the new pool, or NULL if @block_shift exceeds PAGE_SHIFT
+ * (warning once) or if the allocation fails.
+ */
+static struct mlx5_dma_pool *mlx5_dma_pool_create(struct mlx5_core_dev *dev,
+						  int node, u8 block_shift)
+{
+	struct mlx5_dma_pool *pool;
+
+	if (WARN_ONCE(block_shift > PAGE_SHIFT,
+		      "mlx5 dma pool invalid block_shift: %u (max %d)\n",
+		      block_shift, PAGE_SHIFT))
+		return NULL;
+
+	pool = kzalloc_obj(*pool);
+	if (!pool)
+		return NULL;
+
+	/* Record the identity first, then set up the lock and page list. */
+	pool->block_shift = block_shift;
+	pool->node = node;
+	pool->dev = dev;
+	mutex_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->page_list);
+
+	return pool;
+}
+
+/*
+ * Destroy every pool that was created for a node, then free the
+ * container itself. NULL slots are skipped, so a partially built set
+ * can be passed in as well.
+ */
+static void
+mlx5_frag_buf_node_pools_destroy(struct mlx5_frag_buf_node_pools *node_pools)
+{
+	int idx;
+
+	for (idx = 0; idx < MLX5_FRAG_BUF_POOLS_NUM; idx++) {
+		struct mlx5_dma_pool *pool = node_pools->pools[idx];
+
+		if (!pool)
+			continue;
+		mlx5_dma_pool_destroy(pool);
+	}
+
+	kfree(node_pools);
+}
+
+/*
+ * Build the full set of DMA pools for @node, one per block shift from
+ * MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT upwards.
+ *
+ * Returns the new set, or NULL on failure; pools created before the
+ * failure are destroyed again.
+ */
+static struct mlx5_frag_buf_node_pools *
+mlx5_frag_buf_node_pools_create(struct mlx5_core_dev *dev, int node)
+{
+	struct mlx5_frag_buf_node_pools *node_pools;
+	int idx;
+
+	node_pools = kzalloc_obj(*node_pools);
+	if (!node_pools)
+		return NULL;
+
+	for (idx = 0; idx < MLX5_FRAG_BUF_POOLS_NUM; idx++) {
+		u8 shift = MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + idx;
+		struct mlx5_dma_pool *pool;
+
+		pool = mlx5_dma_pool_create(dev, node, shift);
+		if (!pool)
+			goto err_destroy;
+		node_pools->pools[idx] = pool;
+	}
+
+	return node_pools;
+
+err_destroy:
+	/* Unfilled slots are still zeroed, so destroy skips them. */
+	mlx5_frag_buf_node_pools_destroy(node_pools);
+	return NULL;
+}
+
void mlx5_frag_buf_pools_cleanup(struct mlx5_core_dev *dev)
{
+ struct mlx5_priv *priv = &dev->priv;
+ int node;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ struct mlx5_frag_buf_node_pools *node_pools;
+
+ node_pools = priv->frag_buf_node_pools[node];
+ if (!node_pools)
+ continue;
+ mlx5_frag_buf_node_pools_destroy(node_pools);
+ }
+
+ kfree(priv->frag_buf_node_pools);
+ priv->frag_buf_node_pools = NULL;
}

-/* Implemented later in the series */
int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
{
+ struct mlx5_priv *priv = &dev->priv;
+ int node;
+
+ priv->frag_buf_node_pools = kzalloc_objs(*priv->frag_buf_node_pools,
+ nr_node_ids);
+ if (!priv->frag_buf_node_pools)
+ return -ENOMEM;
+
+ for_each_node_state(node, N_POSSIBLE) {
+ struct mlx5_frag_buf_node_pools *node_pools;
+
+ node_pools = mlx5_frag_buf_node_pools_create(dev, node);
+ if (!node_pools) {
+ mlx5_frag_buf_pools_cleanup(dev);
+ return -ENOMEM;
+ }
+ priv->frag_buf_node_pools[node] = node_pools;
+ }
+
return 0;
}

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04b96c5abb57..71f7615ab553 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -558,6 +558,7 @@ enum mlx5_func_type {
MLX5_FUNC_TYPE_NUM,
};

+struct mlx5_frag_buf_node_pools;
struct mlx5_ft_pool;
struct mlx5_priv {
/* IRQ table valid only for real pci devices PF or VF */
@@ -581,14 +582,16 @@ struct mlx5_priv {

struct mlx5_debugfs_entries dbg;

- /* start: alloc staff */
+ /* start: alloc stuff */
/* protect buffer allocation according to numa node */
struct mutex alloc_mutex;
int numa_node;

struct mutex pgdir_mutex;
struct list_head pgdir_list;
- /* end: alloc staff */
+
+ struct mlx5_frag_buf_node_pools **frag_buf_node_pools;
+ /* end: alloc stuff */

struct mlx5_adev **adev;
int adev_idx;
--
2.44.0