[PATCH net-next V2 2/3] net/mlx5: add frag buf pools create/destroy paths
From: Tariq Toukan
Date: Wed Apr 29 2026 - 16:16:39 EST
From: Nimrod Oren <noren@xxxxxxxxxx>
Introduce mlx5 DMA pool and pool-page data structures, and add the
creation and teardown paths.
Each NUMA node owns a set of mlx5_dma_pool instances, each one with a
different block size. The block sizes are all the powers of two from
1 << MLX5_ADAPTER_PAGE_SHIFT up to 1 << PAGE_SHIFT (PAGE_SIZE). Since
mlx5_frag_bufs are used to back objects whose sizes are encoded relative
to MLX5_ADAPTER_PAGE_SHIFT, a smaller block_shift value cannot be used.
Requests larger than PAGE_SIZE continue to be handled as page-sized
fragments, as in the existing frag-buf allocation model.
Signed-off-by: Nimrod Oren <noren@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/alloc.c | 116 +++++++++++++++++-
include/linux/mlx5/driver.h | 7 +-
2 files changed, 119 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index cebb3559d2c9..fcc859c5f810 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -37,10 +37,15 @@
#include <linux/bitmap.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
+#include <linux/nodemask.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
+/*
+ * Smallest block shift served by a frag buf pool. Frag bufs back objects
+ * whose sizes are encoded relative to MLX5_ADAPTER_PAGE_SHIFT, so a smaller
+ * shift cannot be used.
+ */
+#define MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT MLX5_ADAPTER_PAGE_SHIFT
+/* One pool per block shift, from the minimum up to PAGE_SHIFT inclusive. */
+#define MLX5_FRAG_BUF_POOLS_NUM \
+ (PAGE_SHIFT - MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + 1)
+
struct mlx5_db_pgdir {
struct list_head list;
unsigned long *bitmap;
@@ -48,6 +53,27 @@ struct mlx5_db_pgdir {
dma_addr_t db_dma;
};
+/*
+ * One DMA pool. mlx5_dma_pool_create() records the device, NUMA node and
+ * block shift the pool was created for and leaves page_list empty.
+ */
+struct mlx5_dma_pool {
+ /* Protects page_list and per-page allocation bitmaps. */
+ struct mutex lock;
+ /*
+  * Presumably holds mlx5_dma_pool_page entries via their pool_link;
+  * nothing in this patch adds to it yet - TODO confirm.
+  */
+ struct list_head page_list;
+ struct mlx5_core_dev *dev;
+ int node;
+ /* Shift given at creation; presumably block size is 1 << block_shift. */
+ u8 block_shift;
+};
+
+/*
+ * Per-page bookkeeping for a mlx5_dma_pool. No function in this patch
+ * allocates or touches this structure yet; the field notes below are
+ * inferred from names and types and must be confirmed against the
+ * allocation path.
+ */
+struct mlx5_dma_pool_page {
+ /* Presumably the pool this page belongs to - TODO confirm. */
+ struct mlx5_dma_pool *pool;
+ /* Presumably the entry on pool->page_list - TODO confirm. */
+ struct list_head pool_link;
+ /* Presumably the per-page allocation bitmap guarded by pool->lock. */
+ unsigned long *bitmap;
+ /* Presumably the CPU and DMA addresses of the backing memory. */
+ void *buf;
+ dma_addr_t dma;
+};
+
+/*
+ * The set of pools owned by one NUMA node. mlx5_frag_buf_node_pools_create()
+ * fills slot i with a pool whose block_shift is
+ * MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + i.
+ */
+struct mlx5_frag_buf_node_pools {
+ struct mlx5_dma_pool *pools[MLX5_FRAG_BUF_POOLS_NUM];
+};
+
/* Handling for queue buffers -- we allocate a bunch of memory and
* register it in a memory region at HCA virtual address 0.
*/
@@ -71,14 +97,100 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
return cpu_handle;
}
-/* Implemented later in the series */
+/*
+ * Release a pool obtained from mlx5_dma_pool_create(): destroy its mutex and
+ * free the pool structure. Passing NULL is a no-op, in line with kfree(), so
+ * the helper is safe to call on a slot that was never filled.
+ *
+ * Nothing on pool->page_list is released here.
+ */
+static void mlx5_dma_pool_destroy(struct mlx5_dma_pool *pool)
+{
+	if (!pool)
+		return;
+
+	mutex_destroy(&pool->lock);
+	kfree(pool);
+}
+
+/*
+ * Allocate a zeroed pool and record the device, NUMA node and block shift it
+ * is created for. The page list starts out empty and the lock is initialised.
+ *
+ * Returns the new pool, or NULL if the allocation failed.
+ */
+static struct mlx5_dma_pool *mlx5_dma_pool_create(struct mlx5_core_dev *dev,
+						  int node, u8 block_shift)
+{
+	struct mlx5_dma_pool *new_pool = kzalloc_obj(*new_pool);
+
+	if (new_pool) {
+		INIT_LIST_HEAD(&new_pool->page_list);
+		mutex_init(&new_pool->lock);
+		new_pool->block_shift = block_shift;
+		new_pool->node = node;
+		new_pool->dev = dev;
+	}
+
+	return new_pool;
+}
+
+/*
+ * Destroy every pool present in a node's pool set, in slot order, then free
+ * the set itself. Empty (NULL) slots are skipped, so a partially populated
+ * set can be passed in.
+ */
+static void
+mlx5_frag_buf_node_pools_destroy(struct mlx5_frag_buf_node_pools *node_pools)
+{
+	int idx;
+
+	for (idx = 0; idx < MLX5_FRAG_BUF_POOLS_NUM; idx++) {
+		struct mlx5_dma_pool *pool = node_pools->pools[idx];
+
+		if (!pool)
+			continue;
+		mlx5_dma_pool_destroy(pool);
+	}
+
+	kfree(node_pools);
+}
+
+/*
+ * Build the pool set for one NUMA node: one pool per block shift, starting at
+ * MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT for slot 0 and growing by one per slot.
+ *
+ * Returns the new set, or NULL on allocation failure; in that case any pools
+ * created so far are destroyed before returning.
+ */
+static struct mlx5_frag_buf_node_pools *
+mlx5_frag_buf_node_pools_create(struct mlx5_core_dev *dev, int node)
+{
+	struct mlx5_frag_buf_node_pools *set = kzalloc_obj(*set);
+	int idx;
+
+	if (!set)
+		return NULL;
+
+	for (idx = 0; idx < MLX5_FRAG_BUF_POOLS_NUM; idx++) {
+		u8 shift = MLX5_FRAG_BUF_POOL_MIN_BLOCK_SHIFT + idx;
+		struct mlx5_dma_pool *pool;
+
+		pool = mlx5_dma_pool_create(dev, node, shift);
+		if (!pool)
+			goto err_destroy;
+		set->pools[idx] = pool;
+	}
+
+	return set;
+
+err_destroy:
+	/* Slots not reached above are still NULL and are skipped. */
+	mlx5_frag_buf_node_pools_destroy(set);
+	return NULL;
+}
+
+/*
+ * Tear down what mlx5_frag_buf_pools_init() set up: destroy the pool set of
+ * every possible NUMA node that has one, free the per-node table and reset
+ * the table pointer to NULL.
+ *
+ * Safe to call when the table pointer is NULL, which is the state left by a
+ * failed init or by an earlier cleanup.
+ */
void mlx5_frag_buf_pools_cleanup(struct mlx5_core_dev *dev)
{
+	struct mlx5_priv *priv = &dev->priv;
+	int node;
+
+	/* No table means there is nothing to index or free. */
+	if (!priv->frag_buf_node_pools)
+		return;
+
+	for_each_node_state(node, N_POSSIBLE) {
+		struct mlx5_frag_buf_node_pools *node_pools;
+
+		node_pools = priv->frag_buf_node_pools[node];
+		/* Slots stay NULL for nodes init did not reach. */
+		if (!node_pools)
+			continue;
+		mlx5_frag_buf_node_pools_destroy(node_pools);
+	}
+
+	kfree(priv->frag_buf_node_pools);
+	priv->frag_buf_node_pools = NULL;
}
-/* Implemented later in the series */
+/*
+ * Allocate the per-node table (nr_node_ids zeroed slots) and create a pool
+ * set for every possible NUMA node.
+ *
+ * Returns 0 on success or -ENOMEM on allocation failure. If a node's pool set
+ * cannot be created, everything built so far is released through
+ * mlx5_frag_buf_pools_cleanup() before returning.
+ */
int mlx5_frag_buf_pools_init(struct mlx5_core_dev *dev)
{
+	struct mlx5_priv *priv = &dev->priv;
+	int node;
+
+	priv->frag_buf_node_pools = kzalloc_objs(*priv->frag_buf_node_pools,
+						 nr_node_ids);
+	if (!priv->frag_buf_node_pools)
+		return -ENOMEM;
+
+	for_each_node_state(node, N_POSSIBLE) {
+		struct mlx5_frag_buf_node_pools *set;
+
+		set = mlx5_frag_buf_node_pools_create(dev, node);
+		if (!set)
+			goto err_cleanup;
+		priv->frag_buf_node_pools[node] = set;
+	}
+
return 0;
+
+err_cleanup:
+	mlx5_frag_buf_pools_cleanup(dev);
+	return -ENOMEM;
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04b96c5abb57..71f7615ab553 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -558,6 +558,7 @@ enum mlx5_func_type {
MLX5_FUNC_TYPE_NUM,
};
+struct mlx5_frag_buf_node_pools;
struct mlx5_ft_pool;
struct mlx5_priv {
/* IRQ table valid only for real pci devices PF or VF */
@@ -581,14 +582,16 @@ struct mlx5_priv {
struct mlx5_debugfs_entries dbg;
- /* start: alloc staff */
+ /* start: alloc stuff */
/* protect buffer allocation according to numa node */
struct mutex alloc_mutex;
int numa_node;
struct mutex pgdir_mutex;
struct list_head pgdir_list;
- /* end: alloc staff */
+
+ struct mlx5_frag_buf_node_pools **frag_buf_node_pools;
+ /* end: alloc stuff */
struct mlx5_adev **adev;
int adev_idx;
--
2.44.0