[PATCH net-next 2/2] net/mlx5: implement max_sfs parameter

From: Tariq Toukan

Date: Sun May 17 2026 - 07:28:00 EST


From: Nikolay Aleksandrov <nikolay@xxxxxxxxxx>

Implement the max_sfs generic parameter to allow users to control the total
number of light-weight NIC subfunctions that can be created, using devlink
instead of external vendor tools. A value of 0 effectively disables creation
of new subfunction devices. After a successful change, a warning that a
reboot is required is sent to user-space via extack (returning extack
without an error code is interpreted as a warning by user-space tools).

Signed-off-by: Nikolay Aleksandrov <nikolay@xxxxxxxxxx>
Reviewed-by: David Ahern <dsahern@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
Documentation/networking/devlink/mlx5.rst | 7 +-
.../mellanox/mlx5/core/lib/nv_param.c | 83 ++++++++++++++++++-
2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 4bba4d780a4a..283b93d16861 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -45,8 +45,13 @@ Parameters
- The range is between 1 and a device-specific max.
- Applies to each physical function (PF) independently, if the device
supports it. Otherwise, it applies symmetrically to all PFs.
+ * - ``max_sfs``
+ - permanent
+ - The range is between 0 and a device-specific max.
+ - Applies to each physical function (PF) independently.

-Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect
+Note: permanent parameters such as ``enable_sriov``, ``total_vfs`` and ``max_sfs``
+ require FW reset to take effect

.. code-block:: bash

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
index 19bb620b7436..eff3a67e4ca0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
@@ -68,7 +68,9 @@ struct mlx5_ifc_mnvda_reg_bits {

struct mlx5_ifc_nv_global_pci_conf_bits {
u8 sriov_valid[0x1];
- u8 reserved_at_1[0x10];
+ u8 reserved_at_1[0xa];
+ u8 per_pf_num_sf[0x1];
+ u8 reserved_at_c[0x5];
u8 per_pf_total_vf[0x1];
u8 reserved_at_12[0xe];

@@ -93,9 +95,11 @@ struct mlx5_ifc_nv_global_pci_cap_bits {
};

struct mlx5_ifc_nv_pf_pci_conf_bits {
- u8 reserved_at_0[0x9];
+ u8 log_sf_bar_size[0x8];
+ u8 pf_total_sf_en[0x1];
u8 pf_total_vf_en[0x1];
- u8 reserved_at_a[0x16];
+ u8 reserved_at_a[0x6];
+ u8 total_sf[0x10];

u8 reserved_at_20[0x20];

@@ -755,6 +759,76 @@ static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id,
return 0;
}

+/*
+ * devlink "get" handler for the permanent max_sfs parameter.
+ *
+ * Reads the per-host PF configuration into a zero-initialised MNVDA buffer
+ * and reports its total_sf field through ctx->val.vu32.
+ *
+ * Returns 0 on success. If the read fails, an extack message is set and the
+ * error code of the read is returned unchanged.
+ */
+static int mlx5_devlink_max_sfs_get(struct devlink *devlink, u32 id,
+				    struct devlink_param_gset_ctx *ctx,
+				    struct netlink_ext_ack *extack)
+{
+	u32 reg[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	struct mlx5_core_dev *mdev;
+	void *pf_conf;
+	int ret;
+
+	mdev = devlink_priv(devlink);
+
+	ret = mlx5_nv_param_read_per_host_pf_conf(mdev, reg, sizeof(reg));
+	if (ret) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read PF configuration");
+		return ret;
+	}
+
+	/* The configuration item payload sits inside the register image. */
+	pf_conf = MLX5_ADDR_OF(mnvda_reg, reg, configuration_item_data);
+	ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, pf_conf, total_sf);
+
+	return 0;
+}
+
+/*
+ * devlink "set" handler for the permanent max_sfs parameter.
+ *
+ * ctx->val.vu32 is the requested number of SFs; 0 clears the enable bits and
+ * the SF BAR size as well. The update is done in two NV writes, in order:
+ *   1. global PCI configuration: per_pf_num_sf
+ *   2. per-host PF configuration: log_sf_bar_size, pf_total_sf_en, total_sf
+ * There is no rollback: if step 2 fails, the per_pf_num_sf change written in
+ * step 1 stays in place.
+ *
+ * Returns 0 on success (with an informational extack message and no error
+ * code), -EINVAL if the requested value does not fit the total_sf field, or
+ * the error code of the failing read/write.
+ */
+static int mlx5_devlink_max_sfs_set(struct devlink *devlink, u32 id,
+				    struct devlink_param_gset_ctx *ctx,
+				    struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	u32 max_sfs = ctx->val.vu32;
+	void *data;
+	int err;
+
+	/*
+	 * total_sf is a 16-bit field in nv_pf_pci_conf. Reject values that
+	 * cannot be represented before touching NV configuration, rather
+	 * than storing a different number than the one requested.
+	 */
+	if (max_sfs >= BIT(16)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "max_sfs exceeds the supported range");
+		return -EINVAL;
+	}
+
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to read global PCI configuration");
+		return err;
+	}
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	MLX5_SET(nv_global_pci_conf, data, per_pf_num_sf, !!max_sfs);
+
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to change per_pf_num_sf global PCI configuration");
+		return err;
+	}
+
+	/* Start from a clean buffer before reading the second NV item. */
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read PF configuration");
+		return err;
+	}
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	/* NOTE(review): 12 is presumably log2 of the SF BAR size - confirm. */
+	MLX5_SET(nv_pf_pci_conf, data, log_sf_bar_size, max_sfs ? 12 : 0);
+	MLX5_SET(nv_pf_pci_conf, data, pf_total_sf_en, !!max_sfs);
+	MLX5_SET(nv_pf_pci_conf, data, total_sf, max_sfs);
+
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to change PF PCI configuration");
+		return err;
+	}
+
+	/* Message set while returning success, so no error is reported. */
+	NL_SET_ERR_MSG(extack, "Modifying max_sfs requires a reboot");
+
+	return 0;
+}
+
static const struct devlink_param mlx5_nv_param_devlink_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
mlx5_devlink_enable_sriov_get,
@@ -763,6 +837,9 @@ static const struct devlink_param mlx5_nv_param_devlink_params[] = {
mlx5_devlink_total_vfs_get,
mlx5_devlink_total_vfs_set,
mlx5_devlink_total_vfs_validate),
+ DEVLINK_PARAM_GENERIC(MAX_SFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+ mlx5_devlink_max_sfs_get,
+ mlx5_devlink_max_sfs_set, NULL),
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
"cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
BIT(DEVLINK_PARAM_CMODE_PERMANENT),
--
2.44.0