[PATCH rdma-next v2 10/13] RDMA/mlx5: Add steering support in optional flow counters

From: Leon Romanovsky
Date: Thu Sep 30 2021 - 04:05:38 EST


From: Aharon Landau <aharonl@xxxxxxxxxx>

Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.

Signed-off-by: Aharon Landau <aharonl@xxxxxxxxxx>
Reviewed-by: Maor Gottlieb <maorg@xxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
---
drivers/infiniband/hw/mlx5/fs.c | 187 +++++++++++++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 24 ++++
include/rdma/ib_hdrs.h | 1 +
3 files changed, 212 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..b780185d9dc6 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}

+enum {
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.source_vhca_port) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ ft_field_support.source_vhca_port))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+ misc_parameters.source_vhca_port);
+ MLX5_SET(fte_match_param, &spec->match_value,
+ misc_parameters.source_vhca_port, port_num);
+
+ return 0;
+}
+
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec, int ipv)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+ INET_ECN_CE);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ ipv);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+ IB_BTH_OPCODE_CNP);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_flow_namespace_type fn_type;
+ int priority, i, err, spec_num;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 2;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ prio = &dev->flow_db->opfcs[type];
+ if (!prio->flow_table) {
+ prio = _get_prio(ns, prio, priority,
+ dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto free;
+ }
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter_id = mlx5_fc_id(opfc->fc);
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ for (i -= 1; i >= 0; i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ int i;
+
+ for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+ }
+}
+
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 8215d7ab579d..d81ff5078e5e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
struct mlx5_core_dev *mdev;
};

+enum mlx5_ib_optional_counter_type {
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+
+ MLX5_IB_OPCOUNTER_MAX,
+};
+
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio fdb;
struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@@ -797,6 +806,13 @@ struct mlx5_ib_resources {
struct mlx5_ib_port_resources ports[2];
};

+#define MAX_OPFC_RULES 2
+
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
+};
+
struct mlx5_ib_counters {
struct rdma_stat_desc *descs;
size_t *offsets;
@@ -807,6 +823,14 @@ struct mlx5_ib_counters {
u16 set_id;
};

+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
struct mlx5_ib_multiport_info;

struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
+#define IB_BTH_OPCODE_CNP 0x81

static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
--
2.31.1