RDMA/mlx5: Add steering support in optional flow counters
author Aharon Landau <aharonl@nvidia.com>
Fri, 8 Oct 2021 12:24:37 +0000 (15:24 +0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Tue, 12 Oct 2021 15:48:06 +0000 (12:48 -0300)
Add steering infrastructure for adding and removing optional counters.
This allows the counters to be added and removed dynamically so as not
to hurt performance.

Link: https://lore.kernel.org/r/20211008122439.166063-12-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx5/fs.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
include/rdma/ib_hdrs.h

index 5fbc0a8454b9142e1d2aa8e7449f4e870ba2b378..b780185d9dc69e2ad09c65e6a75d3cae8e022615 100644 (file)
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/ib_umem.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
 #include "mlx5_ib.h"
 #include "counters.h"
 #include "devx.h"
@@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
        return prio;
 }
 
+enum { /* priority values handed to _get_prio() for the RX counter tables */
+       RDMA_RX_ECN_OPCOUNTER_PRIO,     /* MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS */
+       RDMA_RX_CNP_OPCOUNTER_PRIO,     /* MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS */
+};
+
+enum { /* priority values handed to _get_prio() for the TX counter tables */
+       RDMA_TX_CNP_OPCOUNTER_PRIO,     /* MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS */
+};
+
+/*
+ * Add a source_vhca_port match on @port_num to @spec.
+ *
+ * Both the RDMA RX and the RDMA TX flow-table capabilities have to report
+ * support for the source_vhca_port field. If either one does not, @spec is
+ * left untouched and -EOPNOTSUPP is returned. Returns 0 once the match
+ * criteria and value have been written.
+ */
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+                             struct mlx5_flow_spec *spec)
+{
+       /* RX is probed first; TX is only looked at when RX supports it. */
+       if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+                                        ft_field_support.source_vhca_port) &&
+             MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+                                        ft_field_support.source_vhca_port)))
+               return -EOPNOTSUPP;
+
+       /* Match on every bit of the field ... */
+       MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+                        misc_parameters.source_vhca_port);
+       /* ... and require it to equal the requested port. */
+       MLX5_SET(fte_match_param, &spec->match_value,
+                misc_parameters.source_vhca_port, port_num);
+
+       return 0;
+}
+
+/*
+ * Fill @spec with a match on outer-header IP version @ipv combined with an
+ * outer-header ECN field equal to INET_ECN_CE.
+ *
+ * When mlx5_core_mp_enabled() is true the match is also narrowed to
+ * @port_num through set_vhca_port_spec().
+ *
+ * Returns 0 on success, or -EOPNOTSUPP when the RDMA RX flow table cannot
+ * match on outer_ip_version or when the port match could not be added.
+ */
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+                          struct mlx5_flow_spec *spec, int ipv)
+{
+       if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+                                       ft_field_support.outer_ip_version))
+               return -EOPNOTSUPP;
+
+       if (mlx5_core_mp_enabled(dev->mdev)) {
+               /* Any failure of the port match is reported as unsupported. */
+               if (set_vhca_port_spec(dev, port_num, spec))
+                       return -EOPNOTSUPP;
+       }
+
+       /* ECN field: full mask, value CE. */
+       MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+                        outer_headers.ip_ecn);
+       MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+                INET_ECN_CE);
+
+       /* IP version: full mask, value supplied by the caller. */
+       MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+                        outer_headers.ip_version);
+       MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+                ipv);
+
+       /* Derive the enable bits from the criteria written above. */
+       spec->match_criteria_enable =
+               get_match_criteria_enable(spec->match_criteria);
+
+       return 0;
+}
+
+/*
+ * Fill @spec with a match on a BTH opcode equal to IB_BTH_OPCODE_CNP.
+ *
+ * When mlx5_core_mp_enabled() is true the match is also narrowed to
+ * @port_num through set_vhca_port_spec().
+ *
+ * Returns 0 on success, or -EOPNOTSUPP when the port match could not be
+ * added. The bth_opcode capability itself is not checked here.
+ */
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+                       struct mlx5_flow_spec *spec)
+{
+       if (mlx5_core_mp_enabled(dev->mdev)) {
+               /* Any failure of the port match is reported as unsupported. */
+               if (set_vhca_port_spec(dev, port_num, spec))
+                       return -EOPNOTSUPP;
+       }
+
+       /* BTH opcode: full mask, value CNP. */
+       MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+                        misc_parameters.bth_opcode);
+       MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+                IB_BTH_OPCODE_CNP);
+
+       /* Derive the enable bits from the criteria written above. */
+       spec->match_criteria_enable =
+               get_match_criteria_enable(spec->match_criteria);
+
+       return 0;
+}
+
+/*
+ * mlx5_ib_fs_add_op_fc - install the steering rules behind an optional counter
+ * @dev: device whose flow_db->opfcs[] holds the optional-counter tables
+ * @port_num: port to match on; only used when mlx5_core_mp_enabled() is true
+ * @opfc: counter state; opfc->fc is the counter the rules count into and
+ *        opfc->rule[] receives the handles of the rules that get installed
+ * @type: optional counter to enable
+ *
+ * Builds one match spec per rule (two for CE packets: IPv4 and IPv6; one
+ * for each of the CNP types), obtains the flow table for @type through
+ * _get_prio() when opfcs[type] has none yet, and adds one count-and-allow
+ * rule per spec.
+ *
+ * Return: 0 on success. -ENOMEM if the specs cannot be allocated,
+ * -EOPNOTSUPP if @type is unknown, a needed capability is missing or the
+ * flow namespace is unavailable, otherwise the error reported by
+ * _get_prio() or mlx5_add_flow_rules(). On failure no rule added by this
+ * call stays installed and the opfc->rule[] slots it wrote are NULL again.
+ */
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+                        struct mlx5_ib_op_fc *opfc,
+                        enum mlx5_ib_optional_counter_type type)
+{
+       enum mlx5_flow_namespace_type fn_type;
+       int priority, i, err, spec_num;
+       struct mlx5_flow_act flow_act = {};
+       /* Zeroed so that fields not set below are never passed as garbage. */
+       struct mlx5_flow_destination dst = {};
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_ib_flow_prio *prio;
+       struct mlx5_flow_spec *spec;
+
+       /* Heap-allocated: one spec per possible rule, zero-filled. */
+       spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+
+       /* Pick the match specs, namespace and priority for this counter. */
+       switch (type) {
+       case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+               /* One rule for IPv4 and one for IPv6. */
+               if (set_ecn_ce_spec(dev, port_num, &spec[0],
+                                   MLX5_FS_IPV4_VERSION) ||
+                   set_ecn_ce_spec(dev, port_num, &spec[1],
+                                   MLX5_FS_IPV6_VERSION)) {
+                       err = -EOPNOTSUPP;
+                       goto free;
+               }
+               spec_num = 2;
+               fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+               priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+               break;
+
+       case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+               if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+                                       ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+                   set_cnp_spec(dev, port_num, &spec[0])) {
+                       err = -EOPNOTSUPP;
+                       goto free;
+               }
+               spec_num = 1;
+               fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+               priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+               break;
+
+       case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+               if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+                                       ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+                   set_cnp_spec(dev, port_num, &spec[0])) {
+                       err = -EOPNOTSUPP;
+                       goto free;
+               }
+               spec_num = 1;
+               fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+               priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+               break;
+
+       default:
+               err = -EOPNOTSUPP;
+               goto free;
+       }
+
+       ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+       if (!ns) {
+               err = -EOPNOTSUPP;
+               goto free;
+       }
+
+       /* Reuse the table of this counter type if it already has one. */
+       prio = &dev->flow_db->opfcs[type];
+       if (!prio->flow_table) {
+               prio = _get_prio(ns, prio, priority,
+                                dev->num_ports * MAX_OPFC_RULES, 1, 0);
+               if (IS_ERR(prio)) {
+                       err = PTR_ERR(prio);
+                       goto free;
+               }
+       }
+
+       dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+       dst.counter_id = mlx5_fc_id(opfc->fc);
+
+       flow_act.action =
+               MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+       for (i = 0; i < spec_num; i++) {
+               opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+                                                   &flow_act, &dst, 1);
+               if (IS_ERR(opfc->rule[i])) {
+                       err = PTR_ERR(opfc->rule[i]);
+                       goto del_rules;
+               }
+       }
+       /* Account for the new rules only once all of them are in place. */
+       prio->refcount += spec_num;
+       kfree(spec);
+
+       return 0;
+
+del_rules:
+       /*
+        * rule[i] holds an ERR_PTR and rule[0..i-1] are about to be deleted.
+        * mlx5_ib_fs_remove_op_fc() treats every non-NULL slot as an
+        * installed rule, so none of these values may be left behind.
+        */
+       opfc->rule[i] = NULL;
+       for (i -= 1; i >= 0; i--) {
+               mlx5_del_flow_rules(opfc->rule[i]);
+               opfc->rule[i] = NULL;
+       }
+       /* refcount was not bumped for these rules, hence 'false' here. */
+       put_flow_table(dev, prio, false);
+free:
+       kfree(spec);
+       return err;
+}
+
+/*
+ * mlx5_ib_fs_remove_op_fc - remove the steering rules of an optional counter
+ * @dev: device whose flow_db->opfcs[] holds the optional-counter tables
+ * @opfc: counter state whose opfc->rule[] handles are to be deleted
+ * @type: optional counter the rules were installed for
+ *
+ * Walks opfc->rule[] up to the first NULL slot (at most MAX_OPFC_RULES
+ * entries). Each rule found is deleted, its slot is reset to NULL and
+ * put_flow_table() is called once for it on opfcs[@type]. Because the
+ * slots are cleared, calling this again on the same @opfc is a no-op
+ * instead of deleting the same handles a second time.
+ */
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+                            struct mlx5_ib_op_fc *opfc,
+                            enum mlx5_ib_optional_counter_type type)
+{
+       int i;
+
+       for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+               mlx5_del_flow_rules(opfc->rule[i]);
+               /* Forget the handle so it can never be deleted twice. */
+               opfc->rule[i] = NULL;
+               put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+       }
+}
+
 static void set_underlay_qp(struct mlx5_ib_dev *dev,
                            struct mlx5_flow_spec *spec,
                            u32 underlay_qpn)
index 8215d7ab579d8133aaae5b047c71b8a3c7bec239..d81ff5078e5ebfd17c99a65ae2098e8e46951631 100644 (file)
@@ -263,6 +263,14 @@ struct mlx5_ib_pp {
        struct mlx5_core_dev *mdev;
 };
 
+enum mlx5_ib_optional_counter_type {   /* also the index into opfcs[] */
+       MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,        /* RX: ECN CE, IPv4 and IPv6 */
+       MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,       /* RX: BTH opcode is CNP */
+       MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,       /* TX: BTH opcode is CNP */
+
+       MLX5_IB_OPCOUNTER_MAX,  /* number of types; sizes flow_db opfcs[] */
+};
+
 struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        prios[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_ib_flow_prio        egress_prios[MLX5_IB_NUM_FLOW_FT];
@@ -271,6 +279,7 @@ struct mlx5_ib_flow_db {
        struct mlx5_ib_flow_prio        fdb;
        struct mlx5_ib_flow_prio        rdma_rx[MLX5_IB_NUM_FLOW_FT];
        struct mlx5_ib_flow_prio        rdma_tx[MLX5_IB_NUM_FLOW_FT];
+       struct mlx5_ib_flow_prio        opfcs[MLX5_IB_OPCOUNTER_MAX];
        struct mlx5_flow_table          *lag_demux_ft;
        /* Protect flow steering bypass flow tables
         * when add/del flow rules.
@@ -797,6 +806,13 @@ struct mlx5_ib_resources {
        struct mlx5_ib_port_resources ports[2];
 };
 
+#define MAX_OPFC_RULES 2       /* most rules a single optional counter uses */
+
+struct mlx5_ib_op_fc { /* one optional flow counter and its steering rules */
+       struct mlx5_fc *fc;     /* counter the steering rules count into */
+       struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; /* removal stops at first NULL */
+};
+
 struct mlx5_ib_counters {
        struct rdma_stat_desc *descs;
        size_t *offsets;
@@ -807,6 +823,14 @@ struct mlx5_ib_counters {
        u16 set_id;
 };
 
+/*
+ * Install the flow-steering rule(s) for optional counter @type so that
+ * matching packets are counted by @opfc->fc; the rule handles are stored
+ * in @opfc->rule[]. Returns 0 on success or a negative errno.
+ */
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev,
+                        u32 port_num,
+                        struct mlx5_ib_op_fc *opfc,
+                        enum mlx5_ib_optional_counter_type type);
+
+/*
+ * Delete the flow-steering rule(s) recorded in @opfc->rule[] for optional
+ * counter @type.
+ */
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+                            struct mlx5_ib_op_fc *opfc,
+                            enum mlx5_ib_optional_counter_type type);
+
 struct mlx5_ib_multiport_info;
 
 struct mlx5_ib_multiport {
index 7e542205861c4549eda7566dbd4b2928685eed2e..8ae07c0ecdf7ef1711cfd91299ac5c1001346cdf 100644 (file)
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
 #define IB_BTH_SE_SHIFT        23
 #define IB_BTH_TVER_MASK       0xf
 #define IB_BTH_TVER_SHIFT      16
+#define IB_BTH_OPCODE_CNP      0x81    /* BTH opcode identifying CNP packets */
 
 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
 {