netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mark Zhang <markzhang@nvidia.com>
To: <jgg@nvidia.com>, <dledford@redhat.com>, <saeedm@nvidia.com>
Cc: <linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
	<aharonl@nvidia.com>, <netao@nvidia.com>, <leonro@nvidia.com>,
	Mark Zhang <markzhang@nvidia.com>
Subject: [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters
Date: Wed, 18 Aug 2021 14:24:23 +0300	[thread overview]
Message-ID: <20210818112428.209111-6-markzhang@nvidia.com> (raw)
In-Reply-To: <20210818112428.209111-1-markzhang@nvidia.com>

From: Aharon Landau <aharonl@nvidia.com>

Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c      | 111 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  12 +++
 include/rdma/ib_hdrs.h               |   1 +
 3 files changed, 124 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..be6a00969ddb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/ib_umem.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
 #include "mlx5_ib.h"
 #include "counters.h"
 #include "devx.h"
@@ -847,6 +849,115 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	return prio;
 }
 
+enum {
+	RDMA_RX_ECN_OPCOUNTER_PRIO,
+	RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+	RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type)
+{
+	enum mlx5_flow_namespace_type fn_type;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_destination dst;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_ib_flow_prio *prio;
+	struct mlx5_flow_spec *spec;
+	int priority, err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	switch (type) {
+	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.ip_ecn);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 outer_headers.ip_ecn, INET_ECN_CE);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	default:
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+	prio = _get_prio(ns, &opfc->prio, priority, 1, 1, 0);
+	if (IS_ERR(prio)) {
+		err = PTR_ERR(prio);
+		goto free;
+	}
+
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dst.counter_id = mlx5_fc_id(opfc->fc);
+
+	flow_act.action =
+		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+	opfc->rule =
+		mlx5_add_flow_rules(prio->flow_table, spec, &flow_act, &dst, 1);
+	if (IS_ERR(opfc->rule)) {
+		put_flow_table(dev, prio, false);
+		err = PTR_ERR(opfc->rule);
+		goto free;
+	}
+	prio->refcount++;
+
+free:
+	kfree(spec);
+	return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc)
+{
+	mlx5_del_flow_rules(opfc->rule);
+	put_flow_table(dev, &opfc->prio, true);
+	WARN_ON(opfc->prio.flow_table);
+}
+
 static void set_underlay_qp(struct mlx5_ib_dev *dev,
 			    struct mlx5_flow_spec *spec,
 			    u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ba352702294..130b2ed79ba2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -805,6 +805,12 @@ enum mlx5_ib_optional_counter_type {
 	MLX5_IB_OPCOUNTER_MAX,
 };
 
+struct mlx5_ib_op_fc {
+	struct mlx5_fc *fc;
+	struct mlx5_ib_flow_prio prio;
+	struct mlx5_flow_handle *rule;
+};
+
 struct mlx5_ib_counters {
 	const char **names;
 	size_t *offsets;
@@ -814,6 +820,12 @@ struct mlx5_ib_counters {
 	u16 set_id;
 };
 
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc);
+
 struct mlx5_ib_multiport_info;
 
 struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
 #define IB_BTH_SE_SHIFT	23
 #define IB_BTH_TVER_MASK	0xf
 #define IB_BTH_TVER_SHIFT	16
+#define IB_BTH_OPCODE_CNP	0x81
 
 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
 {
-- 
2.26.2


  parent reply	other threads:[~2021-08-18 11:25 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-18 11:24 [PATCH rdma-next 00/10] Optional counter statistics support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 01/10] net/mlx5: Add support in bth_opcode as a match criteria Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 02/10] net/mlx5: Add priorities for counters in RDMA namespaces Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 03/10] RDMA/counters: Support to allocate per-port optional counter statistics Mark Zhang
2021-08-23 19:30   ` Jason Gunthorpe
2021-08-24  6:22     ` Mark Zhang
2021-08-24 13:14       ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 04/10] RDMA/mlx5: Add alloc_op_port_stats() support Mark Zhang
2021-08-23 19:19   ` Jason Gunthorpe
2021-08-18 11:24 ` Mark Zhang [this message]
2021-08-18 11:24 ` [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters Mark Zhang
2021-08-23 19:42   ` Jason Gunthorpe
2021-08-24  2:09     ` Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 07/10] RDMA/mlx5: Add add_op_stat() and remove_op_stat() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 08/10] RDMA/nldev: Add support to get optional counters statistics Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 09/10] RDMA/mlx5: Add get_op_stats() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 10/10] RDMA/nldev: Add support to get current enabled optional counters Mark Zhang
2021-08-23 19:44   ` Jason Gunthorpe
2021-08-24  2:13     ` Mark Zhang
2021-08-24 13:13       ` Jason Gunthorpe
2021-08-23 19:33 ` [PATCH rdma-next 00/10] Optional counter statistics support Jason Gunthorpe
2021-08-24  1:44   ` Mark Zhang
2021-08-24 13:11     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210818112428.209111-6-markzhang@nvidia.com \
    --to=markzhang@nvidia.com \
    --cc=aharonl@nvidia.com \
    --cc=dledford@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netao@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).