From: Mark Zhang <markzhang@nvidia.com>
To: <jgg@nvidia.com>, <dledford@redhat.com>, <saeedm@nvidia.com>
Cc: <linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
<aharonl@nvidia.com>, <netao@nvidia.com>, <leonro@nvidia.com>,
Mark Zhang <markzhang@nvidia.com>
Subject: [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters
Date: Wed, 18 Aug 2021 14:24:23 +0300 [thread overview]
Message-ID: <20210818112428.209111-6-markzhang@nvidia.com> (raw)
In-Reply-To: <20210818112428.209111-1-markzhang@nvidia.com>
From: Aharon Landau <aharonl@nvidia.com>
Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
drivers/infiniband/hw/mlx5/fs.c | 111 +++++++++++++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 +++
include/rdma/ib_hdrs.h | 1 +
3 files changed, 124 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..be6a00969ddb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,115 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+ int priority, err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ outer_headers.ip_ecn, INET_ECN_CE);
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ prio = _get_prio(ns, &opfc->prio, priority, 1, 1, 0);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto free;
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter_id = mlx5_fc_id(opfc->fc);
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ opfc->rule =
+ mlx5_add_flow_rules(prio->flow_table, spec, &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule)) {
+ put_flow_table(dev, prio, false);
+ err = PTR_ERR(opfc->rule);
+ goto free;
+ }
+ prio->refcount++;
+
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc)
+{
+ mlx5_del_flow_rules(opfc->rule);
+ put_flow_table(dev, &opfc->prio, true);
+ WARN_ON(opfc->prio.flow_table);
+}
+
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ba352702294..130b2ed79ba2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -805,6 +805,12 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_MAX,
};
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_ib_flow_prio prio;
+ struct mlx5_flow_handle *rule;
+};
+
struct mlx5_ib_counters {
const char **names;
size_t *offsets;
@@ -814,6 +820,12 @@ struct mlx5_ib_counters {
u16 set_id;
};
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
+#define IB_BTH_OPCODE_CNP 0x81
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
--
2.26.2
next prev parent reply other threads:[~2021-08-18 11:25 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-18 11:24 [PATCH rdma-next 00/10] Optional counter statistics support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 01/10] net/mlx5: Add support in bth_opcode as a match criteria Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 02/10] net/mlx5: Add priorities for counters in RDMA namespaces Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 03/10] RDMA/counters: Support to allocate per-port optional counter statistics Mark Zhang
2021-08-23 19:30 ` Jason Gunthorpe
2021-08-24 6:22 ` Mark Zhang
2021-08-24 13:14 ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 04/10] RDMA/mlx5: Add alloc_op_port_stats() support Mark Zhang
2021-08-23 19:19 ` Jason Gunthorpe
2021-08-18 11:24 ` Mark Zhang [this message]
2021-08-18 11:24 ` [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters Mark Zhang
2021-08-23 19:42 ` Jason Gunthorpe
2021-08-24 2:09 ` Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 07/10] RDMA/mlx5: Add add_op_stat() and remove_op_stat() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 08/10] RDMA/nldev: Add support to get optional counters statistics Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 09/10] RDMA/mlx5: Add get_op_stats() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 10/10] RDMA/nldev: Add support to get current enabled optional counters Mark Zhang
2021-08-23 19:44 ` Jason Gunthorpe
2021-08-24 2:13 ` Mark Zhang
2021-08-24 13:13 ` Jason Gunthorpe
2021-08-23 19:33 ` [PATCH rdma-next 00/10] Optional counter statistics support Jason Gunthorpe
2021-08-24 1:44 ` Mark Zhang
2021-08-24 13:11 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210818112428.209111-6-markzhang@nvidia.com \
--to=markzhang@nvidia.com \
--cc=aharonl@nvidia.com \
--cc=dledford@redhat.com \
--cc=jgg@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netao@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).