From: Mark Zhang <markzhang@nvidia.com>
To: <jgg@nvidia.com>, <dledford@redhat.com>, <saeedm@nvidia.com>
Cc: <linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
<aharonl@nvidia.com>, <netao@nvidia.com>, <leonro@nvidia.com>,
Mark Zhang <markzhang@nvidia.com>
Subject: [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters
Date: Wed, 18 Aug 2021 14:24:23 +0300 [thread overview]
Message-ID: <20210818112428.209111-6-markzhang@nvidia.com> (raw)
In-Reply-To: <20210818112428.209111-1-markzhang@nvidia.com>
From: Aharon Landau <aharonl@nvidia.com>
Adding steering infrastructure for adding and removing optional counter.
This allows to add and remove the counters dynamically in order not to
hurt performance.
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
drivers/infiniband/hw/mlx5/fs.c | 111 +++++++++++++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 +++
include/rdma/ib_hdrs.h | 1 +
3 files changed, 124 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..be6a00969ddb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,115 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+ int priority, err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ outer_headers.ip_ecn, INET_ECN_CE);
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ prio = _get_prio(ns, &opfc->prio, priority, 1, 1, 0);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto free;
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter_id = mlx5_fc_id(opfc->fc);
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ opfc->rule =
+ mlx5_add_flow_rules(prio->flow_table, spec, &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule)) {
+ put_flow_table(dev, prio, false);
+ err = PTR_ERR(opfc->rule);
+ goto free;
+ }
+ prio->refcount++;
+
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc)
+{
+ mlx5_del_flow_rules(opfc->rule);
+ put_flow_table(dev, &opfc->prio, true);
+ WARN_ON(opfc->prio.flow_table);
+}
+
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ba352702294..130b2ed79ba2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -805,6 +805,12 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_MAX,
};
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_ib_flow_prio prio;
+ struct mlx5_flow_handle *rule;
+};
+
struct mlx5_ib_counters {
const char **names;
size_t *offsets;
@@ -814,6 +820,12 @@ struct mlx5_ib_counters {
u16 set_id;
};
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
+#define IB_BTH_OPCODE_CNP 0x81
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
--
2.26.2
next prev parent reply other threads:[~2021-08-18 11:25 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-18 11:24 [PATCH rdma-next 00/10] Optional counter statistics support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 01/10] net/mlx5: Add support in bth_opcode as a match criteria Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 02/10] net/mlx5: Add priorities for counters in RDMA namespaces Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 03/10] RDMA/counters: Support to allocate per-port optional counter statistics Mark Zhang
2021-08-23 19:30 ` Jason Gunthorpe
2021-08-24 6:22 ` Mark Zhang
2021-08-24 13:14 ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 04/10] RDMA/mlx5: Add alloc_op_port_stats() support Mark Zhang
2021-08-23 19:19 ` Jason Gunthorpe
2021-08-18 11:24 ` Mark Zhang [this message]
2021-08-18 11:24 ` [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters Mark Zhang
2021-08-23 19:42 ` Jason Gunthorpe
2021-08-24 2:09 ` Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 07/10] RDMA/mlx5: Add add_op_stat() and remove_op_stat() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 08/10] RDMA/nldev: Add support to get optional counters statistics Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 09/10] RDMA/mlx5: Add get_op_stats() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 10/10] RDMA/nldev: Add support to get current enabled optional counters Mark Zhang
2021-08-23 19:44 ` Jason Gunthorpe
2021-08-24 2:13 ` Mark Zhang
2021-08-24 13:13 ` Jason Gunthorpe
2021-08-23 19:33 ` [PATCH rdma-next 00/10] Optional counter statistics support Jason Gunthorpe
2021-08-24 1:44 ` Mark Zhang
2021-08-24 13:11 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210818112428.209111-6-markzhang@nvidia.com \
--to=markzhang@nvidia.com \
--cc=aharonl@nvidia.com \
--cc=dledford@redhat.com \
--cc=jgg@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netao@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.