From: Mark Zhang <markzhang@nvidia.com>
To: <jgg@nvidia.com>, <dledford@redhat.com>, <saeedm@nvidia.com>
Cc: <linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
<aharonl@nvidia.com>, <netao@nvidia.com>, <leonro@nvidia.com>,
Mark Zhang <markzhang@nvidia.com>
Subject: [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters
Date: Wed, 18 Aug 2021 14:24:24 +0300 [thread overview]
Message-ID: <20210818112428.209111-7-markzhang@nvidia.com> (raw)
In-Reply-To: <20210818112428.209111-1-markzhang@nvidia.com>
From: Aharon Landau <aharonl@nvidia.com>
Add the ability to add optional counters to, and remove them from, a link
through RDMA netlink. Restrict both operations to users with the ADMIN
capability.
Examples:
$ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts
$ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
---
drivers/infiniband/core/counters.c | 50 ++++++++++++++++
drivers/infiniband/core/device.c | 2 +
drivers/infiniband/core/nldev.c | 93 ++++++++++++++++++++++++++++++
include/rdma/ib_verbs.h | 7 +++
include/rdma/rdma_counter.h | 4 ++
include/rdma/rdma_netlink.h | 1 +
include/uapi/rdma/rdma_netlink.h | 9 +++
7 files changed, 166 insertions(+)
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index b8b6db98bfdf..fa04178aa0eb 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -106,6 +106,56 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
return ret;
}
+/*
+ * Find the optional counter called @name in @opstats.
+ *
+ * Walks the opcounters array in order and returns a pointer to the first
+ * entry whose name compares equal to @name, or NULL when none matches.
+ */
+static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats,
+					     const char *name)
+{
+	int idx = 0;
+
+	while (idx < opstats->num_opcounters) {
+		struct rdma_op_counter *cand = &opstats->opcounters[idx];
+
+		if (strcmp(cand->name, name) == 0)
+			return cand;
+		idx++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Enable or disable the optional counter called @name on @port of @dev.
+ *
+ * @is_add selects the driver callback: add_op_stat() when true,
+ * remove_op_stat() when false. The counter's enabled flag is updated only
+ * after the driver callback succeeds; both steps run under the per-port
+ * opstats lock.
+ *
+ * The caller is expected to have validated @port, as it is used directly
+ * as an index into dev->port_data.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when the driver lacks either callback
+ * or the port has no optional counter statistics, -EINVAL when no counter
+ * is called @name, or the error returned by the driver callback.
+ */
+static int rdma_opcounter_set(struct ib_device *dev, u32 port,
+			      const char *name, bool is_add)
+{
+	struct rdma_port_counter *port_counter;
+	struct rdma_op_stats *opstats;
+	struct rdma_op_counter *opc;
+	int ret;
+
+	if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat)
+		return -EOPNOTSUPP;
+
+	port_counter = &dev->port_data[port].port_counter;
+	opstats = port_counter->opstats;
+	/*
+	 * Having the add/remove callbacks does not prove that the per-port
+	 * statistics were allocated; do not dereference a NULL opstats.
+	 */
+	if (!opstats)
+		return -EOPNOTSUPP;
+
+	opc = get_opcounter(opstats, name);
+	if (!opc)
+		return -EINVAL;
+
+	mutex_lock(&opstats->lock);
+	if (is_add)
+		ret = dev->ops.add_op_stat(dev, port, opc->type);
+	else
+		ret = dev->ops.remove_op_stat(dev, port, opc->type);
+	if (!ret)
+		opc->enabled = is_add;
+	mutex_unlock(&opstats->lock);
+
+	return ret;
+}
+
+/* Enable the optional counter called @name on @port of @dev. */
+int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name)
+{
+	const bool enable = true;
+
+	return rdma_opcounter_set(dev, port, name, enable);
+}
+
+/* Disable the optional counter called @name on @port of @dev. */
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+			  const char *name)
+{
+	const bool enable = false;
+
+	return rdma_opcounter_set(dev, port, name, enable);
+}
+
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
struct ib_qp *qp,
enum rdma_nl_counter_mode mode)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 23e1ae50b2e4..b9138f20f9a8 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2590,6 +2590,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
ops->uverbs_no_driver_id_binding;
SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, add_op_stat);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
@@ -2701,6 +2702,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
+ SET_DEVICE_OP(dev_ops, remove_op_stat);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_cq);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e9b4b2cccaa0..17d55d89f11c 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -154,6 +154,11 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTERS] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1888,6 +1893,86 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
+/*
+ * Common handler for RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER and
+ * RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER.
+ *
+ * The request must carry the device index, the port index and the optional
+ * counter name. The counter is added to the port when @cmd_add is true and
+ * removed from it otherwise; on success an attribute-less reply carrying the
+ * same command is unicast back to the requester.
+ *
+ * Return: the result of rdma_nl_unicast() once the counter was updated;
+ * -EINVAL for a malformed request, an unknown device, an invalid port or an
+ * over-long counter name; -ENOMEM or -EMSGSIZE when the reply cannot be
+ * built; otherwise the error from rdma_opcounter_add()/_remove().
+ */
+static int nldev_stat_set_op_stat(struct sk_buff *skb,
+				  struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack,
+				  bool cmd_add)
+{
+	char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {};
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	struct sk_buff *msg;
+	u32 index, port;
+	int ret;
+
+	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(sock_net(skb->sk), index);
+	if (!device)
+		return -EINVAL;
+
+	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	if (!rdma_is_port_valid(device, port)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/*
+	 * The policy admits a name of up to
+	 * RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE characters plus the NUL, which
+	 * does not fit the local buffer. Reject it instead of looking up a
+	 * silently truncated name that could match a different counter.
+	 */
+	if (nla_strscpy(opcounter,
+			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME],
+			sizeof(opcounter)) < 0) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+					 (cmd_add ?
+					  RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :
+					  RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),
+			0, 0);
+	/* nlmsg_put() returns NULL when the header does not fit in @msg. */
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_msg;
+	}
+
+	if (cmd_add)
+		ret = rdma_opcounter_add(device, port, opcounter);
+	else
+		ret = rdma_opcounter_remove(device, port, opcounter);
+	if (ret)
+		goto err_msg;
+
+	nlmsg_end(msg, nlh);
+	ib_device_put(device);
+	return rdma_nl_unicast(sock_net(skb->sk), msg,
+			       NETLINK_CB(skb).portid);
+
+err_msg:
+	nlmsg_free(msg);
+err:
+	ib_device_put(device);
+	return ret;
+}
+
+/* .doit handler for RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER. */
+static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,
+				       struct nlmsghdr *nlh,
+				       struct netlink_ext_ack *extack)
+{
+	const bool cmd_add = true;
+
+	return nldev_stat_set_op_stat(skb, nlh, extack, cmd_add);
+}
+
+/* .doit handler for RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER. */
+static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,
+					  struct nlmsghdr *nlh,
+					  struct netlink_ext_ack *extack)
+{
+	const bool cmd_add = false;
+
+	return nldev_stat_set_op_stat(skb, nlh, extack, cmd_add);
+}
+
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.dump = nldev_res_get_mr_raw_dumpit,
.flags = RDMA_NL_ADMIN_PERM,
},
+ [RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {
+ .doit = nldev_stat_add_op_stat_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {
+ .doit = nldev_stat_remove_op_stat_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
};
void __init nldev_init(void)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 40b0f7825975..fa9e668b9b14 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -600,11 +600,14 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
/**
* struct rdma_op_counter
+ * @enabled - To indicate if this counter is currently enabled (as optional
+ * counters can be dynamically enabled/disabled)
* @type - The vendor-specific type of the counter
* @name - The name of the counter
* @value - The value of the counter
*/
struct rdma_op_counter {
+ bool enabled;
int type;
const char *name;
u64 value;
@@ -2595,6 +2598,10 @@ struct ib_device_ops {
struct rdma_op_stats *(*alloc_op_port_stats)(struct ib_device *device,
u32 port_num);
+ int (*add_op_stat)(struct ib_device *device, u32 port,
+ int optional_stat);
+ int (*remove_op_stat)(struct ib_device *device, u32 port,
+ int optional_stat);
/**
* Allows rdma drivers to add their own restrack attributes.
*/
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index 3531c5061718..48086a7248ac 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -63,5 +63,9 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask);
+int rdma_opcounter_add(struct ib_device *dev, u32 port,
+ const char *name);
+int rdma_opcounter_remove(struct ib_device *dev, u32 port,
+ const char *name);
#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9df71ee..ac47a0cc0508 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -10,6 +10,7 @@ enum {
RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+ RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE = 64,
};
struct rdma_nl_cbs {
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 75a1ae2311d8..79e6ca87d2e0 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -297,6 +297,10 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */
+ RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER,
+
+ RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER,
+
RDMA_NLDEV_NUM_OPS
};
@@ -549,6 +553,11 @@ enum rdma_nldev_attr {
RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */
+ RDMA_NLDEV_ATTR_STAT_OPCOUNTERS, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME, /* string */
+ RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_VALUE, /* u64 */
+
/*
* Always the end
*/
--
2.26.2
next prev parent reply other threads:[~2021-08-18 11:25 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-18 11:24 [PATCH rdma-next 00/10] Optional counter statistics support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 01/10] net/mlx5: Add support in bth_opcode as a match criteria Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 02/10] net/mlx5: Add priorities for counters in RDMA namespaces Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 03/10] RDMA/counters: Support to allocate per-port optional counter statistics Mark Zhang
2021-08-23 19:30 ` Jason Gunthorpe
2021-08-24 6:22 ` Mark Zhang
2021-08-24 13:14 ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 04/10] RDMA/mlx5: Add alloc_op_port_stats() support Mark Zhang
2021-08-23 19:19 ` Jason Gunthorpe
2021-08-18 11:24 ` [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters Mark Zhang
2021-08-18 11:24 ` Mark Zhang [this message]
2021-08-23 19:42 ` [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and remove optional counters Jason Gunthorpe
2021-08-24 2:09 ` Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 07/10] RDMA/mlx5: Add add_op_stat() and remove_op_stat() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 08/10] RDMA/nldev: Add support to get optional counters statistics Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 09/10] RDMA/mlx5: Add get_op_stats() support Mark Zhang
2021-08-18 11:24 ` [PATCH rdma-next 10/10] RDMA/nldev: Add support to get current enabled optional counters Mark Zhang
2021-08-23 19:44 ` Jason Gunthorpe
2021-08-24 2:13 ` Mark Zhang
2021-08-24 13:13 ` Jason Gunthorpe
2021-08-23 19:33 ` [PATCH rdma-next 00/10] Optional counter statistics support Jason Gunthorpe
2021-08-24 1:44 ` Mark Zhang
2021-08-24 13:11 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210818112428.209111-7-markzhang@nvidia.com \
--to=markzhang@nvidia.com \
--cc=aharonl@nvidia.com \
--cc=dledford@redhat.com \
--cc=jgg@nvidia.com \
--cc=leonro@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netao@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.