From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Vlad Dumitrescu <vdumitrescu@nvidia.com>,
linux-rdma@vger.kernel.org, Sean Hefty <shefty@nvidia.com>
Subject: [PATCH rdma-next 8/9] RDMA/nldev: Add mad-linear-timeouts management attribute
Date: Thu, 5 Dec 2024 15:49:38 +0200 [thread overview]
Message-ID: <5328045b50805d019606f724b439104bbef3ff69.1733405453.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1733405453.git.leon@kernel.org>
From: Vlad Dumitrescu <vdumitrescu@nvidia.com>
This attribute allows system admins to make a trade-off between speed
of recovery under transient loss and reducing congestion under
persistent loss or overload.
Set 15 as the max value, as it allows system admins to effectively opt the
CM out of exponential backoff. The CM currently uses the CMA_MAX_CM_RETRIES
(15) constant to set retries. Other MAD layer callers use different
values (e.g., sa_query uses 10, UMAD exposes the parameter to
userspace), but a max of 15 linear retries should be enough.
Example:
# rdma management show rocep1s0f1/1
1: rocep1s0f1: 1 mad-linear-timeouts 4 ...
# rdma management set rocep1s0f1/1 mad-linear-timeouts 6
# rdma management show
0: rocep1s0f0: 1 mad-linear-timeouts 4 ...
1: rocep1s0f1: 1 mad-linear-timeouts 6 ...
Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Reviewed-by: Sean Hefty <shefty@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/core/mad.c | 35 ++++++++++++++++++++++++++++++
drivers/infiniband/core/mad_priv.h | 4 ++++
drivers/infiniband/core/nldev.c | 19 ++++++++++++++++
include/uapi/rdma/rdma_netlink.h | 2 ++
4 files changed, 60 insertions(+)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a3a8cf4bbc20..7c4ac8ae0a3f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -54,7 +54,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ib_mad.h>
+#define IB_MAD_LINEAR_TIMEOUTS_MIN 1
#define IB_MAD_LINEAR_TIMEOUTS_DEFAULT 4
+#define IB_MAD_LINEAR_TIMEOUTS_MAX 15
#define IB_MAD_MAX_TIMEOUT_MS (60 * MSEC_PER_SEC)
#define IB_MAD_MAX_DEADLINE (jiffies + msecs_to_jiffies(5 * 60 * 1000))
@@ -145,6 +147,39 @@ ib_get_mad_port(struct ib_device *device, u32 port_num)
return entry;
}
+/* Update a MAD port's linear_timeouts; @val outside [MIN, MAX] is rejected. */
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+			       struct netlink_ext_ack *extack)
+{
+	struct ib_mad_port_private *mad_port = ib_get_mad_port(dev, port_num);
+
+	if (!mad_port)
+		return -ENODEV;
+
+	if (val >= IB_MAD_LINEAR_TIMEOUTS_MIN &&
+	    val <= IB_MAD_LINEAR_TIMEOUTS_MAX) {
+		WRITE_ONCE(mad_port->linear_timeouts, val);
+		return 0;
+	}
+
+	NL_SET_ERR_MSG_FMT_MOD(extack, "Valid range [%u-%u]",
+			       IB_MAD_LINEAR_TIMEOUTS_MIN,
+			       IB_MAD_LINEAR_TIMEOUTS_MAX);
+	return -EINVAL;
+}
+
+/* Report a MAD port's current linear_timeouts through @val. */
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val)
+{
+	struct ib_mad_port_private *mad_port;
+
+	mad_port = ib_get_mad_port(dev, port_num);
+	if (!mad_port)
+		return -ENODEV;
+	*val = READ_ONCE(mad_port->linear_timeouts);
+	return 0;
+}
+
static inline u8 convert_mgmt_class(u8 mgmt_class)
{
/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 076ebcea27b4..e6b362c054a6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -241,4 +241,8 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
unsigned long timeout_ms);
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+ struct netlink_ext_ack *extack);
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val);
+
#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 363742567dd2..acb02f8c87c0 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -172,6 +172,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT] = { .type = NLA_U32 },
+ [RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2627,6 +2628,7 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
+ u8 mad_linear_timeouts;
struct sk_buff *msg;
u32 index;
u32 port;
@@ -2657,6 +2659,10 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
+ ret = ib_mad_linear_timeouts_get(device, port, &mad_linear_timeouts);
+ if (ret)
+ goto err;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
@@ -2680,6 +2686,11 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_msg;
}
+ ret = nla_put_u8(msg, RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS,
+ mad_linear_timeouts);
+ if (ret)
+ goto err_msg;
+
nlmsg_end(msg, nlh);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
@@ -2695,6 +2706,7 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
+ u8 mad_linear_timeouts;
u32 index;
u32 port;
u32 sa_min_timeout;
@@ -2723,6 +2735,13 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
extack);
}
+ if (tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]) {
+ mad_linear_timeouts = nla_get_u8(
+ tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]);
+ return ib_mad_linear_timeouts_set(device, port,
+ mad_linear_timeouts, extack);
+ }
+
err:
ib_device_put(device);
return -EINVAL;
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 2b1c4c55e51f..d209a5973c8e 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -585,6 +585,8 @@ enum rdma_nldev_attr {
RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */
RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT, /* u32 */
+
+ RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS, /* u8 */
/*
* Always the end
*/
--
2.47.0
next prev parent reply other threads:[~2024-12-05 13:51 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-05 13:49 [PATCH rdma-next 0/9] Rework retry algorithm used when sending MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 1/9] IB/mad: Apply timeout modification (CM MRA) only once Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 2/9] IB/mad: Add deadline for send MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 3/9] RDMA/sa_query: Enforce min retry interval and deadline Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 4/9] RDMA/nldev: Add sa-min-timeout management attribute Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 5/9] IB/umad: Set deadline when sending non-RMPP MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 6/9] IB/cm: Set deadline when sending MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 7/9] IB/mad: Exponential backoff when retrying sends Leon Romanovsky
2024-12-05 13:49 ` Leon Romanovsky [this message]
2024-12-05 13:49 ` [PATCH rdma-next 9/9] IB/cma: Lower response timeout to roughly 1s Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5328045b50805d019606f724b439104bbef3ff69.1733405453.git.leon@kernel.org \
--to=leon@kernel.org \
--cc=jgg@nvidia.com \
--cc=linux-rdma@vger.kernel.org \
--cc=shefty@nvidia.com \
--cc=vdumitrescu@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox