[PATCH rdma-next 8/9] RDMA/nldev: Add mad-linear-timeouts management attribute

Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed

From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Vlad Dumitrescu <vdumitrescu@nvidia.com>,
	linux-rdma@vger.kernel.org, Sean Hefty <shefty@nvidia.com>
Subject: [PATCH rdma-next 8/9] RDMA/nldev: Add mad-linear-timeouts management attribute
Date: Thu,  5 Dec 2024 15:49:38 +0200	[thread overview]
Message-ID: <5328045b50805d019606f724b439104bbef3ff69.1733405453.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1733405453.git.leon@kernel.org>

From: Vlad Dumitrescu <vdumitrescu@nvidia.com>

This attribute allows system admins to trade off speed of recovery
under transient loss against reduced congestion under persistent
loss or overload.

Set the maximum value to 15, as it allows system admins to effectively
opt the CM out of exponential backoff.  The CM currently uses the
CMA_MAX_CM_RETRIES (15) constant to set retries.  Other MAD layer callers
use different values (e.g., sa_query uses 10, UMAD exposes the parameter
to userspace), but a max of 15 linear retries should be enough.

Example:
  # rdma management show rocep1s0f1/1
  1: rocep1s0f1: 1 mad-linear-timeouts 4 ...
  # rdma management set rocep1s0f1/1 mad-linear-timeouts 6
  # rdma management show
  0: rocep1s0f0: 1 mad-linear-timeouts 4 ...
  1: rocep1s0f1: 1 mad-linear-timeouts 6 ...

Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Reviewed-by: Sean Hefty <shefty@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/mad.c      | 35 ++++++++++++++++++++++++++++++
 drivers/infiniband/core/mad_priv.h |  4 ++++
 drivers/infiniband/core/nldev.c    | 19 ++++++++++++++++
 include/uapi/rdma/rdma_netlink.h   |  2 ++
 4 files changed, 60 insertions(+)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a3a8cf4bbc20..7c4ac8ae0a3f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -54,7 +54,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/ib_mad.h>
 
+#define IB_MAD_LINEAR_TIMEOUTS_MIN	1
 #define IB_MAD_LINEAR_TIMEOUTS_DEFAULT	4
+#define IB_MAD_LINEAR_TIMEOUTS_MAX	15
 #define IB_MAD_MAX_TIMEOUT_MS		(60 * MSEC_PER_SEC)
 #define IB_MAD_MAX_DEADLINE		(jiffies + msecs_to_jiffies(5 * 60 * 1000))
 
@@ -145,6 +147,39 @@ ib_get_mad_port(struct ib_device *device, u32 port_num)
 	return entry;
 }
 
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+			       struct netlink_ext_ack *extack)
+{
+	struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num);
+
+	if (!port)
+		return -ENODEV;
+	/* Only [MIN, MAX] is accepted; extack carries the range on failure. */
+	if (val > IB_MAD_LINEAR_TIMEOUTS_MAX ||
+	    val < IB_MAD_LINEAR_TIMEOUTS_MIN) {
+		NL_SET_ERR_MSG_FMT_MOD(extack, "Valid range [%u-%u]",
+				       IB_MAD_LINEAR_TIMEOUTS_MIN,
+				       IB_MAD_LINEAR_TIMEOUTS_MAX);
+		return -EINVAL;
+	}
+	/* Store the validated value; the getter reads it with READ_ONCE(). */
+	WRITE_ONCE(port->linear_timeouts, val);
+
+	return 0;
+}
+
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val)
+{
+	struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num);
+
+	if (!port)
+		return -ENODEV;
+	/* Read the port's current setting; the setter uses WRITE_ONCE(). */
+	*val = READ_ONCE(port->linear_timeouts);
+
+	return 0;
+}
+
 static inline u8 convert_mgmt_class(u8 mgmt_class)
 {
 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 076ebcea27b4..e6b362c054a6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -241,4 +241,8 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
 			  unsigned long timeout_ms);
 
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+			       struct netlink_ext_ack *extack);
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val);
+
 #endif	/* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 363742567dd2..acb02f8c87c0 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -172,6 +172,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
 	[RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT]	= { .type = NLA_U32 },
+	[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS] = { .type = NLA_U8 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2627,6 +2628,7 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 	struct ib_device *device;
+	u8 mad_linear_timeouts;
 	struct sk_buff *msg;
 	u32 index;
 	u32 port;
@@ -2657,6 +2659,10 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto err;
 	}
 
+	ret = ib_mad_linear_timeouts_get(device, port, &mad_linear_timeouts);
+	if (ret)
+		goto err;
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
@@ -2680,6 +2686,11 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto err_msg;
 	}
 
+	ret = nla_put_u8(msg, RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS,
+			 mad_linear_timeouts);
+	if (ret)
+		goto err_msg;
+
 	nlmsg_end(msg, nlh);
 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
@@ -2695,6 +2706,7 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 	struct ib_device *device;
+	u8 mad_linear_timeouts;
 	u32 index;
 	u32 port;
 	u32 sa_min_timeout;
@@ -2723,6 +2735,13 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 					     extack);
 	}
 
+	if (tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]) {
+		mad_linear_timeouts = nla_get_u8(
+			tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]);
+		return ib_mad_linear_timeouts_set(device, port,
+						  mad_linear_timeouts, extack);
+	}
+
 err:
 	ib_device_put(device);
 	return -EINVAL;
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 2b1c4c55e51f..d209a5973c8e 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -585,6 +585,8 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_SYS_ATTR_MONITOR_MODE,	/* u8 */
 
 	RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT,	/* u32 */
+
+	RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS, /* u8 */
 	/*
 	 * Always the end
 	 */
-- 
2.47.0

next prev parent reply	other threads:[~2024-12-05 13:51 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-05 13:49 [PATCH rdma-next 0/9] Rework retry algorithm used when sending MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 1/9] IB/mad: Apply timeout modification (CM MRA) only once Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 2/9] IB/mad: Add deadline for send MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 3/9] RDMA/sa_query: Enforce min retry interval and deadline Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 4/9] RDMA/nldev: Add sa-min-timeout management attribute Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 5/9] IB/umad: Set deadline when sending non-RMPP MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 6/9] IB/cm: Set deadline when sending MADs Leon Romanovsky
2024-12-05 13:49 ` [PATCH rdma-next 7/9] IB/mad: Exponential backoff when retrying sends Leon Romanovsky
2024-12-05 13:49 ` Leon Romanovsky [this message]
2024-12-05 13:49 ` [PATCH rdma-next 9/9] IB/cma: Lower response timeout to roughly 1s Leon Romanovsky

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a3a8cf4bbc2 dfblob:7c4ac8ae0a3 dfblob:076ebcea27b
dfblob:e6b362c054a dfblob:363742567dd dfblob:acb02f8c87c
dfblob:2b1c4c55e51 dfblob:d209a5973c8 )
 OR (
bs:"[PATCH rdma-next 8/9] RDMA/nldev: Add mad-linear-timeouts management attribute" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5328045b50805d019606f724b439104bbef3ff69.1733405453.git.leon@kernel.org \
    --to=leon@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=shefty@nvidia.com \
    --cc=vdumitrescu@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox