Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [PATCH rdma-next] IB/cma: Define options to set CM timeouts and retries
@ 2024-04-02 19:36 Etienne AUJAMES
  2024-04-08  3:26 ` Mark Zhang
  2024-04-08 16:10 ` Sean Hefty
  0 siblings, 2 replies; 8+ messages in thread
From: Etienne AUJAMES @ 2024-04-02 19:36 UTC (permalink / raw)
  To: jgg, leon, markzhang; +Cc: linux-rdma, Gael.DELBARY, guillaume.courrier

Define new options in 'rdma_set_option' to override default CM retries
("Max CM retries") and timeouts ("Local CM Response Timeout" and "Remote
CM Response Timeout").

These options can be useful for RoCE networks (no SM) to decrease the
overall connection timeout with an unreachable node (by default, it can
take several minutes).

Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
---
 drivers/infiniband/core/cma.c      | 92 ++++++++++++++++++++++++++++--
 drivers/infiniband/core/cma_priv.h |  4 ++
 drivers/infiniband/core/ucma.c     | 14 +++++
 include/rdma/rdma_cm.h             |  5 ++
 include/uapi/rdma/rdma_user_cm.h   |  4 +-
 5 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 1e2cd7c8716e..cc73b9708862 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1002,6 +1002,8 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
 	id_priv->tos_set = false;
 	id_priv->timeout_set = false;
 	id_priv->min_rnr_timer_set = false;
+	id_priv->max_cm_retries_set = false;
+	id_priv->cm_timeout_set = false;
 	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
@@ -2845,6 +2847,80 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
 }
 EXPORT_SYMBOL(rdma_set_min_rnr_timer);
 
+/**
+ * rdma_set_cm_retries() - Set the maximum number of CM retries for a cm_id.
+ * @id: Communication identifier associated with service type.
+ * @max_cm_retries: 4-bit value defined as the "Max CM Retries" REQ field
+ *		    (Table 99 "REQ Message Contents" in the IBTA specification).
+ *
+ * This function should be called before rdma_connect() on the active side.
+ * The value determines the maximum number of times the CM resends a
+ * connection request or reply (REQ/REP) before giving up (UNREACHABLE event).
+ * No WARN_ON() here: the option is reachable from user space via ucma.
+ *
+ * Return: 0 on success, -EINVAL on a bad value or unsupported QP type
+ */
+int rdma_set_cm_retries(struct rdma_cm_id *id, u8 max_cm_retries)
+{
+	struct rdma_id_private *id_priv;
+
+	/* It is a 4-bit value */
+	if (max_cm_retries & 0xf0)
+		return -EINVAL;
+
+	if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI &&
+	    id->qp_type != IB_QPT_XRC_TGT)
+		return -EINVAL;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	id_priv->max_cm_retries = max_cm_retries;
+	id_priv->max_cm_retries_set = true;
+	mutex_unlock(&id_priv->qp_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_set_cm_retries);
+
+/**
+ * rdma_set_cm_timeout() - Set the CM response timeouts used by a cm_id.
+ * @id: Communication identifier associated with service type.
+ * @cm_timeout: 5-bit value, expressed as 4.096 * 2^(timeout) usec.
+ *		This value must be greater than 0.
+ *
+ * This function should be called before rdma_connect() on the active side.
+ * It sets the "Remote CM Response Timeout" and "Local CM Response Timeout":
+ * the time to respond to a REQ and to wait for the REP ack on the remote node.
+ * No WARN_ON() here: the option is reachable from user space via ucma.
+ *
+ * Round-trip timeouts for REQ and REP messages:
+ * REQ_timeout_ms = remote_cm_response_timeout_ms + 2 * PacketLifeTime_ms
+ * REP_timeout_ms = local_cm_response_timeout_ms
+ *
+ * Return: 0 on success, -EINVAL on a bad value or unsupported QP type
+ */
+int rdma_set_cm_timeout(struct rdma_cm_id *id, u8 cm_timeout)
+{
+	struct rdma_id_private *id_priv;
+
+	/* It is a non-zero 5-bit value */
+	if (!cm_timeout || (cm_timeout & 0xe0))
+		return -EINVAL;
+
+	if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI &&
+	    id->qp_type != IB_QPT_XRC_TGT)
+		return -EINVAL;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	mutex_lock(&id_priv->qp_mutex);
+	id_priv->cm_timeout = cm_timeout;
+	id_priv->cm_timeout_set = true;
+	mutex_unlock(&id_priv->qp_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_set_cm_timeout);
+
 static int route_set_path_rec_inbound(struct cma_work *work,
 				      struct sa_path_rec *path_rec)
 {
@@ -4295,8 +4371,11 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 	req.path = id_priv->id.route.path_rec;
 	req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
 	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
-	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
-	req.max_cm_retries = CMA_MAX_CM_RETRIES;
+	req.timeout_ms = id_priv->cm_timeout_set ?
+		id_priv->cm_timeout : CMA_CM_RESPONSE_TIMEOUT;
+	req.timeout_ms = 1 << (req.timeout_ms - 8);
+	req.max_cm_retries = id_priv->max_cm_retries_set ?
+		id_priv->max_cm_retries : CMA_MAX_CM_RETRIES;
 
 	trace_cm_send_sidr_req(id_priv);
 	ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
@@ -4368,9 +4447,12 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	req.flow_control = conn_param->flow_control;
 	req.retry_count = min_t(u8, 7, conn_param->retry_count);
 	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
-	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
-	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
-	req.max_cm_retries = CMA_MAX_CM_RETRIES;
+	req.remote_cm_response_timeout = id_priv->cm_timeout_set ?
+		id_priv->cm_timeout : CMA_CM_RESPONSE_TIMEOUT;
+	req.local_cm_response_timeout = id_priv->cm_timeout_set ?
+		id_priv->cm_timeout : CMA_CM_RESPONSE_TIMEOUT;
+	req.max_cm_retries = id_priv->max_cm_retries_set ?
+		id_priv->max_cm_retries : CMA_MAX_CM_RETRIES;
 	req.srq = id_priv->srq ? 1 : 0;
 	req.ece.vendor_id = id_priv->ece.vendor_id;
 	req.ece.attr_mod = id_priv->ece.attr_mod;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index b7354c94cf1b..e3a35eb1bf96 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -95,10 +95,14 @@ struct rdma_id_private {
 	u8			tos_set:1;
 	u8                      timeout_set:1;
 	u8			min_rnr_timer_set:1;
+	u8			max_cm_retries_set:1;
+	u8			cm_timeout_set:1;
 	u8			reuseaddr;
 	u8			afonly;
 	u8			timeout;
 	u8			min_rnr_timer;
+	u8			max_cm_retries;
+	u8			cm_timeout;
 	u8 used_resolve_ip;
 	enum ib_gid_type	gid_type;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 5f5ad8faf86e..a95f513077ac 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1284,6 +1284,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
 		}
 		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
 		break;
+	case RDMA_OPTION_ID_CM_RETRIES:
+		if (optlen != sizeof(u8)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = rdma_set_cm_retries(ctx->cm_id, *((u8 *)optval));
+		break;
+	case RDMA_OPTION_ID_CM_TIMEOUTS:
+		if (optlen != sizeof(u8)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = rdma_set_cm_timeout(ctx->cm_id, *((u8 *)optval));
+		break;
 	default:
 		ret = -ENOSYS;
 	}
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 8a8ab2f793ab..b5923ceb9853 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -344,6 +344,11 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
 
 int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer);
+
+int rdma_set_cm_retries(struct rdma_cm_id *id, u8 max_cm_retries);
+
+int rdma_set_cm_timeout(struct rdma_cm_id *id, u8 cm_timeout);
+
  /**
  * rdma_get_service_id - Return the IB service ID for a specified address.
  * @id: Communication identifier associated with the address.
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 7cea03581f79..eadff72ecd54 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -313,7 +313,9 @@ enum {
 	RDMA_OPTION_ID_TOS	 = 0,
 	RDMA_OPTION_ID_REUSEADDR = 1,
 	RDMA_OPTION_ID_AFONLY	 = 2,
-	RDMA_OPTION_ID_ACK_TIMEOUT = 3
+	RDMA_OPTION_ID_ACK_TIMEOUT = 3,
+	RDMA_OPTION_ID_CM_RETRIES = 4,
+	RDMA_OPTION_ID_CM_TIMEOUTS = 5
 };
 
 enum {
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-04-11 17:15 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-02 19:36 [PATCH rdma-next] IB/cma: Define options to set CM timeouts and retries Etienne AUJAMES
2024-04-08  3:26 ` Mark Zhang
2024-04-09 16:11   ` Etienne AUJAMES
2024-04-08 16:10 ` Sean Hefty
2024-04-09 13:07   ` Etienne AUJAMES
2024-04-09 14:44     ` Sean Hefty
2024-04-11 16:04       ` Etienne AUJAMES
2024-04-11 17:15         ` Sean Hefty

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox