From: Zhu Yanjun <zyjzyj2000@gmail.com>
To: Etienne AUJAMES <eaujames@ddn.com>,
jgg@ziepe.ca, leon@kernel.org, markzhang@nvidia.com,
shefty@nvidia.com
Cc: "linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
"Gael.DELBARY@cea.fr" <Gael.DELBARY@cea.fr>,
"guillaume.courrier@cea.fr" <guillaume.courrier@cea.fr>,
Serguei Smirnov <ssmirnov@whamcloud.com>,
Cyril Bordage <cbordage@whamcloud.com>
Subject: Re: [PATCH rdma-next v2] IB/cma: Define option to set max CM retries
Date: Tue, 23 Apr 2024 17:42:37 +0200 [thread overview]
Message-ID: <700c19e8-ae4f-42f0-a604-9e33a9a94dd3@linux.dev> (raw)
In-Reply-To: <Zh_IGG3chXtjK3Nu@eaujamesDDN>
On 17.04.24 15:01, Etienne AUJAMES wrote:
> Define new options in 'rdma_set_option' to override default CM retries
> ("Max CM retries").
>
> This option can be useful for RoCE networks (no SM) to decrease the
> overall connection timeout with an unreachable node.
>
> With a default of 15 retries, the CM can take several minutes to
> return an UNREACHABLE event.
>
> With Infiniband, the SM returns an empty path record for an
> unreachable node (error returned in rdma_resolve_route()). So, the
> application will not send the connection requests.
>
> Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
> ---
> drivers/infiniband/core/cma.c | 40 +++++++++++++++++++++++++++---
> drivers/infiniband/core/cma_priv.h | 2 ++
> drivers/infiniband/core/ucma.c | 7 ++++++
> include/rdma/rdma_cm.h | 3 +++
> include/uapi/rdma/rdma_user_cm.h | 3 ++-
> 5 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 1e2cd7c8716e..b6a73c7307ea 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -1002,6 +1002,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
> id_priv->tos_set = false;
> id_priv->timeout_set = false;
> id_priv->min_rnr_timer_set = false;
> + id_priv->max_cm_retries = false;
max_cm_retries is u8 type. Not sure if it is good to set it as false.
Zhu Yanjun
> id_priv->gid_type = IB_GID_TYPE_IB;
> spin_lock_init(&id_priv->lock);
> mutex_init(&id_priv->qp_mutex);
> @@ -2845,6 +2846,38 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
> }
> EXPORT_SYMBOL(rdma_set_min_rnr_timer);
>
> +/**
> + * rdma_set_cm_retries() - Configure CM retries to establish an active
> + * connection.
> + * @id: Connection identifier to connect.
> + * @max_cm_retries: 4-bit value defined as "Max CM Retries" REQ field
> + * (Table 99 "REQ Message Contents" in the IBTA specification).
> + *
> + * This function should be called before rdma_connect() on active side.
> + * The value will determine the maximum number of times the CM should
> + * resend a connection request or reply (REQ/REP) before giving up (UNREACHABLE
> + * event).
> + *
> + * Return: 0 for success
> + */
> +int rdma_set_cm_retries(struct rdma_cm_id *id, u8 max_cm_retries)
> +{
> + struct rdma_id_private *id_priv;
> +
> + /* It is a 4-bit value */
> + if (max_cm_retries & 0xf0)
> + return -EINVAL;
> +
> + id_priv = container_of(id, struct rdma_id_private, id);
> + mutex_lock(&id_priv->qp_mutex);
> + id_priv->max_cm_retries = max_cm_retries;
> + id_priv->max_cm_retries_set = true;
> + mutex_unlock(&id_priv->qp_mutex);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(rdma_set_cm_retries);
> +
> static int route_set_path_rec_inbound(struct cma_work *work,
> struct sa_path_rec *path_rec)
> {
> @@ -4295,8 +4328,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
> req.path = id_priv->id.route.path_rec;
> req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
> req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
> - req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
> - req.max_cm_retries = CMA_MAX_CM_RETRIES;
> + req.max_cm_retries = id_priv->max_cm_retries_set ?
> + id_priv->max_cm_retries : CMA_MAX_CM_RETRIES;
>
> trace_cm_send_sidr_req(id_priv);
> ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
> @@ -4370,7 +4403,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
> req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
> req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
> req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
> - req.max_cm_retries = CMA_MAX_CM_RETRIES;
> + req.max_cm_retries = id_priv->max_cm_retries_set ?
> + id_priv->max_cm_retries : CMA_MAX_CM_RETRIES;
> req.srq = id_priv->srq ? 1 : 0;
> req.ece.vendor_id = id_priv->ece.vendor_id;
> req.ece.attr_mod = id_priv->ece.attr_mod;
> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
> index b7354c94cf1b..4c41e5d7e848 100644
> --- a/drivers/infiniband/core/cma_priv.h
> +++ b/drivers/infiniband/core/cma_priv.h
> @@ -95,10 +95,12 @@ struct rdma_id_private {
> u8 tos_set:1;
> u8 timeout_set:1;
> u8 min_rnr_timer_set:1;
> + u8 max_cm_retries_set:1;
> u8 reuseaddr;
> u8 afonly;
> u8 timeout;
> u8 min_rnr_timer;
> + u8 max_cm_retries;
> u8 used_resolve_ip;
> enum ib_gid_type gid_type;
>
> diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
> index 5f5ad8faf86e..27c165de7eff 100644
> --- a/drivers/infiniband/core/ucma.c
> +++ b/drivers/infiniband/core/ucma.c
> @@ -1284,6 +1284,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
> }
> ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
> break;
> + case RDMA_OPTION_ID_CM_RETRIES:
> + if (optlen != sizeof(u8)) {
> + ret = -EINVAL;
> + break;
> + }
> + ret = rdma_set_cm_retries(ctx->cm_id, *((u8 *)optval));
> + break;
> default:
> ret = -ENOSYS;
> }
> diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
> index 8a8ab2f793ab..1d7404e2d81e 100644
> --- a/include/rdma/rdma_cm.h
> +++ b/include/rdma/rdma_cm.h
> @@ -344,6 +344,9 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
> int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
>
> int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer);
> +
> +int rdma_set_cm_retries(struct rdma_cm_id *id, u8 max_cm_retries);
> +
> /**
> * rdma_get_service_id - Return the IB service ID for a specified address.
> * @id: Communication identifier associated with the address.
> diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
> index 7cea03581f79..f00eb05006b0 100644
> --- a/include/uapi/rdma/rdma_user_cm.h
> +++ b/include/uapi/rdma/rdma_user_cm.h
> @@ -313,7 +313,8 @@ enum {
> RDMA_OPTION_ID_TOS = 0,
> RDMA_OPTION_ID_REUSEADDR = 1,
> RDMA_OPTION_ID_AFONLY = 2,
> - RDMA_OPTION_ID_ACK_TIMEOUT = 3
> + RDMA_OPTION_ID_ACK_TIMEOUT = 3,
> + RDMA_OPTION_ID_CM_RETRIES = 4,
> };
>
> enum {
next prev parent reply other threads:[~2024-04-23 15:43 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-17 13:01 [PATCH rdma-next v2] IB/cma: Define option to set max CM retries Etienne AUJAMES
2024-04-23 15:42 ` Zhu Yanjun [this message]
2024-04-23 15:54 ` Sean Hefty
2024-04-24 8:41 ` Etienne AUJAMES
2024-04-24 8:32 ` Etienne AUJAMES
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=700c19e8-ae4f-42f0-a604-9e33a9a94dd3@linux.dev \
--to=zyjzyj2000@gmail.com \
--cc=Gael.DELBARY@cea.fr \
--cc=cbordage@whamcloud.com \
--cc=eaujames@ddn.com \
--cc=guillaume.courrier@cea.fr \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=markzhang@nvidia.com \
--cc=shefty@nvidia.com \
--cc=ssmirnov@whamcloud.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox