netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending
@ 2024-11-13  7:14 D. Wythe
  2024-11-13  7:14 ` [PATCH net-next 1/3] net/smc: refactoring lgr pending lock D. Wythe
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: D. Wythe @ 2024-11-13  7:14 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

This patch set attempts to optimize the parallelism of SMC-R connections by reducing
locks scope of-smc_xxx_lgr_pending. This is a balance between large-scale refactoring
and performance optimization, where this patch attempts to achieve performance optimization
with as few changes as possible to minimize unexpected impacts.

Although there are still many bottlenecks that affect the connection performance of SMC, 
This also has a certain performance improvement after this patches.

Short-lived conenction wrk/nginx benchmark test:

+--------------+--------+--------+--------+-------+-------+-------+-------+
|conns/qps     |   c8   |  c16   |  c32   |  c64  | c512  | c1024 | c2048 |
+--------------+--------+--------+--------+-------+-------+-------+-------+
|SMC-R before  |10481.84|10761.04|10283.01|9006.88|9140.88|9255.41|7296.20|
+--------------+--------+--------+--------+-------+-------+-------+-------+
|SMC-R origin  |7328.86 |7256.99 |7288.53 |7239.55|5787.56|5371.17|3065.74|
+--------------+--------+--------+--------+-------+-------+-------+-------+

D. Wythe (3):
  net/smc: refactoring lgr pending lock
  net/smc: reduce locks scope of smc_xxx_lgr_pending
  net/smc: Isolate the smc_xxx_lgr_pending with ib_device

 net/smc/af_smc.c   | 36 +++++++++++++++++++-----------------
 net/smc/smc_core.c | 17 +++++++++++++++--
 net/smc/smc_core.h | 29 +++++++++++++++++++++++++++++
 net/smc/smc_ib.c   |  2 ++
 net/smc/smc_ib.h   |  2 ++
 5 files changed, 67 insertions(+), 19 deletions(-)

-- 
2.45.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next 1/3] net/smc: refactoring lgr pending lock
  2024-11-13  7:14 [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending D. Wythe
@ 2024-11-13  7:14 ` D. Wythe
  2024-11-14  1:40   ` Dust Li
  2024-11-13  7:14 ` [PATCH net-next 2/3] net/smc: reduce locks scope of smc_xxx_lgr_pending D. Wythe
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: D. Wythe @ 2024-11-13  7:14 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

This patch replaces the locking and unlocking of lgr pending with
a unified inline function, and since the granularity of lgr pending
lock is within the lifecycle of init_info, which make it possible
to record the lock state on init_info, which provides a potential
functionality for multiple unlocks without triggering exceptions, which
creates conditions to reduce the scope of locks in the future.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/af_smc.c   | 24 ++++++++++++------------
 net/smc/smc_core.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9d76e902fd77..19480d8affb0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1251,10 +1251,10 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	if (reason_code)
 		return reason_code;
 
-	mutex_lock(&smc_client_lgr_pending);
+	smc_lgr_pending_lock(ini, &smc_client_lgr_pending);
 	reason_code = smc_conn_create(smc, ini);
 	if (reason_code) {
-		mutex_unlock(&smc_client_lgr_pending);
+		smc_lgr_pending_unlock(ini);
 		return reason_code;
 	}
 
@@ -1343,7 +1343,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 		if (reason_code)
 			goto connect_abort;
 	}
-	mutex_unlock(&smc_client_lgr_pending);
+	smc_lgr_pending_unlock(ini);
 
 	smc_copy_sock_settings_to_clc(smc);
 	smc->connect_nonblock = 0;
@@ -1353,7 +1353,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	return 0;
 connect_abort:
 	smc_conn_abort(smc, ini->first_contact_local);
-	mutex_unlock(&smc_client_lgr_pending);
+	smc_lgr_pending_unlock(ini);
 	smc->connect_nonblock = 0;
 
 	return reason_code;
@@ -1412,10 +1412,10 @@ static int smc_connect_ism(struct smc_sock *smc,
 	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
 
 	/* there is only one lgr role for SMC-D; use server lock */
-	mutex_lock(&smc_server_lgr_pending);
+	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
 	rc = smc_conn_create(smc, ini);
 	if (rc) {
-		mutex_unlock(&smc_server_lgr_pending);
+		smc_lgr_pending_unlock(ini);
 		return rc;
 	}
 
@@ -1446,7 +1446,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 				  aclc->hdr.version, eid, ini);
 	if (rc)
 		goto connect_abort;
-	mutex_unlock(&smc_server_lgr_pending);
+	smc_lgr_pending_unlock(ini);
 
 	smc_copy_sock_settings_to_clc(smc);
 	smc->connect_nonblock = 0;
@@ -1456,7 +1456,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 	return 0;
 connect_abort:
 	smc_conn_abort(smc, ini->first_contact_local);
-	mutex_unlock(&smc_server_lgr_pending);
+	smc_lgr_pending_unlock(ini);
 	smc->connect_nonblock = 0;
 
 	return rc;
@@ -2478,7 +2478,7 @@ static void smc_listen_work(struct work_struct *work)
 	if (rc)
 		goto out_decl;
 
-	mutex_lock(&smc_server_lgr_pending);
+	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
 	smc_close_init(new_smc);
 	smc_rx_init(new_smc);
 	smc_tx_init(new_smc);
@@ -2497,7 +2497,7 @@ static void smc_listen_work(struct work_struct *work)
 
 	/* SMC-D does not need this lock any more */
 	if (ini->is_smcd)
-		mutex_unlock(&smc_server_lgr_pending);
+		smc_lgr_pending_unlock(ini);
 
 	/* receive SMC Confirm CLC message */
 	memset(buf, 0, sizeof(*buf));
@@ -2528,7 +2528,7 @@ static void smc_listen_work(struct work_struct *work)
 					    ini->first_contact_local, ini);
 		if (rc)
 			goto out_unlock;
-		mutex_unlock(&smc_server_lgr_pending);
+		smc_lgr_pending_unlock(ini);
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
 
@@ -2544,7 +2544,7 @@ static void smc_listen_work(struct work_struct *work)
 	goto out_free;
 
 out_unlock:
-	mutex_unlock(&smc_server_lgr_pending);
+	smc_lgr_pending_unlock(ini);
 out_decl:
 	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
 			   proposal_version);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 69b54ecd6503..5abe9438772c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -432,6 +432,8 @@ struct smc_init_info {
 	u8			ism_offered_cnt; /* # of ISM devices offered */
 	u8			ism_selected;    /* index of selected ISM dev*/
 	u8			smcd_version;
+	/* mutex holding for conn create */
+	struct mutex *mutex;
 };
 
 /* Find the connection associated with the given alert token in the link group.
@@ -600,6 +602,33 @@ int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
 int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb);
 int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
 
+static inline void smc_lgr_pending_lock(struct smc_init_info *ini, struct mutex *lock)
+{
+	if (unlikely(ini->mutex))
+		pr_warn_once("smc: lgr pending deadlock dected.");
+
+	mutex_lock(lock);
+	ini->mutex = lock;
+}
+
+/* It will save the locking status of the ini, which provides a potential functionality
+ * for multiple unlocks without triggering exceptions. This creates conditions
+ * to reduce the scope of locks in the future.
+ */
+static inline void smc_lgr_pending_unlock(struct smc_init_info *ini)
+{
+	/* tempory */
+	struct mutex *lock;
+
+	/* already unlock it, just return */
+	if (!ini->mutex)
+		return;
+
+	lock = ini->mutex;
+	ini->mutex = NULL;
+	mutex_unlock(lock);
+}
+
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
 	return link->lgr;
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 2/3] net/smc: reduce locks scope of smc_xxx_lgr_pending
  2024-11-13  7:14 [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending D. Wythe
  2024-11-13  7:14 ` [PATCH net-next 1/3] net/smc: refactoring lgr pending lock D. Wythe
@ 2024-11-13  7:14 ` D. Wythe
  2024-11-13  7:14 ` [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device D. Wythe
  2024-11-14  1:27 ` [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending Dust Li
  3 siblings, 0 replies; 7+ messages in thread
From: D. Wythe @ 2024-11-13  7:14 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

To reduce locks scope of smc_xxx_lgr_pending, who aim to serialize
the creation of link group. However, once link group existed already,
those locks are meaningless, worse still, they make incoming connections
have to be queued one after the other.

As an optimization, Once we found that we have reused an existing link group,
we can immediately release the lock. This way, only the first contact connection
needs to hold the global lock throughout its entire lifecycle, while the
NON first contact only needing to hold it until the end of smc_conn_create.
This greatly alleviates the bottleneck of establishing
connections in SMC.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/smc_core.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 500952c2e67b..5559a8218bd9 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1951,8 +1951,8 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
 	return true;
 }
 
-/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
+static int __smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini,
+			     bool unlock_with_existed_lgr)
 {
 	struct smc_connection *conn = &smc->conn;
 	struct net *net = sock_net(&smc->sk);
@@ -2026,7 +2026,10 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 			smc_lgr_cleanup_early(lgr);
 			goto out;
 		}
+	} else if (unlock_with_existed_lgr) {
+		smc_lgr_pending_unlock(ini);
 	}
+
 	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
 	if (!conn->lgr->is_smcd)
 		smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
@@ -2050,6 +2053,16 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 	return rc;
 }
 
+/* create a new SMC connection (and a new link group if necessary) */
+int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
+{
+	/* Considering that the path for SMC-D is shorter than SMC-R
+	 * the impact of global locking is smaller. So, let's make no
+	 * change on SMC-D.
+	 */
+	return __smc_conn_create(smc, ini, !ini->is_smcd);
+}
+
 #define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
 #define SMCR_RMBE_SIZES		15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */
 
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device
  2024-11-13  7:14 [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending D. Wythe
  2024-11-13  7:14 ` [PATCH net-next 1/3] net/smc: refactoring lgr pending lock D. Wythe
  2024-11-13  7:14 ` [PATCH net-next 2/3] net/smc: reduce locks scope of smc_xxx_lgr_pending D. Wythe
@ 2024-11-13  7:14 ` D. Wythe
  2024-11-14  1:51   ` Dust Li
  2024-11-14  1:27 ` [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending Dust Li
  3 siblings, 1 reply; 7+ messages in thread
From: D. Wythe @ 2024-11-13  7:14 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

It is widely known that SMC introduced a global lock to protect the
creation of the first connection. This lock not only brings performance
issues but also poses a serious security risk. In a multi-tenant
container environment, malicious tenants can construct attacks that keep
the lock occupied for an extended period, thereby affecting the
connections of other tenants.

Considering that this lock is essentially meant to protect the QP, which
belongs to a device, we can limit the scope of the lock to within the
device rather than having it be global. This way, when a container
exclusively occupies the device, it can avoid being affected by other
malicious tenants.

Also make on impact on SMC-D since the path of SMC-D is shorter.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/af_smc.c | 18 ++++++++++--------
 net/smc/smc_ib.c |  2 ++
 net/smc/smc_ib.h |  2 ++
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 19480d8affb0..d5b9ea7661db 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -56,11 +56,8 @@
 #include "smc_loopback.h"
 #include "smc_inet.h"
 
-static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
-						 * creation on server
-						 */
-static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
-						 * creation on client
+static DEFINE_MUTEX(smcd_server_lgr_pending);	/* serialize link group
+						 * creation on server for SMC-D
 						 */
 
 static struct workqueue_struct	*smc_tcp_ls_wq;	/* wq for tcp listen work */
@@ -1251,7 +1248,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	if (reason_code)
 		return reason_code;
 
-	smc_lgr_pending_lock(ini, &smc_client_lgr_pending);
+	smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ?
+				&ini->smcrv2.ib_dev_v2->smc_server_lgr_pending :
+				&ini->ib_dev->smc_server_lgr_pending);
 	reason_code = smc_conn_create(smc, ini);
 	if (reason_code) {
 		smc_lgr_pending_unlock(ini);
@@ -1412,7 +1411,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
 
 	/* there is only one lgr role for SMC-D; use server lock */
-	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
+	smc_lgr_pending_lock(ini, &smcd_server_lgr_pending);
 	rc = smc_conn_create(smc, ini);
 	if (rc) {
 		smc_lgr_pending_unlock(ini);
@@ -2044,6 +2043,9 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 {
 	int rc;
 
+	smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ?
+			     &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending :
+			     &ini->ib_dev->smc_server_lgr_pending);
 	/* allocate connection / link group */
 	rc = smc_conn_create(new_smc, ini);
 	if (rc)
@@ -2064,6 +2066,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
 {
 	int rc;
 
+	smc_lgr_pending_lock(ini, &smcd_server_lgr_pending);
 	rc = smc_conn_create(new_smc, ini);
 	if (rc)
 		return rc;
@@ -2478,7 +2481,6 @@ static void smc_listen_work(struct work_struct *work)
 	if (rc)
 		goto out_decl;
 
-	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
 	smc_close_init(new_smc);
 	smc_rx_init(new_smc);
 	smc_tx_init(new_smc);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9c563cdbea90..fb8b81b628b8 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -952,6 +952,8 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
 	init_waitqueue_head(&smcibdev->lnks_deleted);
 	mutex_init(&smcibdev->mutex);
 	mutex_lock(&smc_ib_devices.mutex);
+	mutex_init(&smcibdev->smc_server_lgr_pending);
+	mutex_init(&smcibdev->smc_client_lgr_pending);
 	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
 	mutex_unlock(&smc_ib_devices.mutex);
 	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index ef8ac2b7546d..322547a5a23d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -57,6 +57,8 @@ struct smc_ib_device {				/* ib-device infos for smc */
 	atomic_t		lnk_cnt_by_port[SMC_MAX_PORTS];
 						/* number of links per port */
 	int			ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
+	struct mutex    smc_server_lgr_pending; /* serialize link group creation on server */
+	struct mutex    smc_client_lgr_pending; /* serialize link group creation on client */
 };
 
 static inline __be32 smc_ib_gid_to_ipv4(u8 gid[SMC_GID_SIZE])
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending
  2024-11-13  7:14 [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending D. Wythe
                   ` (2 preceding siblings ...)
  2024-11-13  7:14 ` [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device D. Wythe
@ 2024-11-14  1:27 ` Dust Li
  3 siblings, 0 replies; 7+ messages in thread
From: Dust Li @ 2024-11-14  1:27 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

On 2024-11-13 15:14:02, D. Wythe wrote:
>This patch set attempts to optimize the parallelism of SMC-R connections by reducing
>locks scope of-smc_xxx_lgr_pending. This is a balance between large-scale refactoring
>and performance optimization, where this patch attempts to achieve performance optimization
>with as few changes as possible to minimize unexpected impacts.

I think one of the main benefits you missed here is isolating the lock across
different network namespaces when the RDMA device is exclusive. This
isolation is crucial when managing multiple containers, each with its
own RDMA device.

With the global smc_server_lgr_pending lock, if one container holds this
lock, all other containers must wait.

Worse still, a malicious client could exploit this by forcing the server
to create new link groups according to the SMC-R protocol. In other
words, an attacker could easily compel the server to continuously create
new link groups and hold the smc_server_lgr_pending lock, causing all
SMC connections targeting other containers on the same server to wait.
This scenario effectively constitutes a denial-of-service (DoS) attack.

>
>Although there are still many bottlenecks that affect the connection performance of SMC, 
>This also has a certain performance improvement after this patches.
>
>Short-lived conenction wrk/nginx benchmark test:
>
>+--------------+--------+--------+--------+-------+-------+-------+-------+
>|conns/qps     |   c8   |  c16   |  c32   |  c64  | c512  | c1024 | c2048 |
>+--------------+--------+--------+--------+-------+-------+-------+-------+
>|SMC-R before  |10481.84|10761.04|10283.01|9006.88|9140.88|9255.41|7296.20|
         ^^^ after ?


>+--------------+--------+--------+--------+-------+-------+-------+-------+
>|SMC-R origin  |7328.86 |7256.99 |7288.53 |7239.55|5787.56|5371.17|3065.74|
>+--------------+--------+--------+--------+-------+-------+-------+-------+
>
>D. Wythe (3):
>  net/smc: refactoring lgr pending lock
>  net/smc: reduce locks scope of smc_xxx_lgr_pending
>  net/smc: Isolate the smc_xxx_lgr_pending with ib_device
>
> net/smc/af_smc.c   | 36 +++++++++++++++++++-----------------
> net/smc/smc_core.c | 17 +++++++++++++++--
> net/smc/smc_core.h | 29 +++++++++++++++++++++++++++++
> net/smc/smc_ib.c   |  2 ++
> net/smc/smc_ib.h   |  2 ++
> 5 files changed, 67 insertions(+), 19 deletions(-)
>
>-- 
>2.45.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 1/3] net/smc: refactoring lgr pending lock
  2024-11-13  7:14 ` [PATCH net-next 1/3] net/smc: refactoring lgr pending lock D. Wythe
@ 2024-11-14  1:40   ` Dust Li
  0 siblings, 0 replies; 7+ messages in thread
From: Dust Li @ 2024-11-14  1:40 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

On 2024-11-13 15:14:03, D. Wythe wrote:
>This patch replaces the locking and unlocking of lgr pending with
>a unified inline function, and since the granularity of lgr pending
>lock is within the lifecycle of init_info, which make it possible
>to record the lock state on init_info, which provides a potential
>functionality for multiple unlocks without triggering exceptions, which

Since we already have lockdep, I am skeptical about the usefulness of
this feature.

>creates conditions to reduce the scope of locks in the future.

>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> net/smc/af_smc.c   | 24 ++++++++++++------------
> net/smc/smc_core.h | 29 +++++++++++++++++++++++++++++
> 2 files changed, 41 insertions(+), 12 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 9d76e902fd77..19480d8affb0 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -1251,10 +1251,10 @@ static int smc_connect_rdma(struct smc_sock *smc,
> 	if (reason_code)
> 		return reason_code;
> 
>-	mutex_lock(&smc_client_lgr_pending);
>+	smc_lgr_pending_lock(ini, &smc_client_lgr_pending);
> 	reason_code = smc_conn_create(smc, ini);
> 	if (reason_code) {
>-		mutex_unlock(&smc_client_lgr_pending);
>+		smc_lgr_pending_unlock(ini);
> 		return reason_code;
> 	}
> 
>@@ -1343,7 +1343,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
> 		if (reason_code)
> 			goto connect_abort;
> 	}
>-	mutex_unlock(&smc_client_lgr_pending);
>+	smc_lgr_pending_unlock(ini);
> 
> 	smc_copy_sock_settings_to_clc(smc);
> 	smc->connect_nonblock = 0;
>@@ -1353,7 +1353,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
> 	return 0;
> connect_abort:
> 	smc_conn_abort(smc, ini->first_contact_local);
>-	mutex_unlock(&smc_client_lgr_pending);
>+	smc_lgr_pending_unlock(ini);
> 	smc->connect_nonblock = 0;
> 
> 	return reason_code;
>@@ -1412,10 +1412,10 @@ static int smc_connect_ism(struct smc_sock *smc,
> 	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
> 
> 	/* there is only one lgr role for SMC-D; use server lock */
>-	mutex_lock(&smc_server_lgr_pending);
>+	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
> 	rc = smc_conn_create(smc, ini);
> 	if (rc) {
>-		mutex_unlock(&smc_server_lgr_pending);
>+		smc_lgr_pending_unlock(ini);
> 		return rc;
> 	}
> 
>@@ -1446,7 +1446,7 @@ static int smc_connect_ism(struct smc_sock *smc,
> 				  aclc->hdr.version, eid, ini);
> 	if (rc)
> 		goto connect_abort;
>-	mutex_unlock(&smc_server_lgr_pending);
>+	smc_lgr_pending_unlock(ini);
> 
> 	smc_copy_sock_settings_to_clc(smc);
> 	smc->connect_nonblock = 0;
>@@ -1456,7 +1456,7 @@ static int smc_connect_ism(struct smc_sock *smc,
> 	return 0;
> connect_abort:
> 	smc_conn_abort(smc, ini->first_contact_local);
>-	mutex_unlock(&smc_server_lgr_pending);
>+	smc_lgr_pending_unlock(ini);
> 	smc->connect_nonblock = 0;
> 
> 	return rc;
>@@ -2478,7 +2478,7 @@ static void smc_listen_work(struct work_struct *work)
> 	if (rc)
> 		goto out_decl;
> 
>-	mutex_lock(&smc_server_lgr_pending);
>+	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
> 	smc_close_init(new_smc);
> 	smc_rx_init(new_smc);
> 	smc_tx_init(new_smc);
>@@ -2497,7 +2497,7 @@ static void smc_listen_work(struct work_struct *work)
> 
> 	/* SMC-D does not need this lock any more */
> 	if (ini->is_smcd)
>-		mutex_unlock(&smc_server_lgr_pending);
>+		smc_lgr_pending_unlock(ini);
> 
> 	/* receive SMC Confirm CLC message */
> 	memset(buf, 0, sizeof(*buf));
>@@ -2528,7 +2528,7 @@ static void smc_listen_work(struct work_struct *work)
> 					    ini->first_contact_local, ini);
> 		if (rc)
> 			goto out_unlock;
>-		mutex_unlock(&smc_server_lgr_pending);
>+		smc_lgr_pending_unlock(ini);
> 	}
> 	smc_conn_save_peer_info(new_smc, cclc);
> 
>@@ -2544,7 +2544,7 @@ static void smc_listen_work(struct work_struct *work)
> 	goto out_free;
> 
> out_unlock:
>-	mutex_unlock(&smc_server_lgr_pending);
>+	smc_lgr_pending_unlock(ini);
> out_decl:
> 	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
> 			   proposal_version);
>diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>index 69b54ecd6503..5abe9438772c 100644
>--- a/net/smc/smc_core.h
>+++ b/net/smc/smc_core.h
>@@ -432,6 +432,8 @@ struct smc_init_info {
> 	u8			ism_offered_cnt; /* # of ISM devices offered */
> 	u8			ism_selected;    /* index of selected ISM dev*/
> 	u8			smcd_version;
>+	/* mutex holding for conn create */
>+	struct mutex *mutex;
> };
> 
> /* Find the connection associated with the given alert token in the link group.
>@@ -600,6 +602,33 @@ int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
> int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb);
> int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
> 
>+static inline void smc_lgr_pending_lock(struct smc_init_info *ini, struct mutex *lock)
>+{
>+	if (unlikely(ini->mutex))
>+		pr_warn_once("smc: lgr pending deadlock dected.");
>+
>+	mutex_lock(lock);
>+	ini->mutex = lock;
>+}
>+
>+/* It will save the locking status of the ini, which provides a potential functionality
>+ * for multiple unlocks without triggering exceptions. This creates conditions
>+ * to reduce the scope of locks in the future.
>+ */
>+static inline void smc_lgr_pending_unlock(struct smc_init_info *ini)
>+{
>+	/* tempory */
>+	struct mutex *lock;
>+
>+	/* already unlock it, just return */
>+	if (!ini->mutex)
>+		return;
>+
>+	lock = ini->mutex;
>+	ini->mutex = NULL;
>+	mutex_unlock(lock);
>+}
>+
> static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
> {
> 	return link->lgr;
>-- 
>2.45.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device
  2024-11-13  7:14 ` [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device D. Wythe
@ 2024-11-14  1:51   ` Dust Li
  0 siblings, 0 replies; 7+ messages in thread
From: Dust Li @ 2024-11-14  1:51 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni,
	edumazet

On 2024-11-13 15:14:05, D. Wythe wrote:
>It is widely known that SMC introduced a global lock to protect the
>creation of the first connection. This lock not only brings performance
>issues but also poses a serious security risk. In a multi-tenant
>container environment, malicious tenants can construct attacks that keep
>the lock occupied for an extended period, thereby affecting the
>connections of other tenants.
>
>Considering that this lock is essentially meant to protect the QP, which
>belongs to a device, we can limit the scope of the lock to within the
>device rather than having it be global. This way, when a container
>exclusively occupies the device, it can avoid being affected by other
>malicious tenants.
>
>Also make on impact on SMC-D since the path of SMC-D is shorter.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> net/smc/af_smc.c | 18 ++++++++++--------
> net/smc/smc_ib.c |  2 ++
> net/smc/smc_ib.h |  2 ++
> 3 files changed, 14 insertions(+), 8 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 19480d8affb0..d5b9ea7661db 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -56,11 +56,8 @@
> #include "smc_loopback.h"
> #include "smc_inet.h"
> 
>-static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>-						 * creation on server
>-						 */
>-static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
>-						 * creation on client
>+static DEFINE_MUTEX(smcd_server_lgr_pending);	/* serialize link group
>+						 * creation on server for SMC-D
> 						 */

Why not move the smcd_server_lgr_pending lock to the smcd_device level
as well ?


> 
> static struct workqueue_struct	*smc_tcp_ls_wq;	/* wq for tcp listen work */
>@@ -1251,7 +1248,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
> 	if (reason_code)
> 		return reason_code;
> 
>-	smc_lgr_pending_lock(ini, &smc_client_lgr_pending);
>+	smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ?
>+				&ini->smcrv2.ib_dev_v2->smc_server_lgr_pending :
>+				&ini->ib_dev->smc_server_lgr_pending);
> 	reason_code = smc_conn_create(smc, ini);
> 	if (reason_code) {
> 		smc_lgr_pending_unlock(ini);
>@@ -1412,7 +1411,7 @@ static int smc_connect_ism(struct smc_sock *smc,
> 	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
> 
> 	/* there is only one lgr role for SMC-D; use server lock */
>-	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
>+	smc_lgr_pending_lock(ini, &smcd_server_lgr_pending);
> 	rc = smc_conn_create(smc, ini);
> 	if (rc) {
> 		smc_lgr_pending_unlock(ini);
>@@ -2044,6 +2043,9 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
> {
> 	int rc;
> 
>+	smc_lgr_pending_lock(ini, (ini->smcr_version & SMC_V2) ?
>+			     &ini->smcrv2.ib_dev_v2->smc_server_lgr_pending :
>+			     &ini->ib_dev->smc_server_lgr_pending);
> 	/* allocate connection / link group */
> 	rc = smc_conn_create(new_smc, ini);
> 	if (rc)
>@@ -2064,6 +2066,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
> {
> 	int rc;
> 
>+	smc_lgr_pending_lock(ini, &smcd_server_lgr_pending);
> 	rc = smc_conn_create(new_smc, ini);
> 	if (rc)
> 		return rc;
>@@ -2478,7 +2481,6 @@ static void smc_listen_work(struct work_struct *work)
> 	if (rc)
> 		goto out_decl;
> 
>-	smc_lgr_pending_lock(ini, &smc_server_lgr_pending);
> 	smc_close_init(new_smc);
> 	smc_rx_init(new_smc);
> 	smc_tx_init(new_smc);
>diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
>index 9c563cdbea90..fb8b81b628b8 100644
>--- a/net/smc/smc_ib.c
>+++ b/net/smc/smc_ib.c
>@@ -952,6 +952,8 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
> 	init_waitqueue_head(&smcibdev->lnks_deleted);
> 	mutex_init(&smcibdev->mutex);
> 	mutex_lock(&smc_ib_devices.mutex);
>+	mutex_init(&smcibdev->smc_server_lgr_pending);
>+	mutex_init(&smcibdev->smc_client_lgr_pending);
> 	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
> 	mutex_unlock(&smc_ib_devices.mutex);
> 	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
>diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
>index ef8ac2b7546d..322547a5a23d 100644
>--- a/net/smc/smc_ib.h
>+++ b/net/smc/smc_ib.h
>@@ -57,6 +57,8 @@ struct smc_ib_device {				/* ib-device infos for smc */
> 	atomic_t		lnk_cnt_by_port[SMC_MAX_PORTS];
> 						/* number of links per port */
> 	int			ndev_ifidx[SMC_MAX_PORTS]; /* ndev if indexes */
>+	struct mutex    smc_server_lgr_pending; /* serialize link group creation on server */
>+	struct mutex    smc_client_lgr_pending; /* serialize link group creation on client */

Align please.

Best regards,
Dust


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-11-14  1:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-13  7:14 [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending D. Wythe
2024-11-13  7:14 ` [PATCH net-next 1/3] net/smc: refactoring lgr pending lock D. Wythe
2024-11-14  1:40   ` Dust Li
2024-11-13  7:14 ` [PATCH net-next 2/3] net/smc: reduce locks scope of smc_xxx_lgr_pending D. Wythe
2024-11-13  7:14 ` [PATCH net-next 3/3] net/smc: Isolate the smc_xxx_lgr_pending with ib_device D. Wythe
2024-11-14  1:51   ` Dust Li
2024-11-14  1:27 ` [PATCH net-next 0/3] Reduce locks scope of-smc_xxx_lgr_pending Dust Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).