All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Gu <guwen@linux.alibaba.com>
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com
Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next v2 4/5] net/smc: avoid data copy from sndbuf to peer RMB in SMC-D loopback
Date: Tue, 20 Dec 2022 11:21:44 +0800	[thread overview]
Message-ID: <1671506505-104676-5-git-send-email-guwen@linux.alibaba.com> (raw)
In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com>

This patch aims to improve SMC-D loopback performance by avoiding
the data copy from the local sndbuf to the peer RMB. The main idea is
to let the local sndbuf and the peer RMB share the same physical memory.

 +----------+                     +----------+
 | socket A |                     | socket B |
 +----------+                     +----------+
       |                               ^
       |         +---------+           |
  regard as      |         | ----------|
  local sndbuf   |  B's    |     regard as
       |         |  RMB    |     local RMB
       |-------> |         |
                 +---------+

For connections using smcd loopback device:

1. Only create and maintain the local RMB.
        a. Create or reuse an RMB when a connection is created;
        b. Free the RMB when the lgr is freed;

2. Attach the local sndbuf to the peer RMB.
        a. sndbuf_desc describes the same memory region as the peer rmb_desc.
        b. sndbuf_desc is exclusive to a specific connection and won't be
           added to the lgr buffer pool for reuse.
        c. The sndbuf is attached to the peer RMB once the remote token has
           been received after the CLC accept/confirm message.
        d. The sndbuf is detached from the peer RMB when the connection is freed.

Therefore, data copied from userspace into the local sndbuf directly
reaches the peer RMB.

Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/af_smc.c   | 23 +++++++++++++++++++-
 net/smc/smc_core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  2 ++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b9884c8..c7de566 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1073,7 +1073,6 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
 	 * The RFC patch hasn't resolved this, just simply always
 	 * chooses loopback device first, and fallback if loopback
 	 * communication is impossible.
-	 *
 	 */
 	/* check if there is an ism or loopback device available */
 	if (!(ini->smcd_version & SMC_V1) ||
@@ -1397,6 +1396,17 @@ static int smc_connect_ism(struct smc_sock *smc,
 	}
 
 	smc_conn_save_peer_info(smc, aclc);
+
+	/* special for smcd loopback
+	 * conns above smcd loopback dev only create their rmbs.
+	 * their sndbufs are 'maps' of peer rmbs.
+	 */
+	if (smc->conn.lgr->smcd->is_loopback) {
+		rc = smcd_buf_attach(&smc->conn);
+		if (rc)
+			goto connect_abort;
+		smc->sk.sk_sndbuf = 2 * (smc->conn.sndbuf_desc->len);
+	}
 	smc_close_init(smc);
 	smc_rx_init(smc);
 	smc_tx_init(smc);
@@ -2464,6 +2474,17 @@ static void smc_listen_work(struct work_struct *work)
 		mutex_unlock(&smc_server_lgr_pending);
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
+
+	/* special for smcd loopback
+	 * conns above smcd loopback dev only create their rmbs.
+	 * their sndbufs are 'maps' of peer rmbs.
+	 */
+	if (ini->is_smcd && new_smc->conn.lgr->smcd->is_loopback) {
+		rc = smcd_buf_attach(&new_smc->conn);
+		if (rc)
+			goto out_decl;
+		new_smc->sk.sk_sndbuf = 2 * (new_smc->conn.sndbuf_desc->len);
+	}
 	smc_listen_out_connected(new_smc);
 	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
 	goto out_free;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c305d8d..bf40ad3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1171,6 +1171,10 @@ void smc_conn_free(struct smc_connection *conn)
 		if (!list_empty(&lgr->list))
 			smc_ism_unset_conn(conn);
 		tasklet_kill(&conn->rx_tsklet);
+
+		/* detach sndbuf from peer rmb */
+		if (lgr->smcd->is_loopback)
+			smcd_buf_detach(conn);
 	} else {
 		smc_cdc_wait_pend_tx_wr(conn);
 		if (current_work() != &conn->abort_work)
@@ -2423,6 +2427,14 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
 	int rc;
 
+	if (is_smcd && smc->conn.lgr->smcd->is_loopback) {
+		/* Conns above smcd loopback device only create and maintain
+		 * their RMBs. The sndbufs will be attached to peer RMBs once
+		 * getting the tokens.
+		 */
+		return __smc_buf_create(smc, is_smcd, true);
+	}
+
 	/* create send buffer */
 	rc = __smc_buf_create(smc, is_smcd, false);
 	if (rc)
@@ -2439,6 +2451,56 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 	return rc;
 }
 
+/* For smcd loopback conns, attach the local sndbuf to the peer RMB.
+ * Returns 0, -ENOMEM, or SMC_CLC_DECL_ERR_RTOK if the DMB attach fails.
+ */
+int smcd_buf_attach(struct smc_connection *conn)
+{
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+	struct smc_buf_desc *buf_desc;
+	int rc;
+
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return -ENOMEM;
+	rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+	if (rc) {
+		rc = SMC_CLC_DECL_ERR_RTOK;
+		goto free;
+	}
+
+	/* The local sndbuf now refers to the peer RMB: using one is using
+	 * the other. Skip the leading struct smcd_cdc_msg area by advancing
+	 * cpu_addr past it and shrinking len, to align with the peer rmb.
+	 */
+	buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+	buf_desc->len -=  sizeof(struct smcd_cdc_msg);
+	conn->sndbuf_desc = buf_desc;
+	conn->sndbuf_desc->used = 1;
+	/* sk_sndbuf is set by the callers once the attach has succeeded */
+	atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+	return 0;
+
+free:
+	kfree(buf_desc);
+	return rc;
+}
+
+void smcd_buf_detach(struct smc_connection *conn)
+{
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+
+	if (!conn->sndbuf_desc) /* no sndbuf attached, nothing to undo */
+		return;
+	/* undo smcd_buf_attach(): detach from the peer RMB, free the desc */
+	smc_ism_detach_dmb(smcd, peer_token);
+
+	kfree(conn->sndbuf_desc);
+	conn->sndbuf_desc = NULL;
+}
+
 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 {
 	int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd..b51b020 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -518,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
 void smc_smcd_terminate_all(struct smcd_dev *dev);
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_connection *conn);
+void smcd_buf_detach(struct smc_connection *conn);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
 			    struct smc_clc_msg_accept_confirm *clc);
-- 
1.8.3.1


  parent reply	other threads:[~2022-12-20  3:22 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-20  3:21 [RFC PATCH net-next v2 0/5] net/smc:Introduce SMC-D based loopback acceleration Wen Gu
2022-12-20  3:21 ` [RFC PATCH net-next v2 1/5] net/smc: introduce SMC-D loopback device Wen Gu
2023-01-19 16:25   ` Alexandra Winter
2023-01-30 16:30     ` Wen Gu
2022-12-20  3:21 ` [RFC PATCH net-next v2 2/5] net/smc: choose loopback device in SMC-D communication Wen Gu
2022-12-20  3:21 ` [RFC PATCH net-next v2 3/5] net/smc: add dmb attach and detach interface Wen Gu
2022-12-20  3:21 ` Wen Gu [this message]
2022-12-20  3:21 ` [RFC PATCH net-next v2 5/5] net/smc: logic of cursors update in SMC-D loopback connections Wen Gu
2022-12-20 14:02 ` [RFC PATCH net-next v2 0/5] net/smc:Introduce SMC-D based loopback acceleration Niklas Schnelle
2022-12-21 13:14   ` Wen Gu
2023-01-04 16:09     ` Alexandra Winter
2023-01-12 12:12       ` Wen Gu
2023-01-16 11:01         ` Wenjia Zhang
2023-01-18 12:15           ` Wen Gu
2023-01-19 12:30             ` Alexandra Winter
2023-01-30 16:27               ` Wen Gu
2022-12-26 10:46   ` Dust Li
2022-12-28 10:26 ` Wen Gu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1671506505-104676-5-git-send-email-guwen@linux.alibaba.com \
    --to=guwen@linux.alibaba.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=jaka@linux.ibm.com \
    --cc=kgraul@linux.ibm.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=wenjia@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.