From: Wen Gu <guwen@linux.alibaba.com>
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com
Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [RFC PATCH net-next v2 4/5] net/smc: avoid data copy from sndbuf to peer RMB in SMC-D loopback
Date: Tue, 20 Dec 2022 11:21:44 +0800 [thread overview]
Message-ID: <1671506505-104676-5-git-send-email-guwen@linux.alibaba.com> (raw)
In-Reply-To: <1671506505-104676-1-git-send-email-guwen@linux.alibaba.com>
This patch aims to improve SMC-D loopback performance by avoiding
the data copy from the local sndbuf to the peer RMB. The main idea is
to let the local sndbuf and the peer RMB share the same physical memory.
 +----------+                     +----------+
 | socket A |                     | socket B |
 +----------+                     +----------+
       |                               ^
       |         +---------+           |
  regard as      |         | ----------|
  local sndbuf   |  B's    |     regard as
       |         |  RMB    |     local RMB
       |-------> |         |
                 +---------+
For connections using the smcd loopback device:
1. Only create and maintain the local RMB.
   a. Create or reuse an RMB when a connection is created;
   b. Free the RMB when the lgr is freed;
2. Attach the local sndbuf to the peer RMB.
   a. sndbuf_desc describes the same memory region as the peer rmb_desc.
   b. sndbuf_desc is exclusive to a specific connection and won't be
      added to the lgr buffer pool for reuse.
   c. sndbuf is attached to the peer RMB when the remote token is
      received after the CLC accept/confirm message.
   d. sndbuf is detached from the peer RMB when the connection is freed.
Therefore, data copied from userspace to the local sndbuf reaches the
peer RMB directly.
Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
net/smc/af_smc.c | 23 +++++++++++++++++++-
net/smc/smc_core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_core.h | 2 ++
3 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b9884c8..c7de566 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1073,7 +1073,6 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
* The RFC patch hasn't resolved this, just simply always
* chooses loopback device first, and fallback if loopback
* communication is impossible.
- *
*/
/* check if there is an ism or loopback device available */
if (!(ini->smcd_version & SMC_V1) ||
@@ -1397,6 +1396,17 @@ static int smc_connect_ism(struct smc_sock *smc,
}
smc_conn_save_peer_info(smc, aclc);
+
+ /* special for smcd loopback
+ * conns above smcd loopback dev only create their rmbs.
+ * their sndbufs are 'maps' of peer rmbs.
+ */
+ if (smc->conn.lgr->smcd->is_loopback) {
+ rc = smcd_buf_attach(&smc->conn);
+ if (rc)
+ goto connect_abort;
+ smc->sk.sk_sndbuf = 2 * (smc->conn.sndbuf_desc->len);
+ }
smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -2464,6 +2474,17 @@ static void smc_listen_work(struct work_struct *work)
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, cclc);
+
+ /* special for smcd loopback
+ * conns above smcd loopback dev only create their rmbs.
+ * their sndbufs are 'maps' of peer rmbs.
+ */
+ if (ini->is_smcd && new_smc->conn.lgr->smcd->is_loopback) {
+ rc = smcd_buf_attach(&new_smc->conn);
+ if (rc)
+ goto out_decl;
+ new_smc->sk.sk_sndbuf = 2 * (new_smc->conn.sndbuf_desc->len);
+ }
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c305d8d..bf40ad3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1171,6 +1171,10 @@ void smc_conn_free(struct smc_connection *conn)
if (!list_empty(&lgr->list))
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
+
+ /* detach sndbuf from peer rmb */
+ if (lgr->smcd->is_loopback)
+ smcd_buf_detach(conn);
} else {
smc_cdc_wait_pend_tx_wr(conn);
if (current_work() != &conn->abort_work)
@@ -2423,6 +2427,14 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
int rc;
+ if (is_smcd && smc->conn.lgr->smcd->is_loopback) {
+ /* Conns above smcd loopback device only create and maintain
+ * their RMBs. The sndbufs will be attached to peer RMBs once
+ * getting the tokens.
+ */
+ return __smc_buf_create(smc, is_smcd, true);
+ }
+
/* create send buffer */
rc = __smc_buf_create(smc, is_smcd, false);
if (rc)
@@ -2439,6 +2451,56 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
return rc;
}
+/* For smcd loopback conns, attach the local sndbuf to the peer RMB, so that
+ * data copied to the sndbuf lands directly in the peer RMB.
+ * Returns 0 on success, -ENOMEM or SMC_CLC_DECL_ERR_RTOK on failure.
+ */
+int smcd_buf_attach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+ struct smc_buf_desc *buf_desc;
+ int rc;
+
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return -ENOMEM;
+ rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+ if (rc) {
+ rc = SMC_CLC_DECL_ERR_RTOK;
+ goto free;
+ }
+
+ /* The local sndbuf now refers to the peer RMB. Skip the leading
+ * sizeof(struct smcd_cdc_msg) bytes to align with the peer RMB.
+ */
+ buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+ buf_desc->len -= sizeof(struct smcd_cdc_msg);
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_desc->used = 1;
+ /* sk_sndbuf is set by the callers after a successful attach */
+ atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+ return 0;
+
+free:
+ kfree(buf_desc);
+ return rc;
+}
+
+void smcd_buf_detach(struct smc_connection *conn)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ u64 peer_token = conn->peer_token;
+
+ if (!conn->sndbuf_desc) /* no sndbuf attached, nothing to do */
+ return;
+
+ smc_ism_detach_dmb(smcd, peer_token); /* same token as used for attach */
+
+ kfree(conn->sndbuf_desc); /* descriptor was kzalloc'ed in smcd_buf_attach() */
+ conn->sndbuf_desc = NULL; /* makes a repeated detach a no-op */
+}
+
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd..b51b020 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -518,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_connection *conn);
+void smcd_buf_detach(struct smc_connection *conn);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
--
1.8.3.1
next prev parent reply other threads:[~2022-12-20 3:22 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-20 3:21 [RFC PATCH net-next v2 0/5] net/smc:Introduce SMC-D based loopback acceleration Wen Gu
2022-12-20 3:21 ` [RFC PATCH net-next v2 1/5] net/smc: introduce SMC-D loopback device Wen Gu
2023-01-19 16:25 ` Alexandra Winter
2023-01-30 16:30 ` Wen Gu
2022-12-20 3:21 ` [RFC PATCH net-next v2 2/5] net/smc: choose loopback device in SMC-D communication Wen Gu
2022-12-20 3:21 ` [RFC PATCH net-next v2 3/5] net/smc: add dmb attach and detach interface Wen Gu
2022-12-20 3:21 ` Wen Gu [this message]
2022-12-20 3:21 ` [RFC PATCH net-next v2 5/5] net/smc: logic of cursors update in SMC-D loopback connections Wen Gu
2022-12-20 14:02 ` [RFC PATCH net-next v2 0/5] net/smc:Introduce SMC-D based loopback acceleration Niklas Schnelle
2022-12-21 13:14 ` Wen Gu
2023-01-04 16:09 ` Alexandra Winter
2023-01-12 12:12 ` Wen Gu
2023-01-16 11:01 ` Wenjia Zhang
2023-01-18 12:15 ` Wen Gu
2023-01-19 12:30 ` Alexandra Winter
2023-01-30 16:27 ` Wen Gu
2022-12-26 10:46 ` Dust Li
2022-12-28 10:26 ` Wen Gu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1671506505-104676-5-git-send-email-guwen@linux.alibaba.com \
--to=guwen@linux.alibaba.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jaka@linux.ibm.com \
--cc=kgraul@linux.ibm.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=wenjia@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox