public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
From: Wen Gu <guwen@linux.alibaba.com>
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com
Cc: wintera@linux.ibm.com, schnelle@linux.ibm.com,
	gbayer@linux.ibm.com, pasic@linux.ibm.com,
	alibuda@linux.alibaba.com, tonylu@linux.alibaba.com,
	dust.li@linux.alibaba.com, guwen@linux.alibaba.com,
	linux-s390@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH net-next v4 16/18] net/smc: avoid data copy from sndbuf to peer RMB in SMC-D
Date: Sun, 24 Sep 2023 23:16:51 +0800	[thread overview]
Message-ID: <1695568613-125057-17-git-send-email-guwen@linux.alibaba.com> (raw)
In-Reply-To: <1695568613-125057-1-git-send-email-guwen@linux.alibaba.com>

This patch aims to avoid data copy from local sndbuf to peer RMB by mapping
local sndbuf to peer RMB when DMBs have ISM_ATTR_DMB_MAP attribute.

After this, local sndbuf and peer RMB share the same physical memory.

 +----------+                     +----------+
 | socket A |                     | socket B |
 +----------+                     +----------+
       |                               ^
       |         +---------+           |
  regard as      |         |     regard as
  local sndbuf   |  B's    |     local RMB
       |         |  RMB    |           |
       |-------> |         |-----------|
                 +---------+

1. From the perspective of RMB:

     a. Created or reused when connection is created.
     b. Unused and recycled to lgr buffer pool when connection is freed.
     c. Freed when link group is freed.

2. From the perspective of sndbuf:

     a. Mapped to peer RMB by the rtoken exchanged through CLC message.
        Then accessing local sndbuf is equivalent to accessing peer RMB.
     c. Unmapped from peer RMB and freed when connection is freed. Won't be
        recycled to lgr buffer pool.

Therefore, the data written to local sndbuf will directly reach peer RMB.

Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/af_smc.c   | 14 +++++++++++
 net/smc/smc_core.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_core.h |  1 +
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6435659..47556c3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1442,6 +1442,12 @@ static int smc_connect_ism(struct smc_sock *smc,
 	}
 
 	smc_conn_save_peer_info(smc, aclc);
+
+	if (smc_ism_dmb_mappable(smc->conn.lgr->smcd)) {
+		rc = smcd_buf_attach(smc);
+		if (rc)
+			goto connect_abort;
+	}
 	smc_close_init(smc);
 	smc_rx_init(smc);
 	smc_tx_init(smc);
@@ -2550,6 +2556,14 @@ static void smc_listen_work(struct work_struct *work)
 		mutex_unlock(&smc_server_lgr_pending);
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
+
+	if (ini->is_smcd &&
+	    smc_ism_dmb_mappable(new_smc->conn.lgr->smcd)) {
+		rc = smcd_buf_attach(new_smc);
+		if (rc)
+			goto out_decl;
+	}
+
 	smc_listen_out_connected(new_smc);
 	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
 	goto out_free;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 6d7c738..96b1def 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1154,6 +1154,20 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
 	}
 }
 
+static void smcd_buf_detach(struct smc_connection *conn)
+{
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+
+	if (!conn->sndbuf_desc)
+		return;
+
+	smc_ism_detach_dmb(smcd, peer_token);
+
+	kfree(conn->sndbuf_desc);
+	conn->sndbuf_desc = NULL;
+}
+
 static void smc_buf_unuse(struct smc_connection *conn,
 			  struct smc_link_group *lgr)
 {
@@ -1198,6 +1212,10 @@ void smc_conn_free(struct smc_connection *conn)
 		if (!list_empty(&lgr->list))
 			smc_ism_unset_conn(conn);
 		tasklet_kill(&conn->rx_tsklet);
+
+		/* detach sndbuf from peer RMB */
+		if (smc_ism_dmb_mappable(lgr->smcd))
+			smcd_buf_detach(conn);
 	} else {
 		smc_cdc_wait_pend_tx_wr(conn);
 		if (current_work() != &conn->abort_work)
@@ -2459,15 +2477,23 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
  */
 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
+	bool sndbuf_created = false;
 	int rc;
 
+	if (is_smcd &&
+	    smc_ism_dmb_mappable(smc->conn.lgr->smcd))
+		goto create_rmb;
+
 	/* create send buffer */
 	rc = __smc_buf_create(smc, is_smcd, false);
 	if (rc)
 		return rc;
+	sndbuf_created = true;
+
+create_rmb:
 	/* create rmb */
 	rc = __smc_buf_create(smc, is_smcd, true);
-	if (rc) {
+	if (rc && sndbuf_created) {
 		down_write(&smc->conn.lgr->sndbufs_lock);
 		list_del(&smc->conn.sndbuf_desc->list);
 		up_write(&smc->conn.lgr->sndbufs_lock);
@@ -2477,6 +2503,48 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 	return rc;
 }
 
+int smcd_buf_attach(struct smc_sock *smc)
+{
+	struct smc_connection *conn = &smc->conn;
+	struct smcd_dev *smcd = conn->lgr->smcd;
+	u64 peer_token = conn->peer_token;
+	struct smc_buf_desc *buf_desc;
+	int rc;
+
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return -ENOMEM;
+
+	/* map local sndbuf desc to peer RMB, so operations on local
+	 * sndbuf are equivalent to operations on peer RMB.
+	 */
+	rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
+	if (rc) {
+		rc = SMC_CLC_DECL_MEM;
+		goto free;
+	}
+
+	smc->sk.sk_sndbuf = buf_desc->len;
+	buf_desc->cpu_addr = (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
+	buf_desc->len -= sizeof(struct smcd_cdc_msg);
+	conn->sndbuf_desc = buf_desc;
+	conn->sndbuf_desc->used = 1;
+	atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
+	return 0;
+
+free:
+	if (conn->rmb_desc) {
+		/* free local RMB as well */
+		down_write(&conn->lgr->rmbs_lock);
+		list_del(&conn->rmb_desc->list);
+		up_write(&conn->lgr->rmbs_lock);
+		smc_buf_free(conn->lgr, true, conn->rmb_desc);
+		conn->rmb_desc = NULL;
+	}
+	kfree(buf_desc);
+	return rc;
+}
+
 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 {
 	int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d57eb9b..2cba119 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -551,6 +551,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
 void smc_smcd_terminate_all(struct smcd_dev *dev);
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
+int smcd_buf_attach(struct smc_sock *smc);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
 			    struct smc_clc_msg_accept_confirm *clc);
-- 
1.8.3.1


  parent reply	other threads:[~2023-09-24 15:18 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-24 15:16 [PATCH net-next v4 00/18] net/smc: implement virtual ISM extension and loopback-ism Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 01/18] net/smc: decouple ism_dev from SMC-D device dump Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 02/18] net/smc: decouple ism_dev from SMC-D DMB registration Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 03/18] net/smc: extract v2 check helper from SMC-D device registration Wen Gu
2023-09-28  3:08   ` Jan Karcher
2023-09-30  8:41     ` Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 04/18] net/smc: support SMCv2.x supplemental features negotiation Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 05/18] net/smc: reserve CHID range for SMC-D virtual device Wen Gu
2023-09-28  3:08   ` Jan Karcher
2023-09-28  9:10     ` Alexandra Winter
2023-10-04  8:27       ` Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 06/18] net/smc: extend GID to 128bits only for virtual ISM device Wen Gu
2023-10-12  7:54   ` Dust Li
2023-10-12 13:24     ` Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 07/18] net/smc: disable SEID on non-s390 architecture Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 08/18] net/smc: enable virtual ISM device feature bit Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 09/18] net/smc: introduce SMC-D loopback device Wen Gu
2023-09-25 11:50   ` Alexandra Winter
2023-09-25 13:29     ` Alexandra Winter
2023-09-25 14:20       ` Wen Gu
2023-09-25 13:57     ` Wen Gu
2023-09-25 15:18     ` Dust Li
2023-09-26  7:24       ` Alexandra Winter
2023-09-28  3:16         ` Jan Karcher
2023-09-28 18:35           ` Wen Gu
2023-09-29 14:08             ` Alexandra Winter
2023-10-04  9:05               ` Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 10/18] net/smc: implement ID-related operations of loopback Wen Gu
2023-10-18 13:24   ` Alexandra Winter
2023-09-24 15:16 ` [PATCH net-next v4 11/18] net/smc: implement some unsupported " Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 12/18] net/smc: implement DMB-related " Wen Gu
2023-09-24 23:29   ` kernel test robot
2023-09-25  1:47     ` Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 13/18] net/smc: register loopback device as SMC-Dv2 device Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 14/18] net/smc: add operation for getting DMB attribute Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 15/18] net/smc: add operations for DMB attach and detach Wen Gu
2023-09-24 15:16 ` Wen Gu [this message]
2023-09-24 15:16 ` [PATCH net-next v4 17/18] net/smc: modify cursor update logic when sndbuf mapped to RMB Wen Gu
2023-09-24 15:16 ` [PATCH net-next v4 18/18] net/smc: add interface implementation of loopback device Wen Gu
2023-09-26  7:30 ` [PATCH net-next v4 00/18] net/smc: implement virtual ISM extension and loopback-ism Alexandra Winter
2023-09-27 15:16 ` Alexandra Winter
2023-09-28  8:56   ` Alexandra Winter
2023-09-28 17:29     ` Wen Gu
2023-09-29 13:31       ` Alexandra Winter
2023-10-04  8:42         ` Wen Gu
2023-09-28 16:42   ` Wen Gu
2023-10-05  8:21 ` Niklas Schnelle
2023-10-08  7:19   ` Wen Gu
2023-10-17  3:49     ` Wen Gu
2023-10-18 19:43       ` Wenjia Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1695568613-125057-17-git-send-email-guwen@linux.alibaba.com \
    --to=guwen@linux.alibaba.com \
    --cc=alibuda@linux.alibaba.com \
    --cc=davem@davemloft.net \
    --cc=dust.li@linux.alibaba.com \
    --cc=edumazet@google.com \
    --cc=gbayer@linux.ibm.com \
    --cc=jaka@linux.ibm.com \
    --cc=kgraul@linux.ibm.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pasic@linux.ibm.com \
    --cc=schnelle@linux.ibm.com \
    --cc=tonylu@linux.alibaba.com \
    --cc=wenjia@linux.ibm.com \
    --cc=wintera@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox