netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wen Gu <guwen@linux.alibaba.com>
To: wintera@linux.ibm.com, wenjia@linux.ibm.com, hca@linux.ibm.com,
	gor@linux.ibm.com, agordeev@linux.ibm.com, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	jaka@linux.ibm.com
Cc: borntraeger@linux.ibm.com, svens@linux.ibm.com,
	alibuda@linux.alibaba.com, tonylu@linux.alibaba.com,
	guwen@linux.alibaba.com, linux-s390@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH net-next 13/15] net/smc: introduce loopback-ism DMB type control
Date: Thu, 11 Jan 2024 20:00:34 +0800	[thread overview]
Message-ID: <20240111120036.109903-14-guwen@linux.alibaba.com> (raw)
In-Reply-To: <20240111120036.109903-1-guwen@linux.alibaba.com>

This provides a way to {get|set} the type of DMB offered by loopback-ism:
either physically or virtually contiguous memory.

echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_type # physically
echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_type # virtually

The settings take effect after re-activating loopback-ism by:

echo 0 > /sys/devices/virtual/smc/loopback-ism/active
echo 1 > /sys/devices/virtual/smc/loopback-ism/active

After this, the link group and DMBs related to loopback-ism will be
flushed and subsequent DMBs created will be of the desired type.

The motivation for this control is that a physically contiguous DMB has
the best performance but is usually expensive, while a virtually
contiguous DMB is cheap and performs well in most scenarios. However, if
sndbuf and DMB are merged, a virtual DMB will be accessed concurrently
in Tx and Rx, and there will be a bottleneck caused by lock contention
in find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
is set (see link below). So an option is provided.

Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/smc_loopback.c | 80 +++++++++++++++++++++++++++++++++++-------
 net/smc/smc_loopback.h |  6 ++++
 2 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
index a89dbf84aea5..2e734f8e08f5 100644
--- a/net/smc/smc_loopback.c
+++ b/net/smc/smc_loopback.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <net/smc.h>
 
 #include "smc_cdc.h"
@@ -24,6 +25,7 @@
 #define SMC_DMA_ADDR_INVALID	(~(dma_addr_t)0)
 
 static const char smc_lo_dev_name[] = "loopback-ism";
+static unsigned int smc_lo_dmb_type = SMC_LO_DMB_PHYS;
 static struct smc_lo_dev *lo_dev;
 static struct class *smc_class;
 
@@ -124,8 +126,50 @@ static ssize_t active_store(struct device *dev,
 	return count;
 }
 static DEVICE_ATTR_RW(active);
+
+static ssize_t dmb_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct smc_lo_dev *ldev =
+		container_of(dev, struct smc_lo_dev, dev);
+	const char *type;
+
+	switch (ldev->dmb_type) {
+	case SMC_LO_DMB_PHYS:
+		type = "Physically contiguous buffer";
+		break;
+	case SMC_LO_DMB_VIRT:
+		type = "Virtually contiguous buffer";
+		break;
+	default:
+		type = "Unknown type";
+	}
+
+	return sysfs_emit(buf, "%d: %s\n", ldev->dmb_type, type);
+}
+
+static ssize_t dmb_type_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	unsigned int dmb_type;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &dmb_type);
+	if (ret)
+		return ret;
+
+	if (dmb_type != SMC_LO_DMB_PHYS &&
+	    dmb_type != SMC_LO_DMB_VIRT)
+		return -EINVAL;
+
+	smc_lo_dmb_type = dmb_type; /* re-activate to take effect */
+	return count;
+}
+static DEVICE_ATTR_RW(dmb_type);
 static struct attribute *smc_lo_attrs[] = {
 	&dev_attr_active.attr,
+	&dev_attr_dmb_type.attr,
 	&dev_attr_xfer_bytes.attr,
 	&dev_attr_dmbs_cnt.attr,
 	NULL,
@@ -170,8 +214,7 @@ static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
 {
 	struct smc_lo_dmb_node *dmb_node, *tmp_node;
 	struct smc_lo_dev *ldev = smcd->priv;
-	int sba_idx, order, rc;
-	struct page *pages;
+	int sba_idx, rc;
 
 	/* check space for new dmb */
 	for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
@@ -188,16 +231,27 @@ static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
 	}
 
 	dmb_node->sba_idx = sba_idx;
-	order = get_order(dmb->dmb_len);
-	pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
-			    __GFP_NOMEMALLOC | __GFP_COMP |
-			    __GFP_NORETRY | __GFP_ZERO,
-			    order);
-	if (!pages) {
-		rc = -ENOMEM;
-		goto err_node;
+	if (ldev->dmb_type == SMC_LO_DMB_PHYS) {
+		struct page *pages;
+		int order;
+
+		order = get_order(dmb->dmb_len);
+		pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
+				    __GFP_NOMEMALLOC | __GFP_COMP |
+				    __GFP_NORETRY | __GFP_ZERO,
+				    order);
+		if (!pages) {
+			rc = -ENOMEM;
+			goto err_node;
+		}
+		dmb_node->cpu_addr = (void *)page_address(pages);
+	} else {
+		dmb_node->cpu_addr = vzalloc(dmb->dmb_len);
+		if (!dmb_node->cpu_addr) {
+			rc = -ENOMEM;
+			goto err_node;
+		}
 	}
-	dmb_node->cpu_addr = (void *)page_address(pages);
 	dmb_node->len = dmb->dmb_len;
 	dmb_node->dma_addr = SMC_DMA_ADDR_INVALID;
 
@@ -251,7 +305,7 @@ static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
 	write_unlock(&ldev->dmb_ht_lock);
 
 	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
-	kfree(dmb_node->cpu_addr);
+	kvfree(dmb_node->cpu_addr);
 	kfree(dmb_node);
 	SMC_LO_STAT_DMBS_DEC(ldev);
 
@@ -396,6 +450,7 @@ static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
 	ldev->smcd = smcd;
 	smcd->priv = ldev;
 	smc_ism_set_v2_capable();
+	ldev->dmb_type = smc_lo_dmb_type;
 	mutex_lock(&smcd_dev_list.mutex);
 	list_add(&smcd->list, &smcd_dev_list.list);
 	mutex_unlock(&smcd_dev_list.mutex);
@@ -419,6 +474,7 @@ static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
 	mutex_unlock(&smcd_dev_list.mutex);
 	kfree(smcd->conn);
 	kfree(smcd);
+	ldev->dmb_type = smc_lo_dmb_type;
 	smc_lo_clear_stats(ldev);
 }
 
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index d4572ca42f08..8ee5c6805fc4 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -23,6 +23,11 @@
 #define SMC_LO_DMBS_HASH_BITS	12
 #define SMC_LO_CHID		0xFFFF
 
+enum {
+	SMC_LO_DMB_PHYS,
+	SMC_LO_DMB_VIRT,
+};
+
 struct smc_lo_dmb_node {
 	struct hlist_node list;
 	u64 token;
@@ -41,6 +46,7 @@ struct smc_lo_dev {
 	struct smcd_dev *smcd;
 	struct device dev;
 	u8 active;
+	u8 dmb_type;
 	u16 chid;
 	struct smcd_gid local_gid;
 	struct smc_lo_dev_stats64 __percpu *stats;
-- 
2.32.0.3.g01195cf9f


  parent reply	other threads:[~2024-01-11 12:01 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-11 12:00 [PATCH net-next 00/15] net/smc: implement loopback-ism used by SMC-D Wen Gu
2024-01-11 12:00 ` [PATCH net-next 01/15] net/smc: improve SMC-D device dump for virtual ISM Wen Gu
2024-01-11 12:00 ` [PATCH net-next 02/15] net/smc: decouple specialized struct from SMC-D DMB registration Wen Gu
2024-01-11 12:00 ` [PATCH net-next 03/15] net/smc: introduce virtual ISM device loopback-ism Wen Gu
2024-02-16 14:11   ` Wenjia Zhang
2024-02-20  1:20     ` Wen Gu
2024-01-11 12:00 ` [PATCH net-next 04/15] net/smc: implement ID-related operations of loopback-ism Wen Gu
2024-01-11 12:00 ` [PATCH net-next 05/15] net/smc: implement some unsupported " Wen Gu
2024-01-11 12:00 ` [PATCH net-next 06/15] net/smc: implement DMB-related " Wen Gu
2024-02-16 14:13   ` Wenjia Zhang
2024-02-20  1:55     ` Wen Gu
2024-02-23 14:12       ` Wenjia Zhang
2024-02-26  3:04         ` Wen Gu
2024-01-11 12:00 ` [PATCH net-next 07/15] net/smc: register loopback-ism into SMC-D device list Wen Gu
2024-01-11 12:00 ` [PATCH net-next 08/15] net/smc: introduce loopback-ism runtime switch Wen Gu
2024-01-11 12:00 ` [PATCH net-next 09/15] net/smc: introduce loopback-ism statistics attributes Wen Gu
2024-02-16 14:24   ` Wenjia Zhang
2024-02-20  2:45     ` Wen Gu
2024-02-23 14:13       ` Wenjia Zhang
2024-02-26 12:58         ` Wen Gu
2024-01-11 12:00 ` [PATCH net-next 10/15] net/smc: add operations to merge sndbuf with peer DMB Wen Gu
2024-01-11 12:00 ` [PATCH net-next 11/15] net/smc: attach or detach ghost sndbuf to " Wen Gu
2024-01-11 12:00 ` [PATCH net-next 12/15] net/smc: adapt cursor update when sndbuf and peer DMB are merged Wen Gu
2024-01-11 12:00 ` Wen Gu [this message]
2024-02-16 14:25   ` [PATCH net-next 13/15] net/smc: introduce loopback-ism DMB type control Wenjia Zhang
2024-02-20  3:19     ` Wen Gu
2024-01-11 12:00 ` [PATCH net-next 14/15] net/smc: introduce loopback-ism DMB data copy control Wen Gu
2024-01-12 16:24   ` Niklas Schnelle
2024-01-13  7:12     ` Wen Gu
2024-02-16 14:25   ` Wenjia Zhang
2024-02-20  3:36     ` Wen Gu
2024-02-23 14:42       ` Wenjia Zhang
2024-01-11 12:00 ` [PATCH net-next 15/15] net/smc: implement DMB-merged operations of loopback-ism Wen Gu
2024-01-11 13:36 ` [PATCH net-next 00/15] net/smc: implement loopback-ism used by SMC-D Simon Horman
2024-01-12  2:54   ` Wen Gu
2024-01-11 14:50 ` Jiri Pirko
2024-01-12  8:29   ` Wen Gu
2024-01-12  9:10     ` Jiri Pirko
2024-01-12 12:32       ` Wen Gu
2024-01-12 15:50         ` Jiri Pirko
2024-01-13  9:22           ` Wen Gu
2024-01-15 14:11             ` Jiri Pirko
2024-01-18  8:27 ` Wen Gu
2024-01-18 13:59   ` Wenjia Zhang
2024-01-19  1:46     ` Wen Gu
2024-01-23 14:03       ` Alexandra Winter
2024-01-24  6:33         ` Wen Gu
2024-02-05 10:05           ` Wen Gu
2024-02-07  9:08             ` Wenjia Zhang
2024-02-06 12:18 ` Alexandra Winter
2024-02-08 16:12   ` Wen Gu
2024-02-19 14:04   ` Wen Gu
2024-02-16 14:09 ` Wenjia Zhang
2024-02-20  3:52   ` Wen Gu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240111120036.109903-14-guwen@linux.alibaba.com \
    --to=guwen@linux.alibaba.com \
    --cc=agordeev@linux.ibm.com \
    --cc=alibuda@linux.alibaba.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=jaka@linux.ibm.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=svens@linux.ibm.com \
    --cc=tonylu@linux.alibaba.com \
    --cc=wenjia@linux.ibm.com \
    --cc=wintera@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).