From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Alexey Lyashkov <alexey.lyashkov@hpe.com>,
Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 46/50] lnet: o2iblnd: avoid memory copy for short msg
Date: Sun, 20 Mar 2022 09:31:00 -0400 [thread overview]
Message-ID: <1647783064-20688-47-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1647783064-20688-1-git-send-email-jsimmons@infradead.org>
From: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Modern cards allow sending kernel memory data without mapping it
or copying it to the preallocated buffer.
This reduces lnet selftest CPU consumption by 3% for messages
smaller than 4k in size.
HPE-bug-id: LUS-1796
WC-bug-id: https://jira.whamcloud.com/browse/LU-14008
Lustre-commit: bebd87cc6c9acc577 ("LU-14008 o2iblnd: avoid memory copy for short msg")
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-on: https://review.whamcloud.com/40262
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
net/lnet/klnds/o2iblnd/o2iblnd.c | 3 +-
net/lnet/klnds/o2iblnd/o2iblnd.h | 3 ++
net/lnet/klnds/o2iblnd/o2iblnd_cb.c | 63 ++++++++++++++++++++++++++++---------
3 files changed, 52 insertions(+), 17 deletions(-)
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.c b/net/lnet/klnds/o2iblnd/o2iblnd.c
index 9ce6082..8dce4179 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.c
@@ -628,10 +628,9 @@ static unsigned int kiblnd_send_wrs(struct kib_conn *conn)
*/
int ret;
int multiplier = 1 + conn->ibc_max_frags;
- enum kib_dev_caps dev_caps = conn->ibc_hdev->ibh_dev->ibd_dev_caps;
/* FastReg needs two extra WRs for map and invalidate */
- if (dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)
+ if (IS_FAST_REG_DEV(conn->ibc_hdev->ibh_dev))
multiplier += 2;
/* account for a maximum of ibc_queue_depth in-flight transfers */
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.h b/net/lnet/klnds/o2iblnd/o2iblnd.h
index 5a4b4f8..e798695 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.h
@@ -149,6 +149,9 @@ enum kib_dev_caps {
IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT = BIT(1),
};
+#define IS_FAST_REG_DEV(dev) \
+ ((dev)->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)
+
struct kib_dev {
struct list_head ibd_list; /* chain on kib_devs */
struct list_head ibd_fail_list; /* chain on kib_failed_devs */
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 983599f..a88939e7 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -42,8 +42,11 @@
static void kiblnd_peer_alive(struct kib_peer_ni *peer_ni);
static void kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
int error);
-static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx,
- int type, int body_nob);
+static struct ib_rdma_wr *
+kiblnd_init_tx_msg_payload(struct lnet_ni *ni, struct kib_tx *tx,
+ int type, int body_nob, int payload_nob);
+#define kiblnd_init_tx_msg(ni, tx, type, body) \
+ kiblnd_init_tx_msg_payload(ni, tx, type, body, 0)
static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
int resid, struct kib_rdma_desc *dstrd,
u64 dstcookie);
@@ -572,7 +575,7 @@ static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
* in trying to map the memory, because it'll just fail. So
* preemptively fail with an appropriate message
*/
- if ((dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED) &&
+ if (IS_FAST_REG_DEV(dev) &&
!(dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT) &&
tx->tx_gaps) {
CERROR("Using FastReg with no GAPS support, but tx has gaps\n");
@@ -1021,9 +1024,9 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
tx->tx_nsge++;
}
-static void
-kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
- int body_nob)
+static struct ib_rdma_wr *
+kiblnd_init_tx_msg_payload(struct lnet_ni *ni, struct kib_tx *tx, int type,
+ int body_nob, int payload)
{
struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
int nob = offsetof(struct kib_msg, ibm_u) + body_nob;
@@ -1032,7 +1035,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
LASSERT(nob <= IBLND_MSG_SIZE);
- kiblnd_init_msg(tx->tx_msg, type, body_nob);
+ kiblnd_init_msg(tx->tx_msg, type, body_nob + payload);
*wrq = (struct ib_rdma_wr) {
.wr = {
@@ -1047,6 +1050,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
kiblnd_init_tx_sge(tx, tx->tx_msgaddr, nob);
tx->tx_nwrq++;
+ return wrq;
}
static int
@@ -1654,15 +1658,44 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
ibmsg = tx->tx_msg;
lnet_hdr_to_nid4(hdr, &ibmsg->ibm_u.immediate.ibim_hdr);
- rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob,
- &from);
- if (rc != payload_nob) {
- kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list);
- return -EFAULT;
- }
+ if (payload_nob) {
+ struct ib_rdma_wr *wrq;
+ int i;
+
+ nob = offsetof(struct kib_immediate_msg, ibim_payload[0]);
+ wrq = kiblnd_init_tx_msg_payload(ni, tx, IBLND_MSG_IMMEDIATE,
+ nob, payload_nob);
+
+ rd = tx->tx_rd;
+ rc = kiblnd_setup_rd_kiov(ni, tx, rd,
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+ if (rc != 0) {
+ CERROR("Can't setup IMMEDIATE src for %s: %d\n",
+ libcfs_nidstr(&target->nid), rc);
+ kiblnd_tx_done(tx);
+ return -EIO;
+ }
- nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
- kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+ /* lets generate a SGE chain */
+ for (i = 0; i < rd->rd_nfrags; i++) {
+ kiblnd_init_tx_sge(tx, rd->rd_frags[i].rf_addr,
+ rd->rd_frags[i].rf_nob);
+ wrq->wr.num_sge++;
+ }
+ } else {
+ rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload,
+ payload_nob, &from);
+ if (rc != payload_nob) {
+ kiblnd_pool_free_node(&tx->tx_pool->tpo_pool,
+ &tx->tx_list);
+ return -EFAULT;
+ }
+
+ nob = offsetof(struct kib_immediate_msg,
+ ibim_payload[payload_nob]);
+ kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+ }
/* finalise lntmsg on completion */
tx->tx_lntmsg[0] = lntmsg;
--
1.8.3.1
_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
next prev parent reply other threads:[~2022-03-20 13:33 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-20 13:30 [lustre-devel] [PATCH 00/50] lustre: update to OpenSFS tree as of March 20, 2022 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 01/50] lustre: type cleanups and remove debug statements James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 02/50] lustre: osc: Fix grant test for ARM James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 03/50] lnet: extend nids in struct lnet_msg James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 04/50] lnet: Change lnet_send() to take large-addr nids James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 05/50] lnet: use large nids in struct lnet_event James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 06/50] lnet: socklnd: prepare for new KSOCK_MSG type James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 07/50] lnet: socklnd: don't deref lnet_hdr in LNDs James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 08/50] lustre: sec: make client encryption compatible with ext4 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 09/50] lustre: sec: allow subdir mount of encrypted dir James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 10/50] lustre: fld: repeat rpc in fld_client_rpc after EAGAIN James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 11/50] lustre: fld: don't obtain a slot for fld request James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 12/50] lustre: update version to 2.14.57 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 13/50] lustre: llite: deadlock in ll_new_node() James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 14/50] lnet: o2iblnd: avoid static allocation for msg tx James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 15/50] lnet: separate lnet_hdr in msg from that in lnd James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 16/50] lnet: change lnet_hdr to store large nids James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 17/50] lnet: change lnet_prep_send to take net_processid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 18/50] lnet: convert to struct lnet_process_id in lib-move James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 19/50] lnet: convert LNetGetID to return an large-addr pid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 20/50] lnet: alter lnd_notify_peer_down() to take lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 21/50] lnet: socklnd: move lnet_hdr unpack into ->pro_unpack James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 22/50] lnet: socklnd: Change ksock_hello_msg to struct lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 23/50] lnet: socklnd: add hello message version 4 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 24/50] lnet: Convert ping to support 16-bytes address James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 25/50] lnet: convert nids in lnet_parse to lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 26/50] lnet: change src_nid arg to lnet_parse() to 16byte James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 27/50] lnet: Fix NULL-deref in lnet_nidstr_r() James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 28/50] lnet: change lnet_del_route() to take lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 29/50] lustre: llite: Move free user pages James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 30/50] lustre: llite: Do not get/put DIO pages James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 31/50] lustre: llite: Remove unnecessary page get/put James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 32/50] lustre: llite: LL_IOC_LMV_GETSTRIPE 'default' shows inherit layout James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 33/50] lustre: hsm: update size upon completion of data version James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 34/50] lustre: llite: Delay dput in ll_dirty_page_discard_warn James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 35/50] lnet: libcfs: Use FAIL_CHECK_QUIET for fake i/o James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 36/50] lnet: Avoid peer NI recovery for local interface James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 37/50] lustre: osc: add OBD_IOC_GETATTR support for osc James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 38/50] lustre: sec: present .fscrypt in subdir mount James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 39/50] lnet: improve hash distribution across CPTs James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 40/50] lustre: osc: osc_extent_wait() deadlock James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 41/50] lustre: quota: delete unused quota ID James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 42/50] lnet: Check LNET_NID_IS_ANY in LNET_NID_NET James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 43/50] lustre: llite: clear async errors on write commit sync James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 44/50] lnet: lnet_peer_data_present() memory leak James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 45/50] lnet: Don't use pref NI for reserved portal James Simmons
2022-03-20 13:31 ` James Simmons [this message]
2022-03-20 13:31 ` [lustre-devel] [PATCH 47/50] lustre: llite: set default LMV hash type with 2.12 MDS James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 48/50] lnet: Stop discovery on deleted peer NI James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 49/50] lustre: sec: fix DIO for encrypted files James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 50/50] lustre: ptlrpc: Use after free of 'conn' in rhashtable retry James Simmons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1647783064-20688-47-git-send-email-jsimmons@infradead.org \
--to=jsimmons@infradead.org \
--cc=adilger@whamcloud.com \
--cc=alexey.lyashkov@hpe.com \
--cc=green@whamcloud.com \
--cc=lustre-devel@lists.lustre.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).