From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
To: linux-nvme@lists.infradead.org, Sagi Grimberg <sagi@grimberg.me>,
Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org>,
Chaitanya Kulkarni <Chaitanya.Kulkarni@wdc.com>,
James Smart <james.smart@broadcom.com>,
Ira Weiny <ira.weiny@intel.com>,
Venkataramanan Anirudh <anirudh.venkataramanan@intel.com>,
linux-kernel@vger.kernel.org
Cc: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>,
Chaitanya Kulkarni <chaitanyak@nvidia.com>,
Al Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH v2] nvmet-tcp: Don't map pages which can't come from HIGHMEM
Date: Wed, 31 Aug 2022 00:05:33 +0200 [thread overview]
Message-ID: <20220830220533.17777-1-fmdefrancesco@gmail.com> (raw)
kmap() is being deprecated in favor of kmap_local_page().[1]
There are two main problems with kmap(): (1) It comes with an overhead as
mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.
The pages which will be mapped are allocated in nvmet_tcp_map_data(),
using the GFP_KERNEL flag. This assures that they cannot come from
HIGHMEM. This imply that a straight page_address() can replace the kmap()
of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might
also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because,
after removing the kmap() calls, there would be no longer any need of it.
In addition, there is no reason to use a kvec for the command receive
data buffers iovec, use a bio_vec instead and let iov_iter handle the
buffer mapping and data copy.
Test with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
HIGHMEM64GB enabled.
[1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated
list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/
Cc: Chaitanya Kulkarni <chaitanyak@nvidia.com>
Cc: Keith Busch <kbusch@kernel.org>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
[sagi: added bio_vec plus minor naming changes]
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
v1->v2: Add the "Suggested-by" tags from Al and Christoph, because they
advised to use BVEC instead of KVEC. Fix a minor alignment issue reported
by checkpatch.
drivers/nvme/target/tcp.c | 44 ++++++++++++---------------------------
1 file changed, 13 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index dc3b4dc8fe08..c07de4f4f719 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -77,9 +77,8 @@ struct nvmet_tcp_cmd {
u32 pdu_len;
u32 pdu_recv;
int sg_idx;
- int nr_mapped;
struct msghdr recv_msg;
- struct kvec *iov;
+ struct bio_vec *iov;
u32 flags;
struct list_head entry;
@@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd)
@@ -301,35 +299,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
{
- WARN_ON(unlikely(cmd->nr_mapped > 0));
-
kfree(cmd->iov);
sgl_free(cmd->req.sg);
cmd->iov = NULL;
cmd->req.sg = NULL;
}
-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
-{
- struct scatterlist *sg;
- int i;
-
- sg = &cmd->req.sg[cmd->sg_idx];
-
- for (i = 0; i < cmd->nr_mapped; i++)
- kunmap(sg_page(&sg[i]));
-
- cmd->nr_mapped = 0;
-}
-
-static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
- struct kvec *iov = cmd->iov;
+ struct bio_vec *iov = cmd->iov;
struct scatterlist *sg;
u32 length, offset, sg_offset;
+ int nr_pages;
length = cmd->pdu_len;
- cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(length, PAGE_SIZE);
offset = cmd->rbytes_done;
cmd->sg_idx = offset / PAGE_SIZE;
sg_offset = offset % PAGE_SIZE;
@@ -338,8 +322,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
while (length) {
u32 iov_len = min_t(u32, length, sg->length - sg_offset);
- iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
- iov->iov_len = iov_len;
+ iov->bv_page = sg_page(sg);
+ iov->bv_len = sg->length;
+ iov->bv_offset = sg->offset + sg_offset;
length -= iov_len;
sg = sg_next(sg);
@@ -347,8 +332,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
sg_offset = 0;
}
- iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
- cmd->nr_mapped, cmd->pdu_len);
+ iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
+ nr_pages, cmd->pdu_len);
}
static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
@@ -926,7 +911,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
}
queue->rcv_state = NVMET_TCP_RECV_DATA;
- nvmet_tcp_map_pdu_iovec(cmd);
+ nvmet_tcp_build_pdu_iovec(cmd);
cmd->flags |= NVMET_TCP_F_INIT_FAILED;
}
@@ -952,7 +937,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
cmd->pdu_len = le32_to_cpu(data->data_length);
cmd->pdu_recv = 0;
- nvmet_tcp_map_pdu_iovec(cmd);
+ nvmet_tcp_build_pdu_iovec(cmd);
queue->cmd = cmd;
queue->rcv_state = NVMET_TCP_RECV_DATA;
@@ -1021,7 +1006,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
if (nvmet_tcp_need_data_in(queue->cmd)) {
if (nvmet_tcp_has_inline_data(queue->cmd)) {
queue->rcv_state = NVMET_TCP_RECV_DATA;
- nvmet_tcp_map_pdu_iovec(queue->cmd);
+ nvmet_tcp_build_pdu_iovec(queue->cmd);
return 0;
}
/* send back R2T */
@@ -1141,7 +1126,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
cmd->rbytes_done += ret;
}
- nvmet_tcp_unmap_pdu_iovec(cmd);
if (queue->data_digest) {
nvmet_tcp_prep_recv_ddgst(cmd);
return 0;
@@ -1411,7 +1395,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
nvmet_req_uninit(&cmd->req);
- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}
@@ -1424,7 +1407,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
if (nvmet_tcp_need_data_in(cmd))
nvmet_req_uninit(&cmd->req);
- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}
--
2.37.2
next reply other threads:[~2022-08-30 22:06 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-30 22:05 Fabio M. De Francesco [this message]
2022-08-31 23:53 ` [PATCH v2] nvmet-tcp: Don't map pages which can't come from HIGHMEM Al Viro
2022-09-19 15:40 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220830220533.17777-1-fmdefrancesco@gmail.com \
--to=fmdefrancesco@gmail.com \
--cc=Chaitanya.Kulkarni@wdc.com \
--cc=anirudh.venkataramanan@intel.com \
--cc=chaitanyak@nvidia.com \
--cc=hch@lst.de \
--cc=ira.weiny@intel.com \
--cc=james.smart@broadcom.com \
--cc=kbusch@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.