From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
linux-nvme@lists.infradead.org, Hannes Reinecke <hare@suse.de>,
Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 3/3] nvme-tcp: per-controller I/O workqueues
Date: Mon, 8 Jul 2024 09:10:13 +0200 [thread overview]
Message-ID: <20240708071013.69984-4-hare@kernel.org> (raw)
In-Reply-To: <20240708071013.69984-1-hare@kernel.org>
From: Hannes Reinecke <hare@suse.de>
Implement per-controller I/O workqueues to reduce workqueue contention
during I/O and improve I/O performance.
Performance comparison:

                 baseline   rx/tx      blk-mq     multiple workqueues
4k seq write:    449MiB/s   480MiB/s   524MiB/s   540MiB/s
4k rand write:   410MiB/s   481MiB/s   524MiB/s   539MiB/s
4k seq read:     478MiB/s   481MiB/s   566MiB/s   582MiB/s
4k rand read:    547MiB/s   480MiB/s   511MiB/s   633MiB/s
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/tcp.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a5c42a7b4bee..fc8f682f686d 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -194,6 +194,7 @@ struct nvme_tcp_ctrl {
struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl;
+ struct workqueue_struct *io_wq;
struct work_struct err_work;
struct delayed_work connect_work;
struct nvme_tcp_request async_req;
@@ -202,7 +203,6 @@ struct nvme_tcp_ctrl {
static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
-static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
@@ -410,7 +410,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
}
if (last && nvme_tcp_queue_has_pending(queue))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -987,7 +987,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled) &&
!test_bit(NVME_TCP_Q_POLLING, &queue->flags))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -999,7 +999,7 @@ static void nvme_tcp_write_space(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && sk_stream_is_writeable(sk))) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1325,7 +1325,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
dev_dbg(queue->ctrl->ctrl.device, "queue %d: queue stall (%u msecs)\n",
nvme_tcp_queue_id(queue), jiffies_to_msecs(overrun));
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -2486,6 +2486,8 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
nvmf_free_options(nctrl->opts);
free_ctrl:
+ destroy_workqueue(ctrl->io_wq);
+
kfree(ctrl->queues);
kfree(ctrl);
}
@@ -2676,7 +2678,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2812,6 +2814,7 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_tcp_ctrl *ctrl;
+ unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
int ret;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -2883,6 +2886,15 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ if (wq_unbound)
+ wq_flags |= WQ_UNBOUND;
+ ctrl->io_wq = alloc_workqueue("nvme_tcp_wq_%d", wq_flags, 0,
+ ctrl->ctrl.instance);
+ if (!ctrl->io_wq) {
+ nvme_put_ctrl(&ctrl->ctrl);
+ return ERR_PTR(-ENOMEM);
+ }
+
return ctrl;
out_kfree_queues:
kfree(ctrl->queues);
@@ -2948,7 +2960,6 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
static int __init nvme_tcp_init_module(void)
{
- unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
int cpu;
BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
@@ -2960,13 +2971,6 @@ static int __init nvme_tcp_init_module(void)
BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
- if (wq_unbound)
- wq_flags |= WQ_UNBOUND;
-
- nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
- if (!nvme_tcp_wq)
- return -ENOMEM;
-
for_each_possible_cpu(cpu)
atomic_set(&nvme_tcp_cpu_queues[cpu], 0);
@@ -2985,8 +2989,6 @@ static void __exit nvme_tcp_cleanup_module(void)
nvme_delete_ctrl(&ctrl->ctrl);
mutex_unlock(&nvme_tcp_ctrl_mutex);
flush_workqueue(nvme_delete_wq);
-
- destroy_workqueue(nvme_tcp_wq);
}
module_init(nvme_tcp_init_module);
--
2.35.3
next prev parent reply other threads:[~2024-07-08 7:10 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-08 7:10 [PATCHv2 0/3] nvme-tcp: improve scalability Hannes Reinecke
2024-07-08 7:10 ` [PATCH 1/3] nvme-tcp: improve rx/tx fairness Hannes Reinecke
2024-07-08 11:57 ` Sagi Grimberg
2024-07-08 13:21 ` Hannes Reinecke
2024-07-08 14:25 ` Sagi Grimberg
2024-07-08 15:50 ` Hannes Reinecke
2024-07-08 19:31 ` Sagi Grimberg
2024-07-09 6:51 ` Hannes Reinecke
2024-07-09 7:06 ` Sagi Grimberg
2024-07-08 7:10 ` [PATCH 2/3] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-07-08 12:08 ` Sagi Grimberg
2024-07-08 12:43 ` Hannes Reinecke
2024-07-08 14:38 ` Sagi Grimberg
2024-07-08 7:10 ` Hannes Reinecke [this message]
2024-07-08 12:12 ` [PATCH 3/3] nvme-tcp: per-controller I/O workqueues Sagi Grimberg
2024-07-08 12:48 ` Hannes Reinecke
2024-07-08 14:41 ` Sagi Grimberg
2024-07-10 11:56 ` [PATCHv2 0/3] nvme-tcp: improve scalability Sagi Grimberg
2024-07-10 14:06 ` Hannes Reinecke
2024-07-10 14:45 ` Sagi Grimberg
2024-07-16 6:31 ` Sagi Grimberg
2024-07-16 7:10 ` Hannes Reinecke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240708071013.69984-4-hare@kernel.org \
--to=hare@kernel.org \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox