From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
linux-nvme@lists.infradead.org, Hannes Reinecke <hare@suse.de>,
Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 3/3] nvme-tcp: per-controller I/O workqueues
Date: Mon, 8 Jul 2024 09:10:13 +0200 [thread overview]
Message-ID: <20240708071013.69984-4-hare@kernel.org> (raw)
In-Reply-To: <20240708071013.69984-1-hare@kernel.org>
From: Hannes Reinecke <hare@suse.de>
Implement per-controller I/O workqueues to reduce workqueue contention
during I/O and improve I/O performance.
Performance comparison:
                 baseline   rx/tx      blk-mq     multiple workqueues
4k seq write:    449MiB/s   480MiB/s   524MiB/s   540MiB/s
4k rand write:   410MiB/s   481MiB/s   524MiB/s   539MiB/s
4k seq read:     478MiB/s   481MiB/s   566MiB/s   582MiB/s
4k rand read:    547MiB/s   480MiB/s   511MiB/s   633MiB/s
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/tcp.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a5c42a7b4bee..fc8f682f686d 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -194,6 +194,7 @@ struct nvme_tcp_ctrl {
struct sockaddr_storage src_addr;
struct nvme_ctrl ctrl;
+ struct workqueue_struct *io_wq;
struct work_struct err_work;
struct delayed_work connect_work;
struct nvme_tcp_request async_req;
@@ -202,7 +203,6 @@ struct nvme_tcp_ctrl {
static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
-static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
@@ -410,7 +410,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
}
if (last && nvme_tcp_queue_has_pending(queue))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -987,7 +987,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled) &&
!test_bit(NVME_TCP_Q_POLLING, &queue->flags))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -999,7 +999,7 @@ static void nvme_tcp_write_space(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && sk_stream_is_writeable(sk))) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1325,7 +1325,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
dev_dbg(queue->ctrl->ctrl.device, "queue %d: queue stall (%u msecs)\n",
nvme_tcp_queue_id(queue), jiffies_to_msecs(overrun));
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -2486,6 +2486,8 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
nvmf_free_options(nctrl->opts);
free_ctrl:
+ destroy_workqueue(ctrl->io_wq);
+
kfree(ctrl->queues);
kfree(ctrl);
}
@@ -2676,7 +2678,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2812,6 +2814,7 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_tcp_ctrl *ctrl;
+ unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
int ret;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -2883,6 +2886,15 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ if (wq_unbound)
+ wq_flags |= WQ_UNBOUND;
+ ctrl->io_wq = alloc_workqueue("nvme_tcp_wq_%d", wq_flags, 0,
+ ctrl->ctrl.instance);
+ if (!ctrl->io_wq) {
+ nvme_put_ctrl(&ctrl->ctrl);
+ return ERR_PTR(-ENOMEM);
+ }
+
return ctrl;
out_kfree_queues:
kfree(ctrl->queues);
@@ -2948,7 +2960,6 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
static int __init nvme_tcp_init_module(void)
{
- unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
int cpu;
BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
@@ -2960,13 +2971,6 @@ static int __init nvme_tcp_init_module(void)
BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
- if (wq_unbound)
- wq_flags |= WQ_UNBOUND;
-
- nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
- if (!nvme_tcp_wq)
- return -ENOMEM;
-
for_each_possible_cpu(cpu)
atomic_set(&nvme_tcp_cpu_queues[cpu], 0);
@@ -2985,8 +2989,6 @@ static void __exit nvme_tcp_cleanup_module(void)
nvme_delete_ctrl(&ctrl->ctrl);
mutex_unlock(&nvme_tcp_ctrl_mutex);
flush_workqueue(nvme_delete_wq);
-
- destroy_workqueue(nvme_tcp_wq);
}
module_init(nvme_tcp_init_module);
--
2.35.3
next prev parent reply other threads:[~2024-07-08 7:10 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-08 7:10 [PATCHv2 0/3] nvme-tcp: improve scalability Hannes Reinecke
2024-07-08 7:10 ` [PATCH 1/3] nvme-tcp: improve rx/tx fairness Hannes Reinecke
2024-07-08 11:57 ` Sagi Grimberg
2024-07-08 13:21 ` Hannes Reinecke
2024-07-08 14:25 ` Sagi Grimberg
2024-07-08 15:50 ` Hannes Reinecke
2024-07-08 19:31 ` Sagi Grimberg
2024-07-09 6:51 ` Hannes Reinecke
2024-07-09 7:06 ` Sagi Grimberg
2024-07-08 7:10 ` [PATCH 2/3] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-07-08 12:08 ` Sagi Grimberg
2024-07-08 12:43 ` Hannes Reinecke
2024-07-08 14:38 ` Sagi Grimberg
2024-07-08 7:10 ` Hannes Reinecke [this message]
2024-07-08 12:12 ` [PATCH 3/3] nvme-tcp: per-controller I/O workqueues Sagi Grimberg
2024-07-08 12:48 ` Hannes Reinecke
2024-07-08 14:41 ` Sagi Grimberg
2024-07-10 11:56 ` [PATCHv2 0/3] nvme-tcp: improve scalability Sagi Grimberg
2024-07-10 14:06 ` Hannes Reinecke
2024-07-10 14:45 ` Sagi Grimberg
2024-07-16 6:31 ` Sagi Grimberg
2024-07-16 7:10 ` Hannes Reinecke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240708071013.69984-4-hare@kernel.org \
--to=hare@kernel.org \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.