[PATCH 3/3] nvme-tcp: per-controller I/O workqueues

Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@suse.de>,
	Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 3/3] nvme-tcp: per-controller I/O workqueues
Date: Mon,  8 Jul 2024 09:10:13 +0200	[thread overview]
Message-ID: <20240708071013.69984-4-hare@kernel.org> (raw)
In-Reply-To: <20240708071013.69984-1-hare@kernel.org>

From: Hannes Reinecke <hare@suse.de>

Implement per-controller I/O workqueues to reduce workqueue contention
during I/O and improve I/O performance.

Performance comparison:
               baseline rx/tx    blk-mq   multiple workqueues
4k seq write:  449MiB/s 480MiB/s 524MiB/s 540MiB/s
4k rand write: 410MiB/s 481MiB/s 524MiB/s 539MiB/s
4k seq read:   478MiB/s 481MiB/s 566MiB/s 582MiB/s
4k rand read:  547MiB/s 480MiB/s 511MiB/s 633MiB/s

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a5c42a7b4bee..fc8f682f686d 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -194,6 +194,7 @@ struct nvme_tcp_ctrl {
 	struct sockaddr_storage src_addr;
 	struct nvme_ctrl	ctrl;
 
+	struct workqueue_struct	*io_wq;
 	struct work_struct	err_work;
 	struct delayed_work	connect_work;
 	struct nvme_tcp_request async_req;
@@ -202,7 +203,6 @@ struct nvme_tcp_ctrl {
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
-static struct workqueue_struct *nvme_tcp_wq;
 static const struct blk_mq_ops nvme_tcp_mq_ops;
 static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
@@ -410,7 +410,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	}
 
 	if (last && nvme_tcp_queue_has_pending(queue))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -987,7 +987,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && queue->rd_enabled) &&
 	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -999,7 +999,7 @@ static void nvme_tcp_write_space(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && sk_stream_is_writeable(sk))) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1325,7 +1325,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		dev_dbg(queue->ctrl->ctrl.device, "queue %d: queue stall (%u msecs)\n",
 			nvme_tcp_queue_id(queue), jiffies_to_msecs(overrun));
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -2486,6 +2486,8 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
+	destroy_workqueue(ctrl->io_wq);
+
 	kfree(ctrl->queues);
 	kfree(ctrl);
 }
@@ -2676,7 +2678,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 
 	if (!llist_empty(&queue->req_list))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2812,6 +2814,7 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_tcp_ctrl *ctrl;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
 	int ret;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -2883,6 +2886,15 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	if (wq_unbound)
+		wq_flags |= WQ_UNBOUND;
+	ctrl->io_wq = alloc_workqueue("nvme_tcp_wq_%d", wq_flags, 0,
+				      ctrl->ctrl.instance);
+	if (!ctrl->io_wq) {
+		nvme_put_ctrl(&ctrl->ctrl);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	return ctrl;
 out_kfree_queues:
 	kfree(ctrl->queues);
@@ -2948,7 +2960,6 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
-	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
 	int cpu;
 
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
@@ -2960,13 +2971,6 @@ static int __init nvme_tcp_init_module(void)
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
 
-	if (wq_unbound)
-		wq_flags |= WQ_UNBOUND;
-
-	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
-	if (!nvme_tcp_wq)
-		return -ENOMEM;
-
 	for_each_possible_cpu(cpu)
 		atomic_set(&nvme_tcp_cpu_queues[cpu], 0);
 
@@ -2985,8 +2989,6 @@ static void __exit nvme_tcp_cleanup_module(void)
 		nvme_delete_ctrl(&ctrl->ctrl);
 	mutex_unlock(&nvme_tcp_ctrl_mutex);
 	flush_workqueue(nvme_delete_wq);
-
-	destroy_workqueue(nvme_tcp_wq);
 }
 
 module_init(nvme_tcp_init_module);
-- 
2.35.3

next prev parent reply	other threads:[~2024-07-08  7:10 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-08  7:10 [PATCHv2 0/3] nvme-tcp: improve scalability Hannes Reinecke
2024-07-08  7:10 ` [PATCH 1/3] nvme-tcp: improve rx/tx fairness Hannes Reinecke
2024-07-08 11:57   ` Sagi Grimberg
2024-07-08 13:21     ` Hannes Reinecke
2024-07-08 14:25       ` Sagi Grimberg
2024-07-08 15:50         ` Hannes Reinecke
2024-07-08 19:31           ` Sagi Grimberg
2024-07-09  6:51             ` Hannes Reinecke
2024-07-09  7:06               ` Sagi Grimberg
2024-07-08  7:10 ` [PATCH 2/3] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-07-08 12:08   ` Sagi Grimberg
2024-07-08 12:43     ` Hannes Reinecke
2024-07-08 14:38       ` Sagi Grimberg
2024-07-08  7:10 ` Hannes Reinecke [this message]
2024-07-08 12:12   ` [PATCH 3/3] nvme-tcp: per-controller I/O workqueues Sagi Grimberg
2024-07-08 12:48     ` Hannes Reinecke
2024-07-08 14:41       ` Sagi Grimberg
2024-07-10 11:56 ` [PATCHv2 0/3] nvme-tcp: improve scalability Sagi Grimberg
2024-07-10 14:06   ` Hannes Reinecke
2024-07-10 14:45     ` Sagi Grimberg
2024-07-16  6:31 ` Sagi Grimberg
2024-07-16  7:10   ` Hannes Reinecke

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a5c42a7b4be dfblob:fc8f682f686 )
 OR (
bs:"[PATCH 3/3] nvme-tcp: per-controller I/O workqueues" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240708071013.69984-4-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox