Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@kernel.org>
To: Sagi Grimberg <sagi@grimberg.me>
Cc: Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 1/4] nvme-tcp: per-controller I/O workqueues
Date: Wed,  3 Jul 2024 15:50:18 +0200	[thread overview]
Message-ID: <20240703135021.34143-2-hare@kernel.org> (raw)
In-Reply-To: <20240703135021.34143-1-hare@kernel.org>

Replace the single global nvme_tcp_wq with a per-controller I/O
workqueue (allocated in nvme_tcp_alloc_ctrl and destroyed in
nvme_tcp_free_ctrl) to reduce workqueue contention between
controllers during I/O.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5885aa452aa1..d43099c562fc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -191,6 +191,7 @@ struct nvme_tcp_ctrl {
 	struct sockaddr_storage src_addr;
 	struct nvme_ctrl	ctrl;
 
+	struct workqueue_struct	*io_wq;
 	struct work_struct	err_work;
 	struct delayed_work	connect_work;
 	struct nvme_tcp_request async_req;
@@ -199,7 +200,6 @@ struct nvme_tcp_ctrl {
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
-static struct workqueue_struct *nvme_tcp_wq;
 static const struct blk_mq_ops nvme_tcp_mq_ops;
 static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
@@ -402,7 +402,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	}
 
 	if (last && nvme_tcp_queue_has_pending(queue))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -974,7 +974,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && queue->rd_enabled) &&
 	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -986,7 +986,7 @@ static void nvme_tcp_write_space(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && sk_stream_is_writeable(sk))) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1304,7 +1304,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -2390,6 +2390,8 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
+	destroy_workqueue(ctrl->io_wq);
+
 	kfree(ctrl->queues);
 	kfree(ctrl);
 }
@@ -2580,7 +2582,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 
 	if (!llist_empty(&queue->req_list))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2712,6 +2714,7 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_tcp_ctrl *ctrl;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
 	int ret;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -2783,6 +2786,15 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	if (wq_unbound)
+		wq_flags |= WQ_UNBOUND;
+	ctrl->io_wq = alloc_workqueue("nvme_tcp_wq_%d", wq_flags, 0,
+				      ctrl->ctrl.instance);
+	if (!ctrl->io_wq) {
+		nvme_put_ctrl(&ctrl->ctrl);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	return ctrl;
 out_kfree_queues:
 	kfree(ctrl->queues);
@@ -2848,8 +2860,6 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
-	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
-
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
@@ -2859,13 +2869,6 @@ static int __init nvme_tcp_init_module(void)
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
 
-	if (wq_unbound)
-		wq_flags |= WQ_UNBOUND;
-
-	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
-	if (!nvme_tcp_wq)
-		return -ENOMEM;
-
 	nvmf_register_transport(&nvme_tcp_transport);
 	return 0;
 }
@@ -2881,8 +2884,6 @@ static void __exit nvme_tcp_cleanup_module(void)
 		nvme_delete_ctrl(&ctrl->ctrl);
 	mutex_unlock(&nvme_tcp_ctrl_mutex);
 	flush_workqueue(nvme_delete_wq);
-
-	destroy_workqueue(nvme_tcp_wq);
 }
 
 module_init(nvme_tcp_init_module);
-- 
2.35.3



  reply	other threads:[~2024-07-03 13:50 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-03 13:50 [PATCH 0/4] nvme-tcp: improve scalability Hannes Reinecke
2024-07-03 13:50 ` Hannes Reinecke [this message]
2024-07-03 14:11   ` [PATCH 1/4] nvme-tcp: per-controller I/O workqueues Sagi Grimberg
2024-07-03 14:46     ` Hannes Reinecke
2024-07-03 15:16       ` Sagi Grimberg
2024-07-03 17:07         ` Tejun Heo
2024-07-03 19:14           ` Sagi Grimberg
2024-07-03 19:17             ` Tejun Heo
2024-07-03 19:41               ` Sagi Grimberg
2024-07-04  7:36               ` Hannes Reinecke
2024-07-05  7:10                 ` Christoph Hellwig
2024-07-05  8:11                   ` Hannes Reinecke
2024-07-05  8:16                     ` Jens Axboe
2024-07-04  5:36   ` Christoph Hellwig
2024-07-03 13:50 ` [PATCH 2/4] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-07-03 14:19   ` Sagi Grimberg
2024-07-03 14:53     ` Hannes Reinecke
2024-07-03 15:03       ` Sagi Grimberg
2024-07-03 15:40         ` Hannes Reinecke
2024-07-03 19:38           ` Sagi Grimberg
2024-07-03 19:47             ` Sagi Grimberg
2024-07-04  6:43             ` Hannes Reinecke
2024-07-04  9:07               ` Sagi Grimberg
2024-07-04 14:03                 ` Hannes Reinecke
2024-07-04  5:37     ` Christoph Hellwig
2024-07-04  9:13       ` Sagi Grimberg
2024-07-03 13:50 ` [PATCH 3/4] workqueue: introduce helper workqueue_unbound_affinity_scope() Hannes Reinecke
2024-07-03 17:31   ` Tejun Heo
2024-07-04  6:04     ` Hannes Reinecke
2024-07-03 13:50 ` [PATCH 4/4] nvme-tcp: switch to 'cpu' affinity scope for unbound workqueues Hannes Reinecke
2024-07-03 14:22   ` Sagi Grimberg
2024-07-03 15:01     ` Hannes Reinecke
2024-07-03 15:09       ` Sagi Grimberg
2024-07-03 15:50         ` Hannes Reinecke
2024-07-04  9:11           ` Sagi Grimberg
2024-07-04 15:54             ` Hannes Reinecke
2024-07-05 11:48               ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240703135021.34143-2-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox