All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@kernel.org>
To: Sagi Grimberg <sagi@grimberg.me>
Cc: Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 1/4] nvme-tcp: per-controller I/O workqueues
Date: Wed,  3 Jul 2024 15:50:18 +0200	[thread overview]
Message-ID: <20240703135021.34143-2-hare@kernel.org> (raw)
In-Reply-To: <20240703135021.34143-1-hare@kernel.org>

Implement per-controller I/O workqueues to reduce workqueue contention
during I/O.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5885aa452aa1..d43099c562fc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -191,6 +191,7 @@ struct nvme_tcp_ctrl {
 	struct sockaddr_storage src_addr;
 	struct nvme_ctrl	ctrl;
 
+	struct workqueue_struct	*io_wq;
 	struct work_struct	err_work;
 	struct delayed_work	connect_work;
 	struct nvme_tcp_request async_req;
@@ -199,7 +200,6 @@ struct nvme_tcp_ctrl {
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
-static struct workqueue_struct *nvme_tcp_wq;
 static const struct blk_mq_ops nvme_tcp_mq_ops;
 static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
@@ -402,7 +402,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	}
 
 	if (last && nvme_tcp_queue_has_pending(queue))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -974,7 +974,7 @@ static void nvme_tcp_data_ready(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && queue->rd_enabled) &&
 	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -986,7 +986,7 @@ static void nvme_tcp_write_space(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && sk_stream_is_writeable(sk))) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1304,7 +1304,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -2390,6 +2390,8 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
 
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
+	destroy_workqueue(ctrl->io_wq);
+
 	kfree(ctrl->queues);
 	kfree(ctrl);
 }
@@ -2580,7 +2582,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 
 	if (!llist_empty(&queue->req_list))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		queue_work_on(queue->io_cpu, queue->ctrl->io_wq, &queue->io_work);
 }
 
 static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2712,6 +2714,7 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_tcp_ctrl *ctrl;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
 	int ret;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -2783,6 +2786,15 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 	if (ret)
 		goto out_kfree_queues;
 
+	if (wq_unbound)
+		wq_flags |= WQ_UNBOUND;
+	ctrl->io_wq = alloc_workqueue("nvme_tcp_wq_%d", wq_flags, 0,
+				      ctrl->ctrl.instance);
+	if (!ctrl->io_wq) {
+		nvme_put_ctrl(&ctrl->ctrl);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	return ctrl;
 out_kfree_queues:
 	kfree(ctrl->queues);
@@ -2848,8 +2860,6 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
-	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
-
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
@@ -2859,13 +2869,6 @@ static int __init nvme_tcp_init_module(void)
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
 
-	if (wq_unbound)
-		wq_flags |= WQ_UNBOUND;
-
-	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
-	if (!nvme_tcp_wq)
-		return -ENOMEM;
-
 	nvmf_register_transport(&nvme_tcp_transport);
 	return 0;
 }
@@ -2881,8 +2884,6 @@ static void __exit nvme_tcp_cleanup_module(void)
 		nvme_delete_ctrl(&ctrl->ctrl);
 	mutex_unlock(&nvme_tcp_ctrl_mutex);
 	flush_workqueue(nvme_delete_wq);
-
-	destroy_workqueue(nvme_tcp_wq);
 }
 
 module_init(nvme_tcp_init_module);
-- 
2.35.3



  reply	other threads:[~2024-07-03 13:50 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-03 13:50 [PATCH 0/4] nvme-tcp: improve scalability Hannes Reinecke
2024-07-03 13:50 ` Hannes Reinecke [this message]
2024-07-03 14:11   ` [PATCH 1/4] nvme-tcp: per-controller I/O workqueues Sagi Grimberg
2024-07-03 14:46     ` Hannes Reinecke
2024-07-03 15:16       ` Sagi Grimberg
2024-07-03 17:07         ` Tejun Heo
2024-07-03 19:14           ` Sagi Grimberg
2024-07-03 19:17             ` Tejun Heo
2024-07-03 19:41               ` Sagi Grimberg
2024-07-04  7:36               ` Hannes Reinecke
2024-07-05  7:10                 ` Christoph Hellwig
2024-07-05  8:11                   ` Hannes Reinecke
2024-07-05  8:16                     ` Jens Axboe
2024-07-04  5:36   ` Christoph Hellwig
2024-07-03 13:50 ` [PATCH 2/4] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-07-03 14:19   ` Sagi Grimberg
2024-07-03 14:53     ` Hannes Reinecke
2024-07-03 15:03       ` Sagi Grimberg
2024-07-03 15:40         ` Hannes Reinecke
2024-07-03 19:38           ` Sagi Grimberg
2024-07-03 19:47             ` Sagi Grimberg
2024-07-04  6:43             ` Hannes Reinecke
2024-07-04  9:07               ` Sagi Grimberg
2024-07-04 14:03                 ` Hannes Reinecke
2024-07-04  5:37     ` Christoph Hellwig
2024-07-04  9:13       ` Sagi Grimberg
2024-07-03 13:50 ` [PATCH 3/4] workqueue: introduce helper workqueue_unbound_affinity_scope() Hannes Reinecke
2024-07-03 17:31   ` Tejun Heo
2024-07-04  6:04     ` Hannes Reinecke
2024-07-03 13:50 ` [PATCH 4/4] nvme-tcp: switch to 'cpu' affinity scope for unbound workqueues Hannes Reinecke
2024-07-03 14:22   ` Sagi Grimberg
2024-07-03 15:01     ` Hannes Reinecke
2024-07-03 15:09       ` Sagi Grimberg
2024-07-03 15:50         ` Hannes Reinecke
2024-07-04  9:11           ` Sagi Grimberg
2024-07-04 15:54             ` Hannes Reinecke
2024-07-05 11:48               ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240703135021.34143-2-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.