All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter
Date: Wed, 26 Jun 2024 14:13:45 +0200	[thread overview]
Message-ID: <20240626121347.1116-6-hare@kernel.org> (raw)
In-Reply-To: <20240626121347.1116-1-hare@kernel.org>

For high loads the default scheme of queueing work on the receiving
cpu might lead to cpu starvation and 'CPU hogged' messages.
This patch provides a 'wq_unbound' module parameter to let the
workqueue mechanism make scheduling decisions.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/target/tcp.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d305d7162dde..572e4f474c68 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -73,6 +73,14 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops,
 MODULE_PARM_DESC(idle_poll_period_usecs,
 		"nvmet tcp io_work poll till idle time period in usecs: Default 0");
 
+/*
+ * Use the unbound workqueue for nvmet_tcp_wq, then we can set the cpu affinity
+ * from sysfs.
+ */
+static bool wq_unbound;
+module_param(wq_unbound, bool, 0644);
+MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)");
+
 #ifdef CONFIG_NVME_TARGET_TCP_TLS
 /*
  * TLS handshake timeout
@@ -566,6 +574,15 @@ static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue)
 	return queue->snd_cmd;
 }
 
+static void nvmet_tcp_queue_work(struct nvmet_tcp_queue *queue)
+{
+	if (wq_unbound)
+		queue_work(nvmet_tcp_wq, &queue->io_work);
+	else
+		queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
+			      &queue->io_work);
+}
+
 static void nvmet_tcp_queue_response(struct nvmet_req *req)
 {
 	struct nvmet_tcp_cmd *cmd =
@@ -590,7 +607,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
 	}
 
 	llist_add(&cmd->lentry, &queue->resp_list);
-	queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
+	nvmet_tcp_queue_work(queue);
 }
 
 static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd)
@@ -1452,7 +1469,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
 	 * ops activity was recorded during the do-while loop above.
 	 */
 	if (nvmet_tcp_check_queue_deadline(queue, ops) || pending)
-		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+		nvmet_tcp_queue_work(queue);
 }
 
 static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
@@ -1628,8 +1645,7 @@ static void nvmet_tcp_data_ready(struct sock *sk)
 		if (queue->data_ready)
 			queue->data_ready(sk);
 		if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
-			queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
-				      &queue->io_work);
+			nvmet_tcp_queue_work(queue);
 	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1650,7 +1666,7 @@ static void nvmet_tcp_write_space(struct sock *sk)
 
 	if (sk_stream_is_writeable(sk)) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+		nvmet_tcp_queue_work(queue);
 	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
@@ -1731,7 +1747,7 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 		sock->sk->sk_write_space = nvmet_tcp_write_space;
 		if (idle_poll_period_usecs)
 			nvmet_tcp_arm_queue_deadline(queue);
-		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+		nvmet_tcp_queue_work(queue);
 	}
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
@@ -2182,9 +2198,11 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
 static int __init nvmet_tcp_init(void)
 {
 	int ret;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
 
-	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq",
-				WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+	if (wq_unbound)
+		wq_flags |= WQ_UNBOUND;
+	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", wq_flags, 0);
 	if (!nvmet_tcp_wq)
 		return -ENOMEM;
 
-- 
2.35.3



  parent reply	other threads:[~2024-06-26 12:14 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-26 12:13 [PATCH 0/7] nvme-tcp scalability improvements Hannes Reinecke
2024-06-26 12:13 ` [PATCH 1/7] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-06-26 12:13 ` [PATCH 2/7] nvme-tcp: distribute queue affinity Hannes Reinecke
2024-06-26 13:38   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 3/7] net: micro-optimize skb_datagram_iter Hannes Reinecke
2024-06-26 13:38   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 4/7] nvme-tcp: receive data in softirq Hannes Reinecke
2024-06-26 12:13 ` Hannes Reinecke [this message]
2024-06-26 13:44   ` [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter Sagi Grimberg
2024-06-26 12:13 ` [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling Hannes Reinecke
2024-06-26 13:45   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 7/7] nvme-tcp: make softirq_rx the default Hannes Reinecke
2024-06-26 13:46   ` Sagi Grimberg
2024-06-26 13:37 ` [PATCH 0/7] nvme-tcp scalability improvements Sagi Grimberg
2024-06-26 14:27   ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240626121347.1116-6-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.