public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 2/7] nvme-tcp: distribute queue affinity
Date: Wed, 26 Jun 2024 14:13:42 +0200	[thread overview]
Message-ID: <20240626121347.1116-3-hare@kernel.org> (raw)
In-Reply-To: <20240626121347.1116-1-hare@kernel.org>

Introduce a per-cpu counter to distribute queues evenly across all
cpus in a blk-mq hwctx cpu set. The current algorithm produces
identical cpu affinity maps for all controllers, piling the work of
every queue with the same qid onto the same cpu.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 78fbce13a9e6..faab55ff86fe 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -26,6 +26,8 @@
 
 struct nvme_tcp_queue;
 
+static atomic_t nvme_tcp_cpu_queues[NR_CPUS];
+
 /* Define the socket priority to use for connections were it is desirable
  * that the NIC consider performing optimized packet processing or filtering.
  * A non-zero value being sufficient to indicate general consideration of any
@@ -1569,16 +1571,26 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
 	if (wq_unbound)
 		queue->io_cpu = WORK_CPU_UNBOUND;
 	else {
-		int i;
+		int i, min_queues = WORK_CPU_UNBOUND, io_cpu = WORK_CPU_UNBOUND;
 
 		if (WARN_ON(!mq_map))
 			return;
-		for_each_cpu(i, cpu_online_mask) {
-			if (mq_map[i] == qid) {
-				queue->io_cpu = i;
-				break;
+		for_each_online_cpu(i) {
+			int num_queues;
+
+			if (mq_map[i] != qid)
+				continue;
+
+			num_queues = atomic_read(&nvme_tcp_cpu_queues[i]);
+			if (num_queues < min_queues) {
+				min_queues = num_queues;
+				io_cpu = i;
 			}
 		}
+		if (io_cpu != WORK_CPU_UNBOUND) {
+			queue->io_cpu = io_cpu;
+			atomic_inc(&nvme_tcp_cpu_queues[io_cpu]);
+		}
 		dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n",
 			qid, queue->io_cpu);
 	}
@@ -1834,6 +1846,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
 	nvme_tcp_restore_sock_ops(queue);
 	cancel_work_sync(&queue->io_work);
+	if (queue->io_cpu != WORK_CPU_UNBOUND) {
+		atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]);
+		queue->io_cpu = WORK_CPU_UNBOUND;
+	}
 }
 
 static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
@@ -2845,7 +2861,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
-	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, i;
 
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
@@ -2863,6 +2879,9 @@ static int __init nvme_tcp_init_module(void)
 	if (!nvme_tcp_wq)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i)
+		atomic_set(&nvme_tcp_cpu_queues[i], 0);
+
 	nvmf_register_transport(&nvme_tcp_transport);
 	return 0;
 }
-- 
2.35.3



  parent reply	other threads:[~2024-06-26 12:14 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-26 12:13 [PATCH 0/7] nvme-tcp scalability improvements Hannes Reinecke
2024-06-26 12:13 ` [PATCH 1/7] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-06-26 12:13 ` Hannes Reinecke [this message]
2024-06-26 13:38   ` [PATCH 2/7] nvme-tcp: distribute queue affinity Sagi Grimberg
2024-06-26 12:13 ` [PATCH 3/7] net: micro-optimize skb_datagram_iter Hannes Reinecke
2024-06-26 13:38   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 4/7] nvme-tcp: receive data in softirq Hannes Reinecke
2024-06-26 12:13 ` [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter Hannes Reinecke
2024-06-26 13:44   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling Hannes Reinecke
2024-06-26 13:45   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 7/7] nvme-tcp: make softirq_rx the default Hannes Reinecke
2024-06-26 13:46   ` Sagi Grimberg
2024-06-26 13:37 ` [PATCH 0/7] nvme-tcp scalability improvements Sagi Grimberg
2024-06-26 14:27   ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240626121347.1116-3-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox