From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 2/7] nvme-tcp: distribute queue affinity
Date: Wed, 26 Jun 2024 14:13:42 +0200 [thread overview]
Message-ID: <20240626121347.1116-3-hare@kernel.org> (raw)
In-Reply-To: <20240626121347.1116-1-hare@kernel.org>
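Introduce a per-cpu counter of the queues assigned to each cpu, and use
it to spread queues over all cpus in a blk-mq hwctx cpu set by picking
the cpu with the fewest queues. The current algorithm always picks the
first online cpu mapped to a given qid, which leads to identical cpu
affinity maps for all controllers and piles work on the same cpu for
all queues with the same qid.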
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++------
1 file changed, 25 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 78fbce13a9e6..faab55ff86fe 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -26,6 +26,8 @@
struct nvme_tcp_queue;
+static atomic_t nvme_tcp_cpu_queues[NR_CPUS];
+
/* Define the socket priority to use for connections were it is desirable
* that the NIC consider performing optimized packet processing or filtering.
* A non-zero value being sufficient to indicate general consideration of any
@@ -1569,16 +1571,26 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
if (wq_unbound)
queue->io_cpu = WORK_CPU_UNBOUND;
else {
- int i;
+ int i, min_queues = WORK_CPU_UNBOUND, io_cpu = WORK_CPU_UNBOUND;
if (WARN_ON(!mq_map))
return;
- for_each_cpu(i, cpu_online_mask) {
- if (mq_map[i] == qid) {
- queue->io_cpu = i;
- break;
+ for_each_online_cpu(i) {
+ int num_queues;
+
+ if (mq_map[i] != qid)
+ continue;
+
+ num_queues = atomic_read(&nvme_tcp_cpu_queues[i]);
+ if (num_queues < min_queues) {
+ min_queues = num_queues;
+ io_cpu = i;
}
}
+ if (io_cpu != WORK_CPU_UNBOUND) {
+ queue->io_cpu = io_cpu;
+ atomic_inc(&nvme_tcp_cpu_queues[io_cpu]);
+ }
dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n",
qid, queue->io_cpu);
}
@@ -1834,6 +1846,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
nvme_tcp_restore_sock_ops(queue);
cancel_work_sync(&queue->io_work);
+ if (queue->io_cpu != WORK_CPU_UNBOUND) {
+ atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]);
+ queue->io_cpu = WORK_CPU_UNBOUND;
+ }
}
static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
@@ -2845,7 +2861,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
static int __init nvme_tcp_init_module(void)
{
- unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+ unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, i;
BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
@@ -2863,6 +2879,9 @@ static int __init nvme_tcp_init_module(void)
if (!nvme_tcp_wq)
return -ENOMEM;
+ for_each_possible_cpu(i)
+ atomic_set(&nvme_tcp_cpu_queues[i], 0);
+
nvmf_register_transport(&nvme_tcp_transport);
return 0;
}
--
2.35.3
next prev parent reply other threads:[~2024-06-26 12:14 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-26 12:13 [PATCH 0/7] nvme-tcp scalability improvements Hannes Reinecke
2024-06-26 12:13 ` [PATCH 1/7] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-06-26 12:13 ` Hannes Reinecke [this message]
2024-06-26 13:38 ` [PATCH 2/7] nvme-tcp: distribute queue affinity Sagi Grimberg
2024-06-26 12:13 ` [PATCH 3/7] net: micro-optimize skb_datagram_iter Hannes Reinecke
2024-06-26 13:38 ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 4/7] nvme-tcp: receive data in softirq Hannes Reinecke
2024-06-26 12:13 ` [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter Hannes Reinecke
2024-06-26 13:44 ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling Hannes Reinecke
2024-06-26 13:45 ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 7/7] nvme-tcp: make softirq_rx the default Hannes Reinecke
2024-06-26 13:46 ` Sagi Grimberg
2024-06-26 13:37 ` [PATCH 0/7] nvme-tcp scalability improvements Sagi Grimberg
2024-06-26 14:27 ` Hannes Reinecke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240626121347.1116-3-hare@kernel.org \
--to=hare@kernel.org \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.