* [RFC PATCH 1/4] nvme-tcp: optionally limit I/O queue count based on NIC queues
2026-04-20 11:49 [RFC PATCH 0/4] nvme-tcp: NIC topology aware I/O queue scaling and queue info export Nilay Shroff
@ 2026-04-20 11:49 ` Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 2/4] nvme-tcp: add a diagnostic message when NIC queues are underutilized Nilay Shroff
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Nilay Shroff @ 2026-04-20 11:49 UTC (permalink / raw)
To: linux-nvme; +Cc: kbusch, hch, hare, sagi, chaitanyak, gjoyce, Nilay Shroff
NVMe-TCP currently provisions I/O queues primarily based on CPU
availability. On systems where the number of CPUs significantly exceeds
the number of NIC hardware queues, this can lead to multiple I/O queues
sharing the same NIC TX/RX queues, resulting in increased lock
contention, cacheline bouncing, and inter-processor interrupts (IPIs).
In such configurations, limiting the number of NVMe-TCP I/O queues to
the number of NIC hardware queues can improve performance by reducing
contention and improving locality. Aligning NVMe-TCP worker threads with
NIC queue topology may also help reduce tail latency.
Add a new transport option "match_hw_queues" to allow users to
optionally limit the number of NVMe-TCP I/O queues to the number of NIC
TX/RX queues. When enabled, the number of I/O queues is set to:
min(num_online_cpus, num_nic_queues)
This behavior is opt-in and does not change existing defaults.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/fabrics.c | 4 ++
drivers/nvme/host/fabrics.h | 3 +
drivers/nvme/host/tcp.c | 120 +++++++++++++++++++++++++++++++++++-
3 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ac3d4f400601..62ae998825e1 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -709,6 +709,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_TLS, "tls" },
{ NVMF_OPT_CONCAT, "concat" },
#endif
+ { NVMF_OPT_MATCH_HW_QUEUES, "match_hw_queues" },
{ NVMF_OPT_ERR, NULL }
};
@@ -1064,6 +1065,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->concat = true;
break;
+ case NVMF_OPT_MATCH_HW_QUEUES:
+ opts->match_hw_queues = true;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index caf5503d0833..e8e3a2672832 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -67,6 +67,7 @@ enum {
NVMF_OPT_KEYRING = 1 << 26,
NVMF_OPT_TLS_KEY = 1 << 27,
NVMF_OPT_CONCAT = 1 << 28,
+ NVMF_OPT_MATCH_HW_QUEUES = 1 << 29,
};
/**
@@ -106,6 +107,7 @@ enum {
* @disable_sqflow: disable controller sq flow control
* @hdr_digest: generate/verify header digest (TCP)
* @data_digest: generate/verify data digest (TCP)
+ * @match_hw_queues: limit controller IO queue count based on NIC queues (TCP)
* @nr_write_queues: number of queues for write I/O
* @nr_poll_queues: number of queues for polling I/O
* @tos: type of service
@@ -136,6 +138,7 @@ struct nvmf_ctrl_options {
bool disable_sqflow;
bool hdr_digest;
bool data_digest;
+ bool match_hw_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
int tos;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 243dab830dc8..7102a7a54d78 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -16,6 +16,8 @@
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
+#include <net/ip6_route.h>
+#include <linux/in6.h>
#include <linux/blk-mq.h>
#include <net/busy_poll.h>
#include <trace/events/sock.h>
@@ -1762,6 +1764,103 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
return ret;
}
+static struct net_device *nvme_tcp_get_netdev(struct nvme_ctrl *ctrl)
+{
+ struct net_device *dev = NULL;
+
+ if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)
+ dev = dev_get_by_name(&init_net, ctrl->opts->host_iface);
+ else {
+ struct nvme_tcp_ctrl *tctrl = to_tcp_ctrl(ctrl);
+
+ if (tctrl->addr.ss_family == AF_INET) {
+ struct rtable *rt;
+ struct flowi4 fl4 = {};
+ struct sockaddr_in *addr =
+ (struct sockaddr_in *)&tctrl->addr;
+
+ fl4.daddr = addr->sin_addr.s_addr;
+ if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
+ addr = (struct sockaddr_in *)&tctrl->src_addr;
+ fl4.saddr = addr->sin_addr.s_addr;
+ }
+ fl4.flowi4_proto = IPPROTO_TCP;
+
+ rt = ip_route_output_key(&init_net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ dev = dst_dev(&rt->dst);
+ /*
+ * Get reference to netdev as ip_rt_put() will
+ * release the netdev reference.
+ */
+ if (dev)
+ dev_hold(dev);
+
+ ip_rt_put(rt);
+
+ } else if (tctrl->addr.ss_family == AF_INET6) {
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {};
+ struct sockaddr_in6 *addr6 =
+ (struct sockaddr_in6 *)&tctrl->addr;
+
+ fl6.daddr = addr6->sin6_addr;
+ if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
+ addr6 = (struct sockaddr_in6 *)&tctrl->src_addr;
+ fl6.saddr = addr6->sin6_addr;
+ }
+ fl6.flowi6_proto = IPPROTO_TCP;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ return NULL;
+ }
+
+ dev = dst_dev(dst);
+ /*
+ * Get reference to netdev as dst_release() will
+ * release the netdev reference.
+ */
+ if (dev)
+ dev_hold(dev);
+
+ dst_release(dst);
+ }
+ }
+
+ return dev;
+}
+
+static void nvme_tcp_put_netdev(struct net_device *dev)
+{
+ if (dev)
+ dev_put(dev);
+}
+
+/*
+ * Returns number of active NIC queues (min of TX/RX), or 0 if device cannot
+ * be determined.
+ */
+static int nvme_tcp_get_netdev_current_queue_count(struct nvme_ctrl *ctrl)
+{
+ struct net_device *dev;
+ int tx_queues, rx_queues;
+
+ dev = nvme_tcp_get_netdev(ctrl);
+ if (!dev)
+ return 0;
+
+ tx_queues = dev->real_num_tx_queues;
+ rx_queues = dev->real_num_rx_queues;
+
+ nvme_tcp_put_netdev(dev);
+
+ return min(tx_queues, rx_queues);
+}
+
static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
key_serial_t pskid)
{
@@ -2144,6 +2243,24 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
unsigned int nr_io_queues;
int ret;
+ if (!(ctrl->opts->mask & NVMF_OPT_NR_IO_QUEUES) &&
+ (ctrl->opts->mask & NVMF_OPT_MATCH_HW_QUEUES)) {
+ int nr_hw_queues;
+
+ nr_hw_queues = nvme_tcp_get_netdev_current_queue_count(ctrl);
+ if (nr_hw_queues <= 0)
+ goto init_queue;
+
+ ctrl->opts->nr_io_queues = min_t(unsigned int, nr_hw_queues, num_online_cpus());
+
+ if (ctrl->opts->nr_io_queues < num_online_cpus())
+ dev_info(ctrl->device,
+ "limiting I/O queues to %u (NIC queues %d, CPUs %u)\n",
+ ctrl->opts->nr_io_queues, nr_hw_queues,
+ num_online_cpus());
+ }
+
+init_queue:
nr_io_queues = nvmf_nr_io_queues(ctrl->opts);
ret = nvme_set_queue_count(ctrl, &nr_io_queues);
if (ret)
@@ -3019,7 +3136,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS |
- NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT,
+ NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY |
+ NVMF_OPT_CONCAT | NVMF_OPT_MATCH_HW_QUEUES,
.create_ctrl = nvme_tcp_create_ctrl,
};
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [RFC PATCH 2/4] nvme-tcp: add a diagnostic message when NIC queues are underutilized
2026-04-20 11:49 [RFC PATCH 0/4] nvme-tcp: NIC topology aware I/O queue scaling and queue info export Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 1/4] nvme-tcp: optionally limit I/O queue count based on NIC queues Nilay Shroff
@ 2026-04-20 11:49 ` Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 3/4] nvme: add debugfs helpers for NVMe drivers Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 4/4] nvme: expose queue information via debugfs Nilay Shroff
3 siblings, 0 replies; 5+ messages in thread
From: Nilay Shroff @ 2026-04-20 11:49 UTC (permalink / raw)
To: linux-nvme; +Cc: kbusch, hch, hare, sagi, chaitanyak, gjoyce, Nilay Shroff
Some systems may configure fewer NIC queues than supported by the
hardware. When the number of NVMe-TCP I/O queues is limited by the
number of active NIC queues, this can result in suboptimal performance.
Add a diagnostic message to warn when the configured NIC queue count
is lower than the maximum supported queue count, as reported by the
driver. This may help users identify configurations where increasing
the NIC queue count could improve performance.
This change is informational only and does not modify NIC configuration.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/tcp.c | 45 ++++++++++++++++++++++++++++++++++++++---
1 file changed, 42 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7102a7a54d78..9239495122fc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -11,6 +11,7 @@
#include <linux/crc32.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
+#include <linux/ethtool.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
@@ -20,6 +21,7 @@
#include <linux/in6.h>
#include <linux/blk-mq.h>
#include <net/busy_poll.h>
+#include <net/netdev_lock.h>
#include <trace/events/sock.h>
#include "nvme.h"
@@ -1861,6 +1863,35 @@ static int nvme_tcp_get_netdev_current_queue_count(struct nvme_ctrl *ctrl)
return min(tx_queues, rx_queues);
}
+static int nvme_tcp_get_netdev_max_queue_count(struct nvme_ctrl *ctrl)
+{
+ struct net_device *dev;
+ struct ethtool_channels channels = {0};
+ int max = 0;
+
+ dev = nvme_tcp_get_netdev(ctrl);
+ if (!dev)
+ return 0;
+
+ rtnl_lock();
+ if (!dev->ethtool_ops || !dev->ethtool_ops->get_channels)
+ goto out;
+
+ netdev_lock_ops(dev);
+
+ dev->ethtool_ops->get_channels(dev, &channels);
+ if (channels.max_combined)
+ max = channels.max_combined;
+ else
+ max = min(channels.max_rx, channels.max_tx);
+
+ netdev_unlock_ops(dev);
+out:
+ rtnl_unlock();
+
+ return max;
+}
+
static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
key_serial_t pskid)
{
@@ -2245,19 +2276,27 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
if (!(ctrl->opts->mask & NVMF_OPT_NR_IO_QUEUES) &&
(ctrl->opts->mask & NVMF_OPT_MATCH_HW_QUEUES)) {
- int nr_hw_queues;
+ int nr_hw_queues, max_hw_queues;
nr_hw_queues = nvme_tcp_get_netdev_current_queue_count(ctrl);
if (nr_hw_queues <= 0)
goto init_queue;
ctrl->opts->nr_io_queues = min_t(unsigned int, nr_hw_queues, num_online_cpus());
-
- if (ctrl->opts->nr_io_queues < num_online_cpus())
+ if (ctrl->opts->nr_io_queues < num_online_cpus()) {
dev_info(ctrl->device,
"limiting I/O queues to %u (NIC queues %d, CPUs %u)\n",
ctrl->opts->nr_io_queues, nr_hw_queues,
num_online_cpus());
+
+ max_hw_queues =
+ nvme_tcp_get_netdev_max_queue_count(ctrl);
+ if (max_hw_queues > nr_hw_queues)
+ dev_info(ctrl->device,
+ "NIC supports %d queues but only %d are configured; "
+ "consider increasing queue count for better performance\n",
+ max_hw_queues, nr_hw_queues);
+ }
}
init_queue:
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [RFC PATCH 3/4] nvme: add debugfs helpers for NVMe drivers
2026-04-20 11:49 [RFC PATCH 0/4] nvme-tcp: NIC topology aware I/O queue scaling and queue info export Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 1/4] nvme-tcp: optionally limit I/O queue count based on NIC queues Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 2/4] nvme-tcp: add a diagnostic message when NIC queues are underutilized Nilay Shroff
@ 2026-04-20 11:49 ` Nilay Shroff
2026-04-20 11:49 ` [RFC PATCH 4/4] nvme: expose queue information via debugfs Nilay Shroff
3 siblings, 0 replies; 5+ messages in thread
From: Nilay Shroff @ 2026-04-20 11:49 UTC (permalink / raw)
To: linux-nvme; +Cc: kbusch, hch, hare, sagi, chaitanyak, gjoyce, Nilay Shroff
Introduce helper APIs that allow NVMe drivers to register and unregister
debugfs entries, along with a reusable attribute structure for defining
new debugfs files.
The implementation uses seq_file interfaces to safely expose per-
namespace or per-path statistics, while supporting both simple show
callbacks and full seq_operations.
This will be used by subsequent patches to expose NVMe-TCP queue
and flow information for tuning NVMe TCP I/O workqueue and network stack
components.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/Makefile | 2 +-
drivers/nvme/host/debugfs.c | 111 ++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 10 ++++
3 files changed, 122 insertions(+), 1 deletion(-)
create mode 100644 drivers/nvme/host/debugfs.c
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 6414ec968f99..7962dfc3b2ad 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
-nvme-core-y += core.o ioctl.o sysfs.o pr.o
+nvme-core-y += core.o ioctl.o sysfs.o pr.o debugfs.o
nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
diff --git a/drivers/nvme/host/debugfs.c b/drivers/nvme/host/debugfs.c
new file mode 100644
index 000000000000..ee86138487d0
--- /dev/null
+++ b/drivers/nvme/host/debugfs.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 IBM Corporation
+ * Nilay Shroff <nilay@linux.ibm.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include "nvme.h"
+
+struct nvme_debugfs_attr {
+ const char *name;
+ umode_t mode;
+ int (*show)(void *data, struct seq_file *m);
+ const struct seq_operations *seq_ops;
+};
+
+struct nvme_debugfs_ctx {
+ void *data;
+ struct nvme_debugfs_attr *attr;
+};
+
+static int nvme_debugfs_show(struct seq_file *m, void *v)
+{
+ struct nvme_debugfs_ctx *ctx = m->private;
+ void *data = ctx->data;
+ struct nvme_debugfs_attr *attr = ctx->attr;
+
+ return attr->show(data, m);
+}
+
+static int nvme_debugfs_open(struct inode *inode, struct file *file)
+{
+ void *data = inode->i_private;
+ struct nvme_debugfs_attr *attr = debugfs_get_aux(file);
+ struct nvme_debugfs_ctx *ctx;
+ struct seq_file *m;
+ int ret;
+
+ ctx = kzalloc_obj(*ctx);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->data = data;
+ ctx->attr = attr;
+
+ if (attr->seq_ops) {
+ ret = seq_open(file, attr->seq_ops);
+ if (ret) {
+ kfree(ctx);
+ return ret;
+ }
+ m = file->private_data;
+ m->private = ctx;
+ return ret;
+ }
+
+ if (WARN_ON_ONCE(!attr->show)) {
+ kfree(ctx);
+ return -EPERM;
+ }
+
+ return single_open(file, nvme_debugfs_show, ctx);
+}
+
+static int nvme_debugfs_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct nvme_debugfs_ctx *ctx = m->private;
+ struct nvme_debugfs_attr *attr = ctx->attr;
+ int ret;
+
+ if (attr->seq_ops)
+ ret = seq_release(inode, file);
+ else
+ ret = single_release(inode, file);
+
+ kfree(ctx);
+ return ret;
+}
+
+static const struct file_operations nvme_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = nvme_debugfs_release,
+};
+
+static const struct nvme_debugfs_attr nvme_ns_debugfs_attrs[] = {
+ {},
+};
+
+static void nvme_debugfs_create_files(struct request_queue *q,
+ const struct nvme_debugfs_attr *attr, void *data)
+{
+ if (WARN_ON_ONCE(!q->debugfs_dir))
+ return;
+
+ for (; attr->name; attr++)
+ debugfs_create_file_aux(attr->name, attr->mode, q->debugfs_dir,
+ data, (void *)attr, &nvme_debugfs_fops);
+}
+
+void nvme_debugfs_register(struct gendisk *disk)
+{
+ nvme_debugfs_create_files(disk->queue, nvme_ns_debugfs_attrs,
+ disk->private_data);
+}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ccd5e05dac98..2f3f1d2d19b9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -468,6 +468,16 @@ struct nvme_ctrl {
u16 awupf; /* 0's based value. */
};
+void nvme_debugfs_register(struct gendisk *disk);
+static inline void nvme_debugfs_unregister(struct gendisk *disk)
+{
+ /*
+ * Nothing to do for now. When the request queue is unregistered,
+ * all files under q->debugfs_dir are recursively deleted.
+ * This is just a placeholder; the compiler will optimize it out.
+ */
+}
+
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
{
return READ_ONCE(ctrl->state);
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [RFC PATCH 4/4] nvme: expose queue information via debugfs
2026-04-20 11:49 [RFC PATCH 0/4] nvme-tcp: NIC topology aware I/O queue scaling and queue info export Nilay Shroff
` (2 preceding siblings ...)
2026-04-20 11:49 ` [RFC PATCH 3/4] nvme: add debugfs helpers for NVMe drivers Nilay Shroff
@ 2026-04-20 11:49 ` Nilay Shroff
3 siblings, 0 replies; 5+ messages in thread
From: Nilay Shroff @ 2026-04-20 11:49 UTC (permalink / raw)
To: linux-nvme; +Cc: kbusch, hch, hare, sagi, chaitanyak, gjoyce, Nilay Shroff
Add a new debugfs attribute "io_queue_info" to expose per-queue
information for NVMe controllers. For NVMe-TCP, this includes the
CPU handling each I/O queue and the associated TCP flow (source and
destination address/port).
This information can be useful for understanding and tuning the
interaction between NVMe-TCP I/O queues and network stack components,
such as IRQ affinity, RPS/RFS, XPS, or NIC flow steering (ntuple).
The data is exported using seq_file interfaces to allow iteration
over all controller queues.
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/core.c | 3 +++
drivers/nvme/host/debugfs.c | 53 ++++++++++++++++++++++++++++++++++++-
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/tcp.c | 52 ++++++++++++++++++++++++++++++++++++
4 files changed, 109 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e33af94c24b..1b0d13374d45 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4207,6 +4207,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
goto out_cleanup_ns_from_list;
+ nvme_debugfs_register(ns->disk);
+
if (!nvme_ns_head_multipath(ns->head))
nvme_add_ns_cdev(ns);
@@ -4285,6 +4287,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_mpath_remove_sysfs_link(ns);
+ nvme_debugfs_unregister(ns->disk);
del_gendisk(ns->disk);
mutex_lock(&ns->ctrl->namespaces_lock);
diff --git a/drivers/nvme/host/debugfs.c b/drivers/nvme/host/debugfs.c
index ee86138487d0..68c40582fa97 100644
--- a/drivers/nvme/host/debugfs.c
+++ b/drivers/nvme/host/debugfs.c
@@ -22,6 +22,56 @@ struct nvme_debugfs_ctx {
struct nvme_debugfs_attr *attr;
};
+static void *nvme_io_queue_info_start(struct seq_file *m, loff_t *pos)
+{
+ struct nvme_debugfs_ctx *ctx = m->private;
+ struct nvme_ns *ns = ctx->data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ nvme_get_ctrl(ctrl);
+ /*
+ * I/O queues start at qid 1 (qid 0 is the admin queue).
+ */
+ return ((*pos ? *pos : ++*pos) < ctrl->queue_count) ? pos : NULL;
+}
+
+static void *nvme_io_queue_info_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct nvme_debugfs_ctx *ctx = m->private;
+ struct nvme_ns *ns = ctx->data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ return (++*pos < ctrl->queue_count) ? pos : NULL;
+}
+
+static void nvme_io_queue_info_stop(struct seq_file *m, void *v)
+{
+ struct nvme_debugfs_ctx *ctx = m->private;
+ struct nvme_ns *ns = ctx->data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ nvme_put_ctrl(ctrl);
+}
+
+static int nvme_io_queue_info_show(struct seq_file *m, void *v)
+{
+ struct nvme_debugfs_ctx *ctx = m->private;
+ struct nvme_ns *ns = ctx->data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ if (ctrl->ops->print_io_queue_info)
+ return ctrl->ops->print_io_queue_info(m, ctrl, *(loff_t *)v);
+
+ return 0;
+}
+
+static const struct seq_operations nvme_io_queue_info_seq_ops = {
+ .start = nvme_io_queue_info_start,
+ .next = nvme_io_queue_info_next,
+ .stop = nvme_io_queue_info_stop,
+ .show = nvme_io_queue_info_show
+};
+
static int nvme_debugfs_show(struct seq_file *m, void *v)
{
struct nvme_debugfs_ctx *ctx = m->private;
@@ -90,7 +140,8 @@ static const struct file_operations nvme_debugfs_fops = {
};
static const struct nvme_debugfs_attr nvme_ns_debugfs_attrs[] = {
- {},
+ {"io_queue_info", 0400, .seq_ops = &nvme_io_queue_info_seq_ops},
+ {}
};
static void nvme_debugfs_create_files(struct request_queue *q,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2f3f1d2d19b9..d7ff82971136 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -650,6 +650,8 @@ struct nvme_ctrl_ops {
void (*print_device_info)(struct nvme_ctrl *ctrl);
bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl);
unsigned long (*get_virt_boundary)(struct nvme_ctrl *ctrl, bool is_admin);
+ int (*print_io_queue_info)(struct seq_file *m, struct nvme_ctrl *ctrl,
+ int qid);
};
/*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 9239495122fc..6d06e984de47 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2723,6 +2723,57 @@ static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
kfree(ctrl);
}
+static int nvme_tcp_print_io_queue_info(struct seq_file *m,
+ struct nvme_ctrl *ctrl, int qid)
+{
+ int cpu;
+ struct sockaddr_storage src, dst;
+ struct nvme_tcp_ctrl *tctrl = to_tcp_ctrl(ctrl);
+ struct nvme_tcp_queue *queue = &tctrl->queues[qid];
+ int ret = -EINVAL;
+
+ if (!qid || qid >= ctrl->queue_count ||
+ !test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+ return -EINVAL;
+
+ mutex_lock(&queue->queue_lock);
+ if (!queue->sock)
+ goto unlock;
+
+ ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src);
+ if (ret <= 0)
+ goto unlock;
+
+ ret = kernel_getpeername(queue->sock, (struct sockaddr *)&dst);
+ if (ret <= 0)
+ goto unlock;
+
+ cpu = (queue->io_cpu == WORK_CPU_UNBOUND) ? -1 : queue->io_cpu;
+
+ if (src.ss_family == AF_INET) {
+ struct sockaddr_in *sip = (struct sockaddr_in *)&src;
+ struct sockaddr_in *dip = (struct sockaddr_in *)&dst;
+
+ seq_printf(m, "qid=%d cpu=%d src_ip=%pI4 src_port=%u dst_ip=%pI4 dst_port=%u\n",
+ qid, cpu,
+ &sip->sin_addr.s_addr, ntohs(sip->sin_port),
+ &dip->sin_addr.s_addr, ntohs(dip->sin_port));
+ ret = 0;
+ } else if (src.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sip6 = (struct sockaddr_in6 *)&src;
+ struct sockaddr_in6 *dip6 = (struct sockaddr_in6 *)&dst;
+
+ seq_printf(m, "qid=%d cpu=%d src_ip=%pI6c src_port=%u dst_ip=%pI6c dst_port=%u\n",
+ qid, cpu,
+ &sip6->sin6_addr, ntohs(sip6->sin6_port),
+ &dip6->sin6_addr, ntohs(dip6->sin6_port));
+ ret = 0;
+ }
+unlock:
+ mutex_unlock(&queue->queue_lock);
+ return ret;
+}
+
static void nvme_tcp_set_sg_null(struct nvme_command *c)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
@@ -3023,6 +3074,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.get_address = nvme_tcp_get_address,
.stop_ctrl = nvme_tcp_stop_ctrl,
.get_virt_boundary = nvmf_get_virt_boundary,
+ .print_io_queue_info = nvme_tcp_print_io_queue_info,
};
static bool
--
2.53.0
^ permalink raw reply related [flat|nested] 5+ messages in thread