Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: hare@suse.de, kbusch@kernel.org, hch@lst.de, sagi@grimberg.me,
	dwagner@suse.de, kanie@linux.alibaba.com, jmeneghi@redhat.com,
	randyj@purestorage.com, martin.petersen@oracle.com,
	john.g.garry@oracle.com, gjoyce@linux.ibm.com
Subject: [PATCHv6 2/8] nvme-multipath: pass I/O type to nvme_find_path()
Date: Wed, 20 May 2026 23:50:58 +0530	[thread overview]
Message-ID: <20260520182112.863076-3-nilay@linux.ibm.com> (raw)
In-Reply-To: <20260520182112.863076-1-nilay@linux.ibm.com>

Currently, nvme_find_path() only accepts an nvme_ns_head argument.
However, the upcoming latency-aware I/O policy also needs to know
the I/O type (read/write/other) associated with the request in order
to make path selection decisions.

Update nvme_find_path() to accept an additional argument describing
the I/O type. This patch does not introduce any functional change and
only prepares the interface for subsequent latency-policy changes.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 drivers/nvme/host/ioctl.c     | 38 ++++++++++++++++++++++++++++++++---
 drivers/nvme/host/multipath.c |  9 +++++----
 drivers/nvme/host/nvme.h      | 19 +++++++++++++++++-
 drivers/nvme/host/pr.c        |  6 ++++--
 drivers/nvme/host/sysfs.c     |  2 +-
 5 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 08889b20e5d8..7a9043350227 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -699,18 +699,29 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
+	u8 opcode;
 	struct nvme_ns_head *head = bdev->bd_disk->private_data;
 	bool open_for_write = mode & BLK_OPEN_WRITE;
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret = -EWOULDBLOCK;
 	unsigned int flags = 0;
+	unsigned int op_type = NVME_STAT_OTHER;
 
 	if (bdev_is_partition(bdev))
 		flags |= NVME_IOCTL_PARTITION;
 
+	if (cmd == NVME_IOCTL_SUBMIT_IO) {
+		if (get_user(opcode, (u8 *)argp))
+			return -EFAULT;
+		if (opcode == nvme_cmd_write)
+			op_type = NVME_STAT_WRITE;
+		else if (opcode == nvme_cmd_read)
+			op_type = NVME_STAT_READ;
+	}
+
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, op_type);
 	if (!ns)
 		goto out_unlock;
 
@@ -732,6 +743,7 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg)
 {
+	u8 opcode;
 	bool open_for_write = file->f_mode & FMODE_WRITE;
 	struct cdev *cdev = file_inode(file)->i_cdev;
 	struct nvme_ns_head *head =
@@ -739,9 +751,19 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret = -EWOULDBLOCK;
+	unsigned int op_type = NVME_STAT_OTHER;
+
+	if (cmd == NVME_IOCTL_SUBMIT_IO) {
+		if (get_user(opcode, (u8 *)argp))
+			return -EFAULT;
+		if (opcode == nvme_cmd_write)
+			op_type = NVME_STAT_WRITE;
+		else if (opcode == nvme_cmd_read)
+			op_type = NVME_STAT_READ;
+	}
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, op_type);
 	if (!ns)
 		goto out_unlock;
 
@@ -761,7 +783,17 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
 	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
 	int srcu_idx = srcu_read_lock(&head->srcu);
-	struct nvme_ns *ns = nvme_find_path(head);
+	const struct nvme_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe,
+						struct nvme_uring_cmd);
+	__u8 opcode = READ_ONCE(cmd->opcode);
+	unsigned int op_type = NVME_STAT_OTHER;
+
+	if (opcode == nvme_cmd_write)
+		op_type = NVME_STAT_WRITE;
+	else if (opcode == nvme_cmd_read)
+		op_type = NVME_STAT_READ;
+
+	struct nvme_ns *ns = nvme_find_path(head, op_type);
 	int ret = -EINVAL;
 
 	if (ns)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 263161cb8ac0..90f449780e72 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -446,7 +446,8 @@ static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
 	return ns;
 }
 
-inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head,
+		unsigned int op_type)
 {
 	switch (READ_ONCE(head->subsys->iopolicy)) {
 	case NVME_IOPOLICY_QD:
@@ -508,7 +509,7 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
 		return;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, nvme_data_dir(bio_op(bio)));
 	if (likely(ns)) {
 		bio_set_dev(bio, ns->disk->part0);
 		bio->bi_opf |= REQ_NVME_MPATH;
@@ -550,7 +551,7 @@ static int nvme_ns_head_get_unique_id(struct gendisk *disk, u8 id[16],
 	int srcu_idx, ret = -EWOULDBLOCK;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, NVME_STAT_OTHER);
 	if (ns)
 		ret = nvme_ns_get_unique_id(ns, id, type);
 	srcu_read_unlock(&head->srcu, srcu_idx);
@@ -566,7 +567,7 @@ static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector,
 	int srcu_idx, ret = -EWOULDBLOCK;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, NVME_STAT_OTHER);
 	if (ns)
 		ret = nvme_ns_report_zones(ns, sector, nr_zones, args);
 	srcu_read_unlock(&head->srcu, srcu_idx);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ccd5e05dac98..39e986e5f184 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -514,6 +514,13 @@ struct nvme_ns_ids {
 	u8	csi;
 };
 
+enum nvme_stat_group {
+	NVME_STAT_READ,
+	NVME_STAT_WRITE,
+	NVME_STAT_OTHER,
+	NVME_NUM_STAT_GROUPS
+};
+
 /*
  * Anchor structure for namespaces.  There is one for each namespace in a
  * NVMe subsystem that any of our controllers can see, and the namespace
@@ -1017,7 +1024,17 @@ extern const struct attribute_group *nvme_dev_attr_groups[];
 extern const struct block_device_operations nvme_bdev_ops;
 
 void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
-struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+struct nvme_ns *nvme_find_path(struct nvme_ns_head *head, unsigned int op_type);
+static inline int nvme_data_dir(const enum req_op op)
+{
+	if (op == REQ_OP_READ)
+		return NVME_STAT_READ;
+	else if (op_is_write(op))
+		return NVME_STAT_WRITE;
+	else
+		return NVME_STAT_OTHER;
+}
+
 #ifdef CONFIG_NVME_MULTIPATH
 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
 {
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index fe7dbe264815..80e106f151d2 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -53,10 +53,12 @@ static int nvme_send_ns_head_pr_command(struct block_device *bdev,
 		struct nvme_command *c, void *data, unsigned int data_len)
 {
 	struct nvme_ns_head *head = bdev->bd_disk->private_data;
-	int srcu_idx = srcu_read_lock(&head->srcu);
-	struct nvme_ns *ns = nvme_find_path(head);
+	int srcu_idx;
+	struct nvme_ns *ns;
 	int ret = -EWOULDBLOCK;
 
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head, NVME_STAT_OTHER);
 	if (ns) {
 		c->common.nsid = cpu_to_le32(ns->head->ns_id);
 		ret = nvme_submit_sync_cmd(ns->queue, c, data, data_len);
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index e59758616f27..b6b718bd310d 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -194,7 +194,7 @@ static int ns_head_update_nuse(struct nvme_ns_head *head)
 		return 0;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
-	ns = nvme_find_path(head);
+	ns = nvme_find_path(head, NVME_STAT_OTHER);
 	if (!ns)
 		goto out_unlock;
 
-- 
2.53.0



  parent reply	other threads:[~2026-05-20 18:21 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-20 18:20 [PATCHv6 0/8] nvme-multipath: introduce latency I/O policy Nilay Shroff
2026-05-20 18:20 ` [PATCHv6 1/8] block: expose blk_stat_{enable,disable}_accounting() to drivers Nilay Shroff
2026-05-21 12:06   ` John Garry
2026-05-22  6:18     ` Nilay Shroff
2026-05-22 12:10       ` Christoph Hellwig
2026-05-20 18:20 ` Nilay Shroff [this message]
2026-05-27  7:16   ` [PATCHv6 2/8] nvme-multipath: pass I/O type to nvme_find_path() Hannes Reinecke
2026-05-20 18:20 ` [PATCHv6 3/8] nvme-multipath: add support for latency I/O policy Nilay Shroff
2026-05-20 18:21 ` [PATCHv6 4/8] nvme: add generic debugfs support Nilay Shroff
2026-05-20 18:21 ` [PATCHv6 5/8] nvme-multipath: add debugfs attribute latency_ewma_shift Nilay Shroff
2026-05-20 18:21 ` [PATCHv6 6/8] nvme-multipath: add debugfs attribute latency_batch_timeout Nilay Shroff
2026-05-20 18:21 ` [PATCHv6 7/8] nvme-multipath: add debugfs attribute latency_stat Nilay Shroff
2026-05-20 18:21 ` [PATCHv6 8/8] nvme-multipath: add documentation for latency I/O policy Nilay Shroff

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260520182112.863076-3-nilay@linux.ibm.com \
    --to=nilay@linux.ibm.com \
    --cc=dwagner@suse.de \
    --cc=gjoyce@linux.ibm.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jmeneghi@redhat.com \
    --cc=john.g.garry@oracle.com \
    --cc=kanie@linux.alibaba.com \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=martin.petersen@oracle.com \
    --cc=randyj@purestorage.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox