Linux SCSI subsystem development
 help / color / mirror / Atom feed
From: John Garry <john.g.garry@oracle.com>
To: hch@lst.de, kbusch@kernel.org, sagi@grimberg.me, axboe@fb.com,
	martin.petersen@oracle.com,
	james.bottomley@hansenpartnership.com, hare@suse.com,
	bmarzins@redhat.com, nilay@linux.ibm.com
Cc: jmeneghi@redhat.com, linux-nvme@lists.infradead.org,
	linux-scsi@vger.kernel.org, michael.christie@oracle.com,
	snitzer@kernel.org, dm-devel@lists.linux.dev,
	linux-kernel@vger.kernel.org,
	John Garry <john.g.garry@oracle.com>
Subject: [PATCH v2 06/13] libmultipath: Add cdev support
Date: Tue, 28 Apr 2026 11:10:58 +0000	[thread overview]
Message-ID: <20260428111105.1778008-7-john.g.garry@oracle.com> (raw)
In-Reply-To: <20260428111105.1778008-1-john.g.garry@oracle.com>

Add support to create a cdev multipath device. The functionality is much
the same as NVMe, where the cdev is created when a mpath device is set
live.

The driver must provide a mpath_head_template.cdev_ioctl callback to
actually handle the ioctl.

Structure mpath_chr_fops would be used for setting the cdev fops in the
mpath_head_template.add_cdev callback.

The NVMe cdev ioctl handler has special handling for NVMe controller
commands. In this case, the SRCU read lock is dropped before executing the
ioctl. For reference, see nvme_ns_head_ctrl_ioctl(). This means that it is
not always possible to hold the SRCU read lock when calling the ioctl
callback. To handle this scenario, add template callbacks .ioctl_begin and
.ioctl_finish, to be called before and after the ioctl callback,
respectively - if .ioctl_begin returns data (through its void ** argument)
then we know to drop the SRCU lock before calling the ioctl callback, and
then later call the .ioctl_finish callback with that same data. For NVMe
using libmultipath, we would take a reference to the controller structure
and pass a pointer to the controller structure back in the .ioctl_begin
callback and use that same data in the .ioctl_finish callback to put the
reference to the controller.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 include/linux/multipath.h |  18 ++++++
 lib/multipath.c           | 129 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)

diff --git a/include/linux/multipath.h b/include/linux/multipath.h
index 72186ab220083..3ac77c089a58c 100644
--- a/include/linux/multipath.h
+++ b/include/linux/multipath.h
@@ -4,8 +4,11 @@
 
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/cdev.h>
 #include <linux/srcu.h>
+#include <linux/io_uring/cmd.h>
 
+extern const struct file_operations mpath_chr_fops;
 extern const struct block_device_operations mpath_ops;
 
 enum mpath_iopolicy_e {
@@ -37,12 +40,24 @@ struct mpath_device {
 
 struct mpath_head_template {
 	bool (*available_path)(struct mpath_device *);
+	int (*add_cdev)(struct mpath_head *);
+	void (*del_cdev)(struct mpath_head *);
 	bool (*is_disabled)(struct mpath_device *);
 	bool (*is_optimized)(struct mpath_device *);
 	int (*get_nr_active)(struct mpath_device *);
+	long (*cdev_ioctl)(struct mpath_device *, unsigned int cmd,
+				unsigned long arg, bool open_for_write);
+	int (*chr_uring_cmd)(struct mpath_device *,
+				struct io_uring_cmd *ioucmd,
+				unsigned int issue_flags);
+	int (*chr_uring_cmd_iopoll)(struct io_uring_cmd *ioucmd,
+				 struct io_comp_batch *iob,
+				 unsigned int poll_flags);
 	enum mpath_iopolicy_e (*get_iopolicy)(struct mpath_head *);
 	struct bio *(*clone_bio)(struct bio *);
 	const struct attribute_group **device_groups;
+	void (*ioctl_begin)(struct mpath_device *, unsigned int cmd, void **);
+	void (*ioctl_finish)(void *opaque);
 };
 
 #define MPATH_HEAD_DISK_LIVE 			0
@@ -58,6 +73,9 @@ struct mpath_head {
 	spinlock_t		requeue_lock;
 	struct work_struct	requeue_work; /* work struct for requeue */
 
+	struct cdev		cdev;
+	struct device		cdev_device;
+
 	void			*drvdata;
 	unsigned long		flags;
 	struct gendisk		*disk;
diff --git a/lib/multipath.c b/lib/multipath.c
index 1232e057199ae..69e48ca3169c2 100644
--- a/lib/multipath.c
+++ b/lib/multipath.c
@@ -462,6 +462,122 @@ const struct block_device_operations mpath_ops = {
 };
 EXPORT_SYMBOL_GPL(mpath_ops);
 
+static int mpath_chr_open(struct inode *inode, struct file *file)
+{
+	struct cdev *cdev = file_inode(file)->i_cdev;
+	struct mpath_head *mpath_head =
+			container_of(cdev, struct mpath_head, cdev);
+
+	return mpath_get_head(mpath_head);
+}
+
+static int mpath_chr_release(struct inode *inode, struct file *file)
+{
+	struct cdev *cdev = file_inode(file)->i_cdev;
+	struct mpath_head *mpath_head =
+			container_of(cdev, struct mpath_head, cdev);
+
+	mpath_put_head(mpath_head);
+	return 0;
+}
+
+static long mpath_chr_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct cdev *cdev = file_inode(file)->i_cdev;
+	struct mpath_head *mpath_head =
+			container_of(cdev, struct mpath_head, cdev);
+	struct mpath_device *mpath_device;
+	int srcu_idx, err = -EWOULDBLOCK;
+	void *unlocked_ioctl_data = NULL;
+
+	srcu_idx = srcu_read_lock(&mpath_head->srcu);
+	mpath_device = mpath_find_path(mpath_head);
+	if (!mpath_device)
+		goto out_unlock;
+	if (mpath_head->mpdt->ioctl_begin)
+		mpath_head->mpdt->ioctl_begin(mpath_device, cmd,
+					&unlocked_ioctl_data);
+	if (unlocked_ioctl_data)
+		srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+	err = mpath_head->mpdt->cdev_ioctl(mpath_device, cmd, arg,
+					file->f_mode & FMODE_WRITE);
+	if (unlocked_ioctl_data) {
+		mpath_head->mpdt->ioctl_finish(unlocked_ioctl_data);
+		return err;
+	}
+
+out_unlock:
+	srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+	return err;
+}
+
+static int mpath_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+		unsigned int issue_flags)
+{
+	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+	struct mpath_head *mpath_head =
+			container_of(cdev, struct mpath_head, cdev);
+	struct mpath_device *mpath_device;
+	/* error code copied from nvme_ns_head_chr_uring_cmd */
+	int srcu_idx, ret = -EINVAL;
+
+	srcu_idx = srcu_read_lock(&mpath_head->srcu);
+	mpath_device = mpath_find_path(mpath_head);
+
+	if (!mpath_device)
+		goto out_unlock;
+
+	if (!mpath_head->mpdt->chr_uring_cmd) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	ret = mpath_head->mpdt->chr_uring_cmd(mpath_device, ioucmd,
+			issue_flags);
+out_unlock:
+	srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+	return ret;
+}
+
+static int mpath_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
+				 struct io_comp_batch *iob,
+				 unsigned int poll_flags)
+{
+	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+	struct mpath_head *mpath_head =
+			container_of(cdev, struct mpath_head, cdev);
+
+	if (!mpath_head->mpdt->chr_uring_cmd_iopoll)
+		return -EOPNOTSUPP;
+
+	return mpath_head->mpdt->chr_uring_cmd_iopoll(ioucmd, iob, poll_flags);
+}
+
+const struct file_operations mpath_chr_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mpath_chr_open,
+	.release	= mpath_chr_release,
+	.unlocked_ioctl	= mpath_chr_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.uring_cmd	= mpath_chr_uring_cmd,
+	.uring_cmd_iopoll = mpath_chr_uring_cmd_iopoll,
+};
+EXPORT_SYMBOL_GPL(mpath_chr_fops);
+
+static int mpath_head_add_cdev(struct mpath_head *mpath_head)
+{
+	if (mpath_head->mpdt->add_cdev)
+		return mpath_head->mpdt->add_cdev(mpath_head);
+	return 0;
+}
+
+static void mpath_head_del_cdev(struct mpath_head *mpath_head)
+{
+	if (mpath_head->mpdt->del_cdev)
+		mpath_head->mpdt->del_cdev(mpath_head);
+}
+
 static void multipath_partition_scan_work(struct work_struct *work)
 {
 	struct mpath_head *mpath_head =
@@ -504,6 +620,7 @@ void mpath_remove_disk(struct mpath_head *mpath_head)
 		 */
 		mpath_schedule_requeue_work(mpath_head);
 
+		mpath_head_del_cdev(mpath_head);
 		mpath_synchronize(mpath_head);
 		del_gendisk(disk);
 	}
@@ -526,6 +643,16 @@ EXPORT_SYMBOL_GPL(mpath_put_disk);
 int mpath_alloc_head_disk(struct mpath_head *mpath_head,
 			struct queue_limits *lim, int numa_node)
 {
+	/* Do limited sanity checks on template */
+	if (!mpath_head->mpdt->ioctl_begin ^ !mpath_head->mpdt->ioctl_finish)
+		return -EINVAL;
+
+	if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->del_cdev)
+		return -EINVAL;
+
+	if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->cdev_ioctl)
+		return -EINVAL;
+
 	mpath_head->disk = blk_alloc_disk(lim, numa_node);
 	if (IS_ERR(mpath_head->disk))
 		return PTR_ERR(mpath_head->disk);
@@ -555,6 +682,8 @@ void mpath_device_set_live(struct mpath_device *mpath_device)
 			clear_bit(MPATH_HEAD_DISK_LIVE, &mpath_head->flags);
 			return;
 		}
+
+		mpath_head_add_cdev(mpath_head);
 		queue_work(mpath_wq, &mpath_head->partition_scan_work);
 	}
 
-- 
2.43.5


  parent reply	other threads:[~2026-04-28 11:13 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-28 11:10 [PATCH v2 00/13] libmultipath: a generic multipath lib for block drivers John Garry
2026-04-28 11:10 ` [PATCH v2 01/13] libmultipath: Add initial framework John Garry
2026-04-28 11:10 ` [PATCH v2 02/13] libmultipath: Add basic gendisk support John Garry
2026-04-28 11:10 ` [PATCH v2 03/13] libmultipath: Add path selection support John Garry
2026-04-28 11:10 ` [PATCH v2 04/13] libmultipath: Add bio handling John Garry
2026-04-28 11:10 ` [PATCH v2 05/13] libmultipath: Add support for mpath_device management John Garry
2026-04-28 11:10 ` John Garry [this message]
2026-04-28 11:10 ` [PATCH v2 07/13] libmultipath: Add delayed removal support John Garry
2026-04-28 11:11 ` [PATCH v2 08/13] libmultipath: Add sysfs helpers John Garry
2026-04-28 11:11 ` [PATCH v2 09/13] libmultipath: Add PR support John Garry
2026-04-28 11:11 ` [PATCH v2 10/13] libmultipath: Add mpath_bdev_report_zones() John Garry
2026-04-28 11:11 ` [PATCH v2 11/13] libmultipath: Add support for block device IOCTL John Garry
2026-04-28 11:11 ` [PATCH v2 12/13] libmultipath: Add mpath_bdev_getgeo() John Garry
2026-04-28 11:11 ` [PATCH v2 13/13] libmultipath: Add mpath_bdev_get_unique_id() John Garry
2026-05-10 22:03 ` [PATCH v2 00/13] libmultipath: a generic multipath lib for block drivers Sagi Grimberg
2026-05-11  7:30   ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260428111105.1778008-7-john.g.garry@oracle.com \
    --to=john.g.garry@oracle.com \
    --cc=axboe@fb.com \
    --cc=bmarzins@redhat.com \
    --cc=dm-devel@lists.linux.dev \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=james.bottomley@hansenpartnership.com \
    --cc=jmeneghi@redhat.com \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=michael.christie@oracle.com \
    --cc=nilay@linux.ibm.com \
    --cc=sagi@grimberg.me \
    --cc=snitzer@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox