* [PATCH v2 01/18] scsi-multipath: introduce basic SCSI device support
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 02/18] scsi-multipath: introduce scsi_device head structure John Garry
` (16 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
For a scsi_device to support multipath, introduce structure
scsi_mpath_device to hold multipath-specific details.
Like the nvme_ns structure for NVMe, scsi_mpath_device holds the
mpath_device structure for device management and path selection.
A module param is introduced to enable multipath - the following modes
are available:
- on
- off
- always
SCSI multipath will only be available under the following conditions:
- scsi_multipath enabled and ALUA supported and unique ID available in
VPD page 83.
- scsi_multipath always mode and unique ID available in VPD page 83
The scsi_device structure contains a pointer to scsi_mpath_device; having
this pointer set or unset indicates whether multipath is enabled or
disabled for the scsi_device.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/Kconfig | 10 +++
drivers/scsi/Makefile | 1 +
drivers/scsi/scsi.c | 8 +-
drivers/scsi/scsi_multipath.c | 146 ++++++++++++++++++++++++++++++++++
drivers/scsi/scsi_scan.c | 4 +
drivers/scsi/scsi_sysfs.c | 2 +
include/scsi/scsi_device.h | 2 +
include/scsi/scsi_multipath.h | 55 +++++++++++++
8 files changed, 227 insertions(+), 1 deletion(-)
create mode 100644 drivers/scsi/scsi_multipath.c
create mode 100644 include/scsi/scsi_multipath.h
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 19d0884479a24..2375971db2052 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -76,6 +76,16 @@ config SCSI_LIB_KUNIT_TEST
If unsure say N.
+config SCSI_MULTIPATH
+ bool "SCSI multipath support (EXPERIMENTAL)"
+ depends on SCSI_MOD
+ select LIBMULTIPATH
+ help
+ This option enables native SCSI multipath support for SCSI
+ hosts.
+
+ If unsure say N.
+
comment "SCSI support type (disk, tape, CD-ROM)"
depends on SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 16de3e41f94c4..64b7a82828b81 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -168,6 +168,7 @@ scsi_mod-y += scsi_trace.o scsi_logging.o
scsi_mod-$(CONFIG_PM) += scsi_pm.o
scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o
scsi_mod-$(CONFIG_BLK_DEV_BSG) += scsi_bsg.o
+scsi_mod-$(CONFIG_SCSI_MULTIPATH) += scsi_multipath.o
hv_storvsc-y := storvsc_drv.o
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76cdad063f7bc..70aa1dbeacebf 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -64,6 +64,7 @@
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
+#include <scsi/scsi_multipath.h>
#include <scsi/scsi_tcq.h>
#include "scsi_priv.h"
@@ -1042,12 +1043,16 @@ static int __init init_scsi(void)
error = scsi_sysfs_register();
if (error)
goto cleanup_sysctl;
+ error = scsi_multipath_init();
+ if (error)
+ goto cleanup_sysfs;
scsi_netlink_init();
printk(KERN_NOTICE "SCSI subsystem initialized\n");
return 0;
-
+cleanup_sysfs:
+ scsi_sysfs_unregister();
cleanup_sysctl:
scsi_exit_sysctl();
cleanup_hosts:
@@ -1066,6 +1071,7 @@ static int __init init_scsi(void)
static void __exit exit_scsi(void)
{
scsi_netlink_exit();
+ scsi_multipath_exit();
scsi_sysfs_unregister();
scsi_exit_sysctl();
scsi_exit_hosts();
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
new file mode 100644
index 0000000000000..ff37cfdf2f9d1
--- /dev/null
+++ b/drivers/scsi/scsi_multipath.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Oracle Corp
+ *
+ */
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_driver.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_multipath.h>
+
+#include "scsi_priv.h"
+
+enum {
+ SCSI_MULTIPATH_OFF,
+ SCSI_MULTIPATH_ON,
+ SCSI_MULTIPATH_ALWAYS,
+};
+
+static const char *scsi_multipath_modes[] = {
+ [SCSI_MULTIPATH_OFF] = "off",
+ [SCSI_MULTIPATH_ON] = "on",
+ [SCSI_MULTIPATH_ALWAYS] = "always",
+};
+
+static int scsi_multipath = SCSI_MULTIPATH_OFF;
+
+static int scsi_multipath_param_set(const char *val, const struct kernel_param *kp)
+{
+ if (!val)
+ return -EINVAL;
+ if (!strncmp(val, "on", 2))
+ scsi_multipath = SCSI_MULTIPATH_ON;
+ else if (!strncmp(val, "always", 6))
+ scsi_multipath = SCSI_MULTIPATH_ALWAYS;
+ else if (!strncmp(val, "off", 3))
+ scsi_multipath = SCSI_MULTIPATH_OFF;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int scsi_multipath_param_get(char *buf, const struct kernel_param *kp)
+{
+ return sprintf(buf, "%s\n", scsi_multipath_modes[scsi_multipath]);
+}
+
+static const struct kernel_param_ops multipath_param_ops = {
+ .set = scsi_multipath_param_set,
+ .get = scsi_multipath_param_get,
+};
+
+module_param_cb(multipath, &multipath_param_ops, &scsi_multipath, 0444);
+MODULE_PARM_DESC(multipath, "turn on native multipath support, options: on, off, always");
+
+static int scsi_mpath_unique_lun_id(struct scsi_device *sdev)
+{
+ struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
+ int ret;
+
+ ret = scsi_vpd_lun_id(sdev, scsi_mpath_dev->device_id_str,
+ SCSI_MPATH_DEVICE_ID_LEN);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int scsi_multipath_sdev_init(struct scsi_device *sdev)
+{
+ struct Scsi_Host *shost = sdev->host;
+ struct scsi_mpath_device *scsi_mpath_dev;
+ struct mpath_device *mpath_device;
+
+ scsi_mpath_dev = kzalloc(sizeof(*scsi_mpath_dev), GFP_KERNEL);
+ if (!scsi_mpath_dev)
+ return -ENOMEM;
+ scsi_mpath_dev->sdev = sdev;
+ sdev->scsi_mpath_dev = scsi_mpath_dev;
+
+ mpath_device = &scsi_mpath_dev->mpath_device;
+ mpath_device->numa_node = dev_to_node(shost->dma_dev);
+ mpath_device->access_state = MPATH_STATE_OPTIMIZED;
+
+ return 0;
+}
+
+static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
+{
+ kfree(sdev->scsi_mpath_dev);
+ sdev->scsi_mpath_dev = NULL;
+}
+
+int scsi_mpath_dev_alloc(struct scsi_device *sdev)
+{
+ int ret;
+
+ if (scsi_multipath == SCSI_MULTIPATH_OFF)
+ return 0;
+
+ if (!scsi_device_tpgs(sdev) && (scsi_multipath != SCSI_MULTIPATH_ALWAYS)) {
+ sdev_printk(KERN_DEBUG, sdev, "IMPLICIT TPGS are required for multipath support\n");
+ return 0;
+ }
+
+ ret = scsi_multipath_sdev_init(sdev);
+ if (ret)
+ return ret;
+
+ ret = scsi_mpath_unique_lun_id(sdev);
+ if (ret < 0) {
+ ret = 0;
+ goto out_uninit;
+ }
+
+ return 0;
+
+out_uninit:
+ scsi_multipath_sdev_uninit(sdev);
+ return ret;
+}
+
+void scsi_mpath_dev_release(struct scsi_device *sdev)
+{
+ struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
+
+ if (!scsi_mpath_dev)
+ return;
+
+ scsi_multipath_sdev_uninit(sdev);
+}
+
+int __init scsi_multipath_init(void)
+{
+ return 0;
+}
+
+void __exit scsi_multipath_exit(void)
+{
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("scsi_multipath");
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 2cfcf1f5d6a46..842dad8cc6a2f 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -46,6 +46,7 @@
#include <scsi/scsi_transport.h>
#include <scsi/scsi_dh.h>
#include <scsi/scsi_eh.h>
+#include <scsi/scsi_multipath.h>
#include "scsi_priv.h"
#include "scsi_logging.h"
@@ -1123,6 +1124,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
sdev->max_queue_depth = sdev->queue_depth;
WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth);
+ if (scsi_mpath_dev_alloc(sdev))
+ return SCSI_SCAN_NO_RESPONSE;
+
/*
* Ok, the device is now all set up, we can
* register it and tell the rest of the kernel
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 6b8c5c05f2944..47534d9f2cf9b 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -23,6 +23,7 @@
#include <scsi/scsi_transport.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_devinfo.h>
+#include <scsi/scsi_multipath.h>
#include "scsi_priv.h"
#include "scsi_logging.h"
@@ -455,6 +456,7 @@ static void scsi_device_dev_release(struct device *dev)
might_sleep();
scsi_dh_release_device(sdev);
+ scsi_mpath_dev_release(sdev);
parent = sdev->sdev_gendev.parent;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d32f5841f4f85..52974dba0a724 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -279,6 +279,8 @@ struct scsi_device {
struct device sdev_gendev,
sdev_dev;
+ struct scsi_mpath_device *scsi_mpath_dev;
+
struct work_struct requeue_work;
struct scsi_device_handler *handler;
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
new file mode 100644
index 0000000000000..d3d410dafd17a
--- /dev/null
+++ b/include/scsi/scsi_multipath.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SCSI_SCSI_MULTIPATH_H
+#define _SCSI_SCSI_MULTIPATH_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/blk-mq.h>
+#include <linux/multipath.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_devinfo.h>
+#include <scsi/scsi_driver.h>
+
+#ifdef CONFIG_SCSI_MULTIPATH
+#define SCSI_MPATH_DEVICE_ID_LEN 256
+
+struct scsi_mpath_device {
+ struct mpath_device mpath_device;
+ struct scsi_device *sdev;
+
+ char device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
+};
+#define to_scsi_mpath_device(d) \
+ container_of(d, struct scsi_mpath_device, mpath_device)
+
+int scsi_mpath_dev_alloc(struct scsi_device *sdev);
+void scsi_mpath_dev_release(struct scsi_device *sdev);
+int scsi_multipath_init(void);
+void scsi_multipath_exit(void);
+#else /* CONFIG_SCSI_MULTIPATH */
+
+struct scsi_mpath_device {
+};
+
+static inline int scsi_mpath_dev_alloc(struct scsi_device *sdev)
+{
+ return 0;
+}
+static inline void scsi_mpath_dev_release(struct scsi_device *sdev)
+{
+}
+static inline int scsi_multipath_init(void)
+{
+ return 0;
+}
+static inline void scsi_multipath_exit(void)
+{
+}
+#endif /* CONFIG_SCSI_MULTIPATH */
+#endif /* _SCSI_SCSI_MULTIPATH_H */
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 02/18] scsi-multipath: introduce scsi_device head structure
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
2026-04-28 11:14 ` [PATCH v2 01/18] scsi-multipath: introduce basic SCSI device support John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 03/18] scsi-multipath: provide sysfs link from to scsi_device John Garry
` (15 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Introduce a scsi_device head structure - scsi_mpath_head - to manage
multipathing for a scsi_device. This is similar to nvme_ns_head structure.
A list of scsi_mpath_head structures is maintained so that matching
multipathed scsi_device's can be looked up. Matching is done through the
scsi_device unique id.
A new class for multipathed devices is added, scsi_mpath_device_class.
The purpose of this class is for managing the scsi_mpath_head.dev member.
The naming for the scsi_device structure is in the form H:C:I:L,
where H is host, C is channel, I is ID, and L is lun.
However, for a multipathed scsi_device, all the naming members may be
different between member scsi_device's. As such, just use a simple
single-number naming index for each scsi_mpath_head.
The sysfs device folder will have links to the scsi_device's, so it will
be possible to look up the member scsi_device's.
An example sysfs entry is as follows:
# ls -l /sys/class/scsi_mpath_device/scsi_mpath_device0/
total 0
drwxr-xr-x 2 root root 0 Apr 13 15:48 power
lrwxrwxrwx 1 root root 0 Apr 13 15:48 subsystem -> ../../../../class/scsi_mpath_device
-rw-r--r-- 1 root root 4096 Apr 13 15:48 uevent
-r--r--r-- 1 root root 4096 Apr 13 15:48 vpd_id
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 209 +++++++++++++++++++++++++++++++++-
drivers/scsi/scsi_sysfs.c | 3 +
include/scsi/scsi_multipath.h | 29 +++++
3 files changed, 239 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index ff37cfdf2f9d1..18d50d051b3ba 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -27,6 +27,10 @@ static const char *scsi_multipath_modes[] = {
static int scsi_multipath = SCSI_MULTIPATH_OFF;
+static LIST_HEAD(scsi_mpath_heads_list);
+static DEFINE_MUTEX(scsi_mpath_heads_lock);
+static DEFINE_IDA(scsi_multipath_dev_ida);
+
static int scsi_multipath_param_set(const char *val, const struct kernel_param *kp)
{
if (!val)
@@ -69,6 +73,60 @@ static int scsi_mpath_unique_lun_id(struct scsi_device *sdev)
return 0;
}
+static void scsi_mpath_head_release(struct device *dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ container_of(dev, struct scsi_mpath_head, dev);
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+
+ ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
+ mpath_put_head(mpath_head);
+ kfree(scsi_mpath_head);
+}
+
+static ssize_t scsi_mpath_device_vpd_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ container_of(dev, struct scsi_mpath_head, dev);
+
+ return sysfs_emit(buf, "%s\n", scsi_mpath_head->vpd_id);
+}
+static DEVICE_ATTR(vpd_id, S_IRUGO, scsi_mpath_device_vpd_id_show, NULL);
+
+static struct attribute *scsi_mpath_device_attrs[] = {
+ &dev_attr_vpd_id.attr,
+ NULL
+};
+
+static const struct attribute_group scsi_mpath_device_attrs_group = {
+ .attrs = scsi_mpath_device_attrs,
+};
+
+static bool scsi_multipath_sysfs_group_visible(struct kobject *kobj)
+{
+ return true;
+}
+
+static bool scsi_multipath_sysfs_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ return false;
+}
+DEFINE_SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs)
+
+const struct attribute_group *scsi_mpath_device_groups[] = {
+ &scsi_mpath_device_attrs_group,
+ NULL
+};
+
+static const struct class scsi_mpath_device_class = {
+ .name = "scsi_mpath_device",
+ .dev_groups = scsi_mpath_device_groups,
+ .dev_release = scsi_mpath_head_release,
+};
+
static int scsi_multipath_sdev_init(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
@@ -88,6 +146,73 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+struct mpath_head_template smpdt = {
+};
+
+static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
+{
+ struct scsi_mpath_head *scsi_mpath_head;
+ int ret;
+
+ scsi_mpath_head = kzalloc(sizeof(*scsi_mpath_head), GFP_KERNEL);
+ if (!scsi_mpath_head)
+ return NULL;
+
+ ida_init(&scsi_mpath_head->ida);
+
+ scsi_mpath_head->mpath_head = mpath_alloc_head();
+ if (IS_ERR(scsi_mpath_head->mpath_head))
+ goto out_free;
+ scsi_mpath_head->mpath_head->mpdt = &smpdt;
+ scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
+
+ scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida, GFP_KERNEL);
+ if (scsi_mpath_head->index < 0)
+ goto out_put_head;
+ kref_init(&scsi_mpath_head->ref);
+
+ device_initialize(&scsi_mpath_head->dev);
+ scsi_mpath_head->dev.class = &scsi_mpath_device_class;
+ ret = dev_set_name(&scsi_mpath_head->dev, "scsi_mpath_device%d",
+ scsi_mpath_head->index);
+ if (ret) {
+ put_device(&scsi_mpath_head->dev);
+ goto out_free_ida;
+ }
+
+ return scsi_mpath_head;
+
+out_free_ida:
+ ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
+out_put_head:
+ mpath_put_head(scsi_mpath_head->mpath_head);
+out_free:
+ kfree(scsi_mpath_head);
+ return NULL;
+}
+
+static struct scsi_mpath_head *scsi_mpath_find_head(
+ struct scsi_mpath_device *scsi_mpath_dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head;
+ int ret;
+
+ list_for_each_entry(scsi_mpath_head, &scsi_mpath_heads_list, entry) {
+ ret = scsi_mpath_get_head(scsi_mpath_head);
+ if (ret)
+ continue;
+ if (strncmp(scsi_mpath_head->vpd_id,
+ scsi_mpath_dev->device_id_str,
+ SCSI_MPATH_DEVICE_ID_LEN) == 0) {
+
+ return scsi_mpath_head;
+ }
+ scsi_mpath_put_head(scsi_mpath_head);
+ }
+
+ return NULL;
+}
+
static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
{
kfree(sdev->scsi_mpath_dev);
@@ -96,6 +221,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
int scsi_mpath_dev_alloc(struct scsi_device *sdev)
{
+ struct scsi_mpath_head *scsi_mpath_head;
int ret;
if (scsi_multipath == SCSI_MULTIPATH_OFF)
@@ -116,13 +242,58 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
goto out_uninit;
}
- return 0;
+ mutex_lock(&scsi_mpath_heads_lock);
+ scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
+ if (scsi_mpath_head)
+ goto found;
+ scsi_mpath_head = scsi_mpath_alloc_head();
+ if (!scsi_mpath_head) {
+ sdev_printk(KERN_NOTICE, sdev, "could not allocate multipath head, device multipathing disabled\n");
+ mutex_unlock(&scsi_mpath_heads_lock);
+ goto out_uninit;
+ }
+ strscpy(scsi_mpath_head->vpd_id, sdev->scsi_mpath_dev->device_id_str,
+ SCSI_MPATH_DEVICE_ID_LEN);
+
+ ret = device_add(&scsi_mpath_head->dev);
+ if (ret) {
+ mutex_unlock(&scsi_mpath_heads_lock);
+ goto out_put_head;
+ }
+
+ list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
+found:
+ mutex_unlock(&scsi_mpath_heads_lock);
+ ret = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
+ if (ret < 0)
+ goto out_put_head;
+ sdev->scsi_mpath_dev->index = ret;
+
+ sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
+ return 0;
+out_put_head:
+ scsi_mpath_put_head(scsi_mpath_head);
out_uninit:
scsi_multipath_sdev_uninit(sdev);
return ret;
}
+static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev)
+{
+ scsi_mpath_put_head(scsi_mpath_dev->scsi_mpath_head);
+ scsi_mpath_dev->scsi_mpath_head = NULL;
+}
+
+void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev->scsi_mpath_head;
+
+ ida_free(&scsi_mpath_head->ida, scsi_mpath_dev->index);
+
+ scsi_mpath_remove_head(scsi_mpath_dev);
+}
+
void scsi_mpath_dev_release(struct scsi_device *sdev)
{
struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
@@ -133,13 +304,47 @@ void scsi_mpath_dev_release(struct scsi_device *sdev)
scsi_multipath_sdev_uninit(sdev);
}
-int __init scsi_multipath_init(void)
+int scsi_mpath_get_head(struct scsi_mpath_head *scsi_mpath_head)
{
+ if (!kref_get_unless_zero(&scsi_mpath_head->ref))
+ return -ENXIO;
return 0;
}
+EXPORT_SYMBOL_GPL(scsi_mpath_get_head);
+
+static void scsi_mpath_free_head(struct kref *ref)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ container_of(ref, struct scsi_mpath_head, ref);
+
+ /*
+ * If we race with scsi_mpath_find_head(), then that function may
+ * find this scsi_mpath_head in the heads list; however we would fail
+ * to take a reference to this scsi_mpath_head and continue the search.
+ * As such, it is safe to call device_unregister (and free
+ * scsi_mpath_head) after we delete this head from the list.
+ */
+ mutex_lock(&scsi_mpath_heads_lock);
+ list_del_init(&scsi_mpath_head->entry);
+ mutex_unlock(&scsi_mpath_heads_lock);
+
+ device_unregister(&scsi_mpath_head->dev);
+}
+
+void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
+{
+ kref_put(&scsi_mpath_head->ref, scsi_mpath_free_head);
+}
+EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
+
+int __init scsi_multipath_init(void)
+{
+ return class_register(&scsi_mpath_device_class);
+}
void __exit scsi_multipath_exit(void)
{
+ class_unregister(&scsi_mpath_device_class);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 47534d9f2cf9b..043fd2d9cc417 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1485,6 +1485,9 @@ void __scsi_remove_device(struct scsi_device *sdev)
} else
put_device(&sdev->sdev_dev);
+ if (sdev->scsi_mpath_dev)
+ scsi_mpath_remove_device(sdev->scsi_mpath_dev);
+
/*
* Stop accepting new requests and wait until all queuecommand() and
* scsi_run_queue() invocations have finished before tearing down the
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index d3d410dafd17a..b3e0b98f39c56 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -19,12 +19,25 @@
#ifdef CONFIG_SCSI_MULTIPATH
#define SCSI_MPATH_DEVICE_ID_LEN 256
+struct scsi_mpath_head {
+ char vpd_id[SCSI_MPATH_DEVICE_ID_LEN];
+ struct list_head entry;
+ struct ida ida;
+ struct kref ref;
+ struct mpath_head *mpath_head;
+ struct device dev;
+ int index;
+};
+
struct scsi_mpath_device {
struct mpath_device mpath_device;
struct scsi_device *sdev;
+ int index;
+ struct scsi_mpath_head *scsi_mpath_head;
char device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
};
+
#define to_scsi_mpath_device(d) \
container_of(d, struct scsi_mpath_device, mpath_device)
@@ -32,8 +45,13 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev);
void scsi_mpath_dev_release(struct scsi_device *sdev);
int scsi_multipath_init(void);
void scsi_multipath_exit(void);
+void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev);
+int scsi_mpath_get_head(struct scsi_mpath_head *);
+void scsi_mpath_put_head(struct scsi_mpath_head *);
#else /* CONFIG_SCSI_MULTIPATH */
+struct scsi_mpath_head {
+};
struct scsi_mpath_device {
};
@@ -51,5 +69,16 @@ static inline int scsi_multipath_init(void)
static inline void scsi_multipath_exit(void)
{
}
+static inline void scsi_mpath_remove_device(struct scsi_mpath_device
+ *scsi_mpath_dev)
+{
+}
+static inline int scsi_mpath_get_head(struct scsi_mpath_head *)
+{
+ return 0;
+}
+static inline void scsi_mpath_put_head(struct scsi_mpath_head *)
+{
+}
#endif /* CONFIG_SCSI_MULTIPATH */
#endif /* _SCSI_SCSI_MULTIPATH_H */
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 03/18] scsi-multipath: provide sysfs link from to scsi_device
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
2026-04-28 11:14 ` [PATCH v2 01/18] scsi-multipath: introduce basic SCSI device support John Garry
2026-04-28 11:14 ` [PATCH v2 02/18] scsi-multipath: introduce scsi_device head structure John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 04/18] scsi-multipath: support iopolicy John Garry
` (14 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry, Hannes Reinecke
Provide a link in sysfs from a scsi_mpath_device to member scsi_device's.
An example is as follows:
# ls -l /sys/class/scsi_mpath_device/scsi_mpath_device0/multipath/
total 0
lrwxrwxrwx 1 root root 0 Feb 24 12:01 8:0:0:0 -> ../../../../platform/host8/session1/target8:0:0/8:0:0:0
lrwxrwxrwx 1 root root 0 Feb 24 12:01 9:0:0:0 -> ../../../../platform/host9/session2/target9:0:0/9:0:0:0
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 51 ++++++++++++++++++++++++++++++-----
drivers/scsi/scsi_sysfs.c | 5 ++++
include/scsi/scsi_multipath.h | 8 ++++++
3 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 18d50d051b3ba..31ab1d477c686 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -104,23 +104,62 @@ static const struct attribute_group scsi_mpath_device_attrs_group = {
.attrs = scsi_mpath_device_attrs,
};
+static struct attribute dummy_attr = {
+ .name = "dummy",
+};
+
+static struct attribute *scsi_mpath_attrs[] = {
+ &dummy_attr,
+ NULL
+};
+
static bool scsi_multipath_sysfs_group_visible(struct kobject *kobj)
{
return true;
}
+DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs)
-static bool scsi_multipath_sysfs_attr_visible(struct kobject *kobj,
- struct attribute *attr, int n)
-{
- return false;
-}
-DEFINE_SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs)
+static const struct attribute_group scsi_mpath_attr_group = {
+ .name = "multipath",
+ .attrs = scsi_mpath_attrs,
+ .is_visible = SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs),
+};
const struct attribute_group *scsi_mpath_device_groups[] = {
&scsi_mpath_device_attrs_group,
+ &scsi_mpath_attr_group,
NULL
};
+void scsi_mpath_add_sysfs_link(struct scsi_device *sdev)
+{
+ struct device *target = &sdev->sdev_gendev;
+ struct scsi_mpath_head *scsi_mpath_head =
+ sdev->scsi_mpath_dev->scsi_mpath_head;
+ struct device *source = &scsi_mpath_head->dev;
+ int error;
+
+ error = sysfs_add_link_to_group(&source->kobj, "multipath",
+ &target->kobj, dev_name(target));
+ if (error) {
+ sdev_printk(KERN_INFO, sdev, "Failed to create mpath sysfs link, error=%d\n",
+ error);
+ }
+}
+EXPORT_SYMBOL_GPL(scsi_mpath_add_sysfs_link);
+
+void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev)
+{
+ struct device *target = &sdev->sdev_gendev;
+ struct scsi_mpath_head *scsi_mpath_head =
+ sdev->scsi_mpath_dev->scsi_mpath_head;
+ struct device *source = &scsi_mpath_head->dev;
+
+ sysfs_remove_link_from_group(&source->kobj, "multipath",
+ dev_name(target));
+}
+EXPORT_SYMBOL_GPL(scsi_mpath_remove_sysfs_link);
+
static const struct class scsi_mpath_device_class = {
.name = "scsi_mpath_device",
.dev_groups = scsi_mpath_device_groups,
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 043fd2d9cc417..bb4fcd03d8777 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1427,6 +1427,9 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
transport_add_device(&sdev->sdev_gendev);
sdev->is_visible = 1;
+ if (sdev->scsi_mpath_dev)
+ scsi_mpath_add_sysfs_link(sdev);
+
if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) {
sdev->bsg_dev = scsi_bsg_register_queue(sdev);
if (IS_ERR(sdev->bsg_dev)) {
@@ -1479,6 +1482,8 @@ void __scsi_remove_device(struct scsi_device *sdev)
if (IS_ENABLED(CONFIG_BLK_DEV_BSG) && sdev->bsg_dev)
bsg_unregister_queue(sdev->bsg_dev);
+ if (sdev->scsi_mpath_dev)
+ scsi_mpath_remove_sysfs_link(sdev);
device_unregister(&sdev->sdev_dev);
transport_remove_device(dev);
device_del(dev);
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index b3e0b98f39c56..9e92d949392d4 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -46,6 +46,8 @@ void scsi_mpath_dev_release(struct scsi_device *sdev);
int scsi_multipath_init(void);
void scsi_multipath_exit(void);
void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev);
+void scsi_mpath_add_sysfs_link(struct scsi_device *sdev);
+void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev);
int scsi_mpath_get_head(struct scsi_mpath_head *);
void scsi_mpath_put_head(struct scsi_mpath_head *);
#else /* CONFIG_SCSI_MULTIPATH */
@@ -80,5 +82,11 @@ static inline int scsi_mpath_get_head(struct scsi_mpath_head *)
static inline void scsi_mpath_put_head(struct scsi_mpath_head *)
{
}
+static inline void scsi_mpath_add_sysfs_link(struct scsi_device *sdev)
+{
+}
+static inline void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev)
+{
+}
#endif /* CONFIG_SCSI_MULTIPATH */
#endif /* _SCSI_SCSI_MULTIPATH_H */
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 04/18] scsi-multipath: support iopolicy
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (2 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 03/18] scsi-multipath: provide sysfs link from to scsi_device John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 05/18] scsi-multipath: clone each bio John Garry
` (13 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add support to set the multipath iopolicy.
The iopolicy member is per scsi_mpath_head structure.
A module param is added so that the default iopolicy may be set.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 54 +++++++++++++++++++++++++++++++++++
include/scsi/scsi_multipath.h | 1 +
2 files changed, 55 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 31ab1d477c686..c6ae02644fdf4 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -60,6 +60,23 @@ static const struct kernel_param_ops multipath_param_ops = {
module_param_cb(multipath, &multipath_param_ops, &scsi_multipath, 0444);
MODULE_PARM_DESC(multipath, "turn on native multipath support, options: on, off, always");
+static int iopolicy = MPATH_IOPOLICY_NUMA;
+
+static int scsi_mpath_set_iopolicy_param(const char *val, const struct kernel_param *kp)
+{
+ return mpath_set_iopolicy(val, &iopolicy);
+}
+
+static int scsi_mpath_get_iopolicy_param(char *buf, const struct kernel_param *kp)
+{
+ return mpath_get_iopolicy(buf, iopolicy);
+}
+
+module_param_call(multipath_iopolicy, scsi_mpath_set_iopolicy_param,
+ scsi_mpath_get_iopolicy_param, &iopolicy, 0644);
+MODULE_PARM_DESC(multipath_iopolicy,
+ "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
+
static int scsi_mpath_unique_lun_id(struct scsi_device *sdev)
{
struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
@@ -95,8 +112,36 @@ static ssize_t scsi_mpath_device_vpd_id_show(struct device *dev,
}
static DEVICE_ATTR(vpd_id, S_IRUGO, scsi_mpath_device_vpd_id_show, NULL);
+static ssize_t scsi_mpath_device_iopolicy_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ container_of(dev, struct scsi_mpath_head, dev);
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+
+ if (!mpath_iopolicy_store(&scsi_mpath_head->iopolicy, buf, count))
+ return -EINVAL;
+
+ mpath_clear_paths(mpath_head);
+ mpath_schedule_requeue_work(mpath_head);
+ return count;
+}
+
+static ssize_t scsi_mpath_device_iopolicy_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ container_of(dev, struct scsi_mpath_head, dev);
+
+ return mpath_iopolicy_show(&scsi_mpath_head->iopolicy, buf);
+}
+
+static DEVICE_ATTR(iopolicy, S_IRUGO | S_IWUSR,
+ scsi_mpath_device_iopolicy_show, scsi_mpath_device_iopolicy_store);
+
static struct attribute *scsi_mpath_device_attrs[] = {
&dev_attr_vpd_id.attr,
+ &dev_attr_iopolicy.attr,
NULL
};
@@ -185,7 +230,15 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_head)
+{
+ struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata;
+
+ return mpath_read_iopolicy(&scsi_mpath_head->iopolicy);
+}
+
struct mpath_head_template smpdt = {
+ .get_iopolicy = scsi_mpath_get_iopolicy,
};
static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
@@ -204,6 +257,7 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
goto out_free;
scsi_mpath_head->mpath_head->mpdt = &smpdt;
scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
+ scsi_mpath_head->iopolicy.iopolicy = iopolicy;
scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida, GFP_KERNEL);
if (scsi_mpath_head->index < 0)
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index 9e92d949392d4..de6339989d2a2 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -24,6 +24,7 @@ struct scsi_mpath_head {
struct list_head entry;
struct ida ida;
struct kref ref;
+ struct mpath_iopolicy iopolicy;
struct mpath_head *mpath_head;
struct device dev;
int index;
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 05/18] scsi-multipath: clone each bio
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (3 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 04/18] scsi-multipath: support iopolicy John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 06/18] scsi-multipath: clear path when device is blocked John Garry
` (12 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
For failover handling, we will take the approach to resubmit each
bio.
However, unlike NVMe, SCSI gives no guarantee that a submitted bio is
either completed in full or not completed at all.
As such, for SCSI failover handling we take the approach of simply
re-submitting the original bio. To make this possible, clone each bio and
submit the clone.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 35 ++++++++++++++++++++++++++++++++++-
include/scsi/scsi_multipath.h | 1 +
2 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index c6ae02644fdf4..068c5e93ade1e 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -96,6 +96,7 @@ static void scsi_mpath_head_release(struct device *dev)
container_of(dev, struct scsi_mpath_head, dev);
struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ bioset_exit(&scsi_mpath_head->bio_pool);
ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
mpath_put_head(mpath_head);
kfree(scsi_mpath_head);
@@ -230,6 +231,32 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+/*
+ * Completion handler installed on every clone bio.
+ *
+ * The master bio is stored in clone->bi_private. The clone's status is
+ * copied to it, the clone is released and the master bio is completed.
+ */
+static void scsi_mpath_clone_end_io(struct bio *clone)
+{
+	struct bio *parent = clone->bi_private;
+	blk_status_t status = clone->bi_status;
+
+	parent->bi_status = status;
+	bio_put(clone);
+	bio_endio(parent);
+}
+
+/*
+ * .clone_bio callback: allocate a clone of @bio from the head's bio_pool.
+ *
+ * The clone targets the same block device as @bio, completes through
+ * scsi_mpath_clone_end_io() and keeps @bio in ->bi_private.
+ *
+ * Returns the clone, or NULL if bio_alloc_clone() returned NULL.
+ */
+static struct bio *scsi_mpath_clone_bio(struct bio *bio)
+{
+	struct block_device *bdev = bio->bi_bdev;
+	struct mpath_head *mpath_head = bdev->bd_disk->private_data;
+	struct scsi_mpath_head *head = mpath_head->drvdata;
+	struct bio *copy;
+
+	copy = bio_alloc_clone(bdev, bio, GFP_NOIO, &head->bio_pool);
+	if (copy) {
+		copy->bi_end_io = scsi_mpath_clone_end_io;
+		copy->bi_private = bio;
+	}
+
+	return copy;
+}
+
static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_head)
{
struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata;
@@ -239,6 +266,7 @@ static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_he
struct mpath_head_template smpdt = {
.get_iopolicy = scsi_mpath_get_iopolicy,
+ .clone_bio = scsi_mpath_clone_bio,
};
static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
@@ -252,9 +280,12 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
ida_init(&scsi_mpath_head->ida);
+ if (bioset_init(&scsi_mpath_head->bio_pool, BIO_POOL_SIZE,
+ 0, BIOSET_PERCPU_CACHE))
+ goto out_free;
scsi_mpath_head->mpath_head = mpath_alloc_head();
if (IS_ERR(scsi_mpath_head->mpath_head))
- goto out_free;
+ goto out_bioset_exit;
scsi_mpath_head->mpath_head->mpdt = &smpdt;
scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
scsi_mpath_head->iopolicy.iopolicy = iopolicy;
@@ -279,6 +310,8 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
out_put_head:
mpath_put_head(scsi_mpath_head->mpath_head);
+out_bioset_exit:
+ bioset_exit(&scsi_mpath_head->bio_pool);
out_free:
kfree(scsi_mpath_head);
return NULL;
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index de6339989d2a2..c2eeccea52d3b 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -25,6 +25,7 @@ struct scsi_mpath_head {
struct ida ida;
struct kref ref;
struct mpath_iopolicy iopolicy;
+ struct bio_set bio_pool;
struct mpath_head *mpath_head;
struct device dev;
int index;
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 06/18] scsi-multipath: clear path when device is blocked
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (4 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 05/18] scsi-multipath: clone each bio John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 07/18] scsi-multipath: failover handling John Garry
` (11 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add scsi_mpath_dev_clear_path() to clear a device path when it becomes
blocked, and call from __scsi_internal_device_block_nowait().
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_lib.c | 3 +++
drivers/scsi/scsi_multipath.c | 11 +++++++++++
include/scsi/scsi_multipath.h | 5 +++++
3 files changed, 19 insertions(+)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d3a8cd4166f92..43154f521198a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -33,6 +33,7 @@
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_transport.h> /* scsi_init_limits() */
+#include <scsi/scsi_multipath.h>
#include <scsi/scsi_dh.h>
#include <trace/events/scsi.h>
@@ -2907,6 +2908,8 @@ EXPORT_SYMBOL(scsi_target_resume);
static int __scsi_internal_device_block_nowait(struct scsi_device *sdev)
{
+ if (sdev->scsi_mpath_dev)
+ scsi_mpath_dev_clear_path(sdev->scsi_mpath_dev);
if (scsi_device_set_state(sdev, SDEV_BLOCK))
return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 068c5e93ade1e..a4636a53ffbf4 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -113,6 +113,17 @@ static ssize_t scsi_mpath_device_vpd_id_show(struct device *dev,
}
static DEVICE_ATTR(vpd_id, S_IRUGO, scsi_mpath_device_vpd_id_show, NULL);
+/*
+ * Clear @scsi_mpath_dev as the current path of its multipath head.
+ *
+ * mpath_synchronize() is only called on the head when
+ * mpath_clear_current_path() returns non-zero.
+ */
+void scsi_mpath_dev_clear_path(struct scsi_mpath_device *scsi_mpath_dev)
+{
+	struct mpath_device *path = &scsi_mpath_dev->mpath_device;
+	struct mpath_head *head = scsi_mpath_dev->scsi_mpath_head->mpath_head;
+
+	if (!mpath_clear_current_path(path))
+		return;
+
+	mpath_synchronize(head);
+}
+EXPORT_SYMBOL_GPL(scsi_mpath_dev_clear_path);
+
static ssize_t scsi_mpath_device_iopolicy_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index c2eeccea52d3b..d0e1cda836865 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -47,6 +47,7 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev);
void scsi_mpath_dev_release(struct scsi_device *sdev);
int scsi_multipath_init(void);
void scsi_multipath_exit(void);
+void scsi_mpath_dev_clear_path(struct scsi_mpath_device *scsi_mpath_dev);
void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev);
void scsi_mpath_add_sysfs_link(struct scsi_device *sdev);
void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev);
@@ -73,6 +74,10 @@ static inline int scsi_multipath_init(void)
static inline void scsi_multipath_exit(void)
{
}
+/* No-op stub variant of scsi_mpath_dev_clear_path(): does nothing. */
+static inline void scsi_mpath_dev_clear_path(
+ struct scsi_mpath_device *scsi_mpath_dev)
+{
+}
static inline void scsi_mpath_remove_device(struct scsi_mpath_device
*scsi_mpath_dev)
{
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 07/18] scsi-multipath: failover handling
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (5 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 06/18] scsi-multipath: clear path when device is blocked John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 08/18] scsi-multipath: provide callbacks for path state John Garry
` (10 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Failover occurs when the scsi_cmnd has failed and it is discovered that the
target scsi_device has its transport down.
For a SCSI command which suffers failover, requeue the master bio of each
bio attached to its request.
A bio for which failover occurs is handled in
scsi_mpath_clone_end_io(). Failover is detected when blk_path_error()
reports a path error for the bio status, which is how dm-mpath detects it too.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index a4636a53ffbf4..0dcbf12217165 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -242,11 +242,44 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+/*
+ * Queue the master bio of @clone on @bl and release the clone.
+ *
+ * @bl:    list receiving the master bio (appended at the tail)
+ * @clone: clone bio whose ->bi_private holds the master bio
+ *
+ * The caller is responsible for any locking that protects @bl.
+ */
+static inline void bio_list_add_clone(struct bio_list *bl,
+		struct bio *clone)
+{
+	struct bio *master_bio = clone->bi_private;
+
+	/*
+	 * Use the generic helper rather than open-coding the tail append:
+	 * it also resets master_bio->bi_next, so the list is always
+	 * properly terminated even if the master bio carried a stale link.
+	 */
+	bio_list_add(bl, master_bio);
+	bio_put(clone);
+}
+
+/*
+ * Completion handler for a per-path clone bio.
+ *
+ * The clone's status is copied to the master bio (clone->bi_private).
+ * When the status is a path error according to blk_path_error(), and the
+ * clone was issued to a SCSI device that has multipath data attached,
+ * the path is cleared and the master bio is placed on the head's requeue
+ * list for resubmission instead of being completed. In every other case
+ * the clone is dropped and the master bio is completed.
+ */
static void scsi_mpath_clone_end_io(struct bio *clone)
{
struct bio *master_bio = clone->bi_private;
master_bio->bi_status = clone->bi_status;
+
+	if (clone->bi_status && blk_path_error(clone->bi_status)) {
+		struct request_queue *q = clone->bi_bdev->bd_queue;
+		/*
+		 * scsi_device_from_queue() returns NULL for a non-SCSI queue
+		 * and otherwise takes a reference on the device, which must
+		 * be dropped again on every path out of this branch.
+		 */
+		struct scsi_device *sdev = scsi_device_from_queue(q);
+		struct scsi_mpath_device *scsi_mpath_dev =
+			sdev ? sdev->scsi_mpath_dev : NULL;
+
+		if (scsi_mpath_dev) {
+			struct mpath_head *mpath_head =
+				scsi_mpath_dev->mpath_device.mpath_head;
+			unsigned long flags;
+
+			/*
+			 * NOTE(review): scsi_mpath_dev_clear_path() may call
+			 * mpath_synchronize(); confirm that is allowed in bio
+			 * completion context.
+			 */
+			scsi_mpath_dev_clear_path(scsi_mpath_dev);
+
+			spin_lock_irqsave(&mpath_head->requeue_lock, flags);
+			bio_list_add_clone(&mpath_head->requeue_list, clone);
+			spin_unlock_irqrestore(&mpath_head->requeue_lock, flags);
+
+			mpath_schedule_requeue_work(mpath_head);
+			put_device(&sdev->sdev_gendev);
+			return;
+		}
+
+		/* No multipath data to fail over with: complete with the error. */
+		if (sdev)
+			put_device(&sdev->sdev_gendev);
+	}
+
bio_put(clone);
bio_endio(master_bio);
}
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 08/18] scsi-multipath: provide callbacks for path state
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (6 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 07/18] scsi-multipath: failover handling John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 09/18] scsi-multipath: add scsi_mpath_get_nr_active() John Garry
` (9 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Provide callbacks for .is_disabled, .is_optimized, and .available_path.
These all use scsi_device.sdev_state and scsi_device.access_state.
Member scsi_device.access_state will be driven by ALUA. Currently
only device handlers support this, and in future we will have core
SCSI support for implicit ALUA (not relying on device handlers).
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 46 +++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 0dcbf12217165..a3bf95e2a18eb 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -308,7 +308,53 @@ static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_he
return mpath_read_iopolicy(&scsi_mpath_head->iopolicy);
}
+/*
+ * .is_disabled callback.
+ *
+ * A path counts as enabled only when its scsi_device is in SDEV_RUNNING
+ * and its access state is SCSI_ACCESS_STATE_OPTIMAL or
+ * SCSI_ACCESS_STATE_ACTIVE; everything else is reported as disabled.
+ */
+static bool scsi_mpath_is_disabled(struct mpath_device *mpath_device)
+{
+	struct scsi_device *sdev = to_scsi_mpath_device(mpath_device)->sdev;
+	unsigned char state = READ_ONCE(sdev->access_state);
+	bool running = sdev->sdev_state == SDEV_RUNNING;
+	bool usable = state == SCSI_ACCESS_STATE_OPTIMAL ||
+		      state == SCSI_ACCESS_STATE_ACTIVE;
+
+	return !(running && usable);
+}
+
+/*
+ * .is_optimized callback: true only for a scsi_device in SDEV_RUNNING
+ * whose access state is SCSI_ACCESS_STATE_OPTIMAL.
+ */
+static bool scsi_mpath_is_optimized(struct mpath_device *mpath_device)
+{
+	struct scsi_device *sdev = to_scsi_mpath_device(mpath_device)->sdev;
+
+	/* The access state is only looked at for a running device. */
+	return sdev->sdev_state == SDEV_RUNNING &&
+	       READ_ONCE(sdev->access_state) == SCSI_ACCESS_STATE_OPTIMAL;
+}
+
+/*
+ * .available_path callback: the path is reported as available while its
+ * scsi_device is in SDEV_RUNNING, SDEV_QUIESCE, SDEV_BLOCK or
+ * SDEV_CREATED_BLOCK, and as unavailable in any other state.
+ */
+static bool scsi_mpath_available_path(struct mpath_device *mpath_device)
+{
+	struct scsi_device *sdev = to_scsi_mpath_device(mpath_device)->sdev;
+
+	switch (sdev->sdev_state) {
+	case SDEV_RUNNING:
+	case SDEV_QUIESCE:
+	case SDEV_BLOCK:
+	case SDEV_CREATED_BLOCK:
+		return true;
+	default:
+		return false;
+	}
+}
+
struct mpath_head_template smpdt = {
+ .is_disabled = scsi_mpath_is_disabled,
+ .is_optimized = scsi_mpath_is_optimized,
+ .available_path = scsi_mpath_available_path,
.get_iopolicy = scsi_mpath_get_iopolicy,
.clone_bio = scsi_mpath_clone_bio,
};
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 09/18] scsi-multipath: add scsi_mpath_get_nr_active()
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (7 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 08/18] scsi-multipath: provide callbacks for path state John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 10/18] scsi-multipath: add scsi_mpath_{start,end}_request() John Garry
` (8 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add scsi_mpath_get_nr_active() for calculating the number of active
requests associated with an mpath_device. This is required for queue
depth multipath iopolicy.
For NVMe, this count is per controller. The reason is that many NSes may
be connected to a controller, so congestion should be judged at
controller level.
SCSI has no definition of a controller, but SCSI host is a comparable
concept.
Indeed, many SCSI disks may be connected to the same SCSI host, so it
makes sense to count number of active requests at this point. However,
for a transport like iSCSI Initiator over TCP/IP, we have a separate SCSI
host per SCSI device (so there the count would be same at SCSI device
level).
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 11 +++++++++++
include/scsi/scsi_host.h | 4 ++++
2 files changed, 15 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index a3bf95e2a18eb..80f32b940339f 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -351,12 +351,23 @@ static bool scsi_mpath_available_path(struct mpath_device *mpath_device)
return false;
}
+/*
+ * .get_nr_active callback: return the mpath_nr_active counter of the
+ * Scsi_Host that the path's scsi_device belongs to.
+ */
+static int scsi_mpath_get_nr_active(struct mpath_device *mpath_device)
+{
+	struct Scsi_Host *host = to_scsi_mpath_device(mpath_device)->sdev->host;
+
+	return atomic_read(&host->mpath_nr_active);
+}
+
struct mpath_head_template smpdt = {
.is_disabled = scsi_mpath_is_disabled,
.is_optimized = scsi_mpath_is_optimized,
.available_path = scsi_mpath_available_path,
.get_iopolicy = scsi_mpath_get_iopolicy,
.clone_bio = scsi_mpath_clone_bio,
+ .get_nr_active = scsi_mpath_get_nr_active,
};
static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index f6e12565a81de..979d1e89f0f13 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -752,6 +752,10 @@ struct Scsi_Host {
/* Delay for runtime autosuspend */
int rpm_autosuspend_delay;
+ #ifdef CONFIG_SCSI_MULTIPATH
+ atomic_t mpath_nr_active;
+ #endif
+
/*
* We should ensure that this is aligned, both for better performance
* and also because some compilers (m68k) don't automatically force
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 10/18] scsi-multipath: add scsi_mpath_{start,end}_request()
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (8 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 09/18] scsi-multipath: add scsi_mpath_get_nr_active() John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 11/18] scsi-multipath: block PR commands John Garry
` (7 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add scsi_mpath_{start,end}_request() to handle updating private multipath
request data, like nvme_mpath_{start,end}_request().
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_lib.c | 6 +++++
drivers/scsi/scsi_multipath.c | 51 +++++++++++++++++++++++++++++++++++
include/scsi/scsi_cmnd.h | 5 ++++
include/scsi/scsi_multipath.h | 9 +++++++
4 files changed, 71 insertions(+)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 43154f521198a..46ed669c41dc9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -645,6 +645,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
struct scsi_device *sdev = cmd->device;
struct request_queue *q = sdev->request_queue;
+ if (is_mpath_request(req))
+ scsi_mpath_end_request(req);
+
if (blk_update_request(req, error, bytes))
return true;
@@ -1893,6 +1896,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
cmd->submitter = SUBMITTED_BY_BLOCK_LAYER;
+ if (is_mpath_request(req))
+ scsi_mpath_start_request(req);
+
blk_mq_start_request(req);
if (blk_mq_is_reserved_rq(req)) {
reason = shost->hostt->queue_reserved_command(shost, cmd);
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 80f32b940339f..a2a21793db895 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -564,6 +564,57 @@ void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
}
EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
+/*
+ * Per-request multipath bookkeeping done when @req is started.
+ *
+ * - If mpath_qd_iopolicy() is true for the head and the command is not
+ *   yet flagged SCMD_MPATH_CNT_ACTIVE, bump the host's mpath_nr_active
+ *   counter and set the flag.
+ * - If I/O stats are enabled on the multipath disk's queue, @req is not
+ *   a passthrough request and the command is not yet flagged
+ *   SCMD_MPATH_IO_STATS, set the flag and start I/O accounting on the
+ *   multipath disk, recording the start time in the command.
+ */
+void scsi_mpath_start_request(struct request *req)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+	struct scsi_device *sdev = cmd->device;
+	struct scsi_mpath_head *head = sdev->scsi_mpath_dev->scsi_mpath_head;
+	struct gendisk *disk = head->mpath_head->disk;
+
+	if (mpath_qd_iopolicy(&head->iopolicy)) {
+		if (!(cmd->flags & SCMD_MPATH_CNT_ACTIVE)) {
+			atomic_inc(&sdev->host->mpath_nr_active);
+			cmd->flags |= SCMD_MPATH_CNT_ACTIVE;
+		}
+	}
+
+	if (blk_queue_io_stat(disk->queue) && !blk_rq_is_passthrough(req) &&
+	    !(cmd->flags & SCMD_MPATH_IO_STATS)) {
+		cmd->flags |= SCMD_MPATH_IO_STATS;
+		cmd->start_time = bdev_start_io_acct(disk->part0, req_op(req),
+						     jiffies);
+	}
+}
+
+/*
+ * Undo the bookkeeping done by scsi_mpath_start_request() for @req.
+ *
+ * scsi_end_request() calls this before blk_update_request(), and
+ * blk_update_request() may report that the request is not finished yet.
+ * This function can therefore run more than once for a single start, so
+ * each flag is cleared as soon as its accounting has been undone. That
+ * way the active counter is decremented and the I/O accounting is ended
+ * at most once per scsi_mpath_start_request().
+ */
+void scsi_mpath_end_request(struct request *req)
+{
+	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
+	struct scsi_device *sdev = scmd->device;
+	struct scsi_mpath_device *scsi_mpath_dev =
+		sdev->scsi_mpath_dev;
+	struct scsi_mpath_head *scsi_mpath_head =
+		scsi_mpath_dev->scsi_mpath_head;
+	struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+	struct gendisk *disk = mpath_head->disk;
+
+	if (scmd->flags & SCMD_MPATH_CNT_ACTIVE) {
+		struct Scsi_Host *shost = sdev->host;
+
+		atomic_dec_if_positive(&shost->mpath_nr_active);
+		scmd->flags &= ~SCMD_MPATH_CNT_ACTIVE;
+	}
+
+	if (!(scmd->flags & SCMD_MPATH_IO_STATS))
+		return;
+
+	scmd->flags &= ~SCMD_MPATH_IO_STATS;
+	bdev_end_io_acct(disk->part0, req_op(req),
+			 blk_rq_bytes(req) >> SECTOR_SHIFT,
+			 scmd->start_time);
+}
+
int __init scsi_multipath_init(void)
{
return class_register(&scsi_mpath_device_class);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 8ecfb94049db5..c6571a36e577b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -60,6 +60,8 @@ struct scsi_pointer {
#define SCMD_FAIL_IF_RECOVERING (1 << 4)
/* flags preserved across unprep / reprep */
#define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING)
+#define SCMD_MPATH_IO_STATS (1 << 5)
+#define SCMD_MPATH_CNT_ACTIVE (1 << 6)
/* for scmd->state */
#define SCMD_STATE_COMPLETE 0
@@ -139,6 +141,9 @@ struct scsi_cmnd {
* to release this memory. (The memory
* obtained by scsi_malloc is guaranteed
* to be at an address < 16Mb). */
+ #ifdef CONFIG_SCSI_MULTIPATH
+ unsigned long start_time;
+ #endif
int result; /* Status code from lower level driver */
};
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index d0e1cda836865..fdbdb0e5d02e0 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -53,6 +53,9 @@ void scsi_mpath_add_sysfs_link(struct scsi_device *sdev);
void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev);
int scsi_mpath_get_head(struct scsi_mpath_head *);
void scsi_mpath_put_head(struct scsi_mpath_head *);
+void scsi_mpath_start_request(struct request *req);
+void scsi_mpath_end_request(struct request *req);
+
#else /* CONFIG_SCSI_MULTIPATH */
struct scsi_mpath_head {
@@ -89,6 +92,12 @@ static inline int scsi_mpath_get_head(struct scsi_mpath_head *)
static inline void scsi_mpath_put_head(struct scsi_mpath_head *)
{
}
+/*
+ * No-op stubs for scsi_mpath_start_request() and
+ * scsi_mpath_end_request(). The parameters are named because omitting a
+ * parameter name in a function definition is only valid from C23 on and
+ * is rejected by older compilers.
+ */
+static inline void scsi_mpath_start_request(struct request *req)
+{
+}
+static inline void scsi_mpath_end_request(struct request *req)
+{
+}
static inline void scsi_mpath_add_sysfs_link(struct scsi_device *sdev)
{
}
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 11/18] scsi-multipath: block PR commands
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (9 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 10/18] scsi-multipath: add scsi_mpath_{start,end}_request() John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 12/18] scsi-multipath: add delayed disk removal support John Garry
` (6 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
As described by Benjamin in the following link, PR support for SCSI is
quite complicated:
https://lore.kernel.org/linux-scsi/aaHecneNg9Q8EtiS@redhat.com/
For initial scsi-multipath support, just don't support PRs. This means that
we need to intercept PR SCSI commands for passthrough and reject them.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_lib.c | 6 ++++++
drivers/scsi/scsi_multipath.c | 11 +++++++++++
include/scsi/scsi_multipath.h | 5 +++++
3 files changed, 22 insertions(+)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 46ed669c41dc9..c3c6831af97dc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1296,6 +1296,12 @@ static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev,
{
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+ if (sdev->scsi_mpath_dev) {
+ blk_status_t ret = scsi_mpath_setup_scsi_cmnd(cmd);
+ if (ret)
+ return ret;
+ }
+
/*
* Passthrough requests may transfer data, in which case they must
* a bio attached to them. Or they might contain a SCSI command
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index a2a21793db895..e0670d353e59f 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -242,6 +242,17 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+/*
+ * Filter a SCSI command by its opcode (first CDB byte).
+ *
+ * PERSISTENT RESERVE IN/OUT need special handling which is not yet
+ * supported, so they are rejected with BLK_STS_NOTSUPP. Every other
+ * opcode yields BLK_STS_OK.
+ */
+blk_status_t scsi_mpath_setup_scsi_cmnd(struct scsi_cmnd *scmd)
+{
+	const unsigned char opcode = scmd->cmnd[0];
+
+	if (opcode == PERSISTENT_RESERVE_IN ||
+	    opcode == PERSISTENT_RESERVE_OUT)
+		return BLK_STS_NOTSUPP;
+
+	return BLK_STS_OK;
+}
+
static inline void bio_list_add_clone(struct bio_list *bl,
struct bio *clone)
{
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index fdbdb0e5d02e0..52c80b0658d61 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -43,6 +43,7 @@ struct scsi_mpath_device {
#define to_scsi_mpath_device(d) \
container_of(d, struct scsi_mpath_device, mpath_device)
+blk_status_t scsi_mpath_setup_scsi_cmnd(struct scsi_cmnd *);
int scsi_mpath_dev_alloc(struct scsi_device *sdev);
void scsi_mpath_dev_release(struct scsi_device *sdev);
int scsi_multipath_init(void);
@@ -63,6 +64,10 @@ struct scsi_mpath_head {
struct scsi_mpath_device {
};
+/*
+ * Stub for scsi_mpath_setup_scsi_cmnd(): never rejects a command. The
+ * parameter is named because omitting a parameter name in a function
+ * definition is only valid from C23 on and is rejected by older
+ * compilers.
+ */
+static inline blk_status_t scsi_mpath_setup_scsi_cmnd(struct scsi_cmnd *scmd)
+{
+	return BLK_STS_OK;
+}
static inline int scsi_mpath_dev_alloc(struct scsi_device *sdev)
{
return 0;
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 12/18] scsi-multipath: add delayed disk removal support
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (10 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 11/18] scsi-multipath: block PR commands John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 13/18] scsi: sd: add multipath disk class John Garry
` (5 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add support in core code for delayed disk removal. For this, the
.remove_head callback calls into the scsi_driver to do the necessary
removal work.
The scsi_disk driver (sd) must ensure that the scsi_mpath_device does not
go away while the delayed removal work is active, i.e. it must keep a
reference.
No reference to the scsi_disk multipath structures is kept outside that
driver, so that driver needs to provide a scsi_driver.mpath_remove_head
callback to do the necessary work.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 18 ++++++++++++++++++
include/scsi/scsi_driver.h | 4 ++++
2 files changed, 22 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index e0670d353e59f..2806477e1137b 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -372,7 +372,25 @@ static int scsi_mpath_get_nr_active(struct mpath_device *mpath_device)
return atomic_read(&shost->mpath_nr_active);
}
+/*
+ * bus_for_each_drv() callback: give one SCSI driver the chance to remove
+ * the multipath head passed in @data.
+ *
+ * Drivers without an ->mpath_remove_head hook are skipped. Always
+ * returns 0.
+ */
+static int scsi_mpath_remove_head_drv(struct device_driver *drv, void *data)
+{
+	struct scsi_driver *sdrv = to_scsi_driver(drv);
+	struct scsi_mpath_head *head = data;
+
+	if (!sdrv->mpath_remove_head)
+		return 0;
+
+	sdrv->mpath_remove_head(head);
+	return 0;
+}
+
+/*
+ * .remove_head callback: walk all drivers on the SCSI bus and call
+ * scsi_mpath_remove_head_drv() for each, passing the head's drvdata.
+ */
+static void scsi_mpath_remove_head_work(struct mpath_head *mpath_head)
+{
+	void *scsi_mpath_head = mpath_head->drvdata;
+
+	bus_for_each_drv(&scsi_bus_type, NULL, scsi_mpath_head,
+			 scsi_mpath_remove_head_drv);
+}
+
struct mpath_head_template smpdt = {
+ .remove_head = scsi_mpath_remove_head_work,
.is_disabled = scsi_mpath_is_disabled,
.is_optimized = scsi_mpath_is_optimized,
.available_path = scsi_mpath_available_path,
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 249cea724abd1..d92b63d357f2a 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -8,6 +8,7 @@
struct module;
struct request;
+struct scsi_mpath_head;
struct scsi_driver {
struct device_driver gendrv;
@@ -22,6 +23,9 @@ struct scsi_driver {
int (*done)(struct scsi_cmnd *);
int (*eh_action)(struct scsi_cmnd *, int);
void (*eh_reset)(struct scsi_cmnd *);
+ #ifdef CONFIG_SCSI_MULTIPATH
+ void (*mpath_remove_head)(struct scsi_mpath_head *);
+ #endif
};
#define to_scsi_driver(drv) \
container_of((drv), struct scsi_driver, gendrv)
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 13/18] scsi: sd: add multipath disk class
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (11 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 12/18] scsi-multipath: add delayed disk removal support John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 14/18] scsi: sd: support multipath disk John Garry
` (4 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add a new class, sd_mpath_disk_class, which is the multipath version of
the scsi_disk class.
Structure sd_mpath_disk is introduced to manage the multipath gendisk.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
drivers/scsi/sd.h | 3 +++
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 628a1d0a74bac..c74f336f8cba9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -70,6 +70,7 @@
#include <scsi/scsi_ioctl.h>
#include <scsi/scsicam.h>
#include <scsi/scsi_common.h>
+#include <scsi/scsi_multipath.h>
#include "sd.h"
#include "scsi_priv.h"
@@ -110,6 +111,39 @@ static DEFINE_IDA(sd_index_ida);
static mempool_t *sd_page_pool;
static struct lock_class_key sd_bio_compl_lkclass;
+#ifdef CONFIG_SCSI_MULTIPATH
+/* Per-multipath-disk state kept by sd; currently only the SCSI multipath head. */
+struct sd_mpath_disk {
+ struct scsi_mpath_head *scsi_mpath_head;
+};
+
+/*
+ * Release callback for devices of sd_mpath_disk_class.
+ * NOTE(review): intentionally empty here - nothing is freed at this point.
+ */
+static void sd_mpath_disk_release(struct device *dev)
+{
+}
+
+/* Device class named "scsi_mpath_disk". */
+static const struct class sd_mpath_disk_class = {
+ .name = "scsi_mpath_disk",
+ .dev_release = sd_mpath_disk_release,
+};
+
+/* Register sd_mpath_disk_class; returns the class_register() result. */
+static int sd_mpath_class_register(void)
+{
+ return class_register(&sd_mpath_disk_class);
+}
+
+/* Unregister sd_mpath_disk_class. */
+static void sd_mpath_class_unregister(void)
+{
+ class_unregister(&sd_mpath_disk_class);
+}
+#else /* CONFIG_SCSI_MULTIPATH */
+/* Without CONFIG_SCSI_MULTIPATH there is no class: report success. */
+static int sd_mpath_class_register(void)
+{
+ return 0;
+}
+
+/* Without CONFIG_SCSI_MULTIPATH there is nothing to unregister. */
+static void sd_mpath_class_unregister(void)
+{
+}
+#endif
static const char *sd_cache_types[] = {
"write through", "none", "write back",
@@ -4399,11 +4433,15 @@ static int __init init_sd(void)
if (err)
goto err_out;
+ err = sd_mpath_class_register();
+ if (err)
+ goto err_out_class;
+
sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
if (!sd_page_pool) {
printk(KERN_ERR "sd: can't init discard page pool\n");
err = -ENOMEM;
- goto err_out_class;
+ goto err_out_mpath_class;
}
err = scsi_register_driver(&sd_template);
@@ -4414,6 +4452,8 @@ static int __init init_sd(void)
err_out_driver:
mempool_destroy(sd_page_pool);
+err_out_mpath_class:
+ sd_mpath_class_unregister();
err_out_class:
class_unregister(&sd_disk_class);
err_out:
@@ -4437,6 +4477,7 @@ static void __exit exit_sd(void)
mempool_destroy(sd_page_pool);
class_unregister(&sd_disk_class);
+ sd_mpath_class_unregister();
for (i = 0; i < SD_MAJORS; i++)
unregister_blkdev(sd_major(i), "sd");
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 574af82430169..304b24644d942 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -83,6 +83,9 @@ struct zoned_disk_info {
struct scsi_disk {
struct scsi_device *device;
+ #ifdef CONFIG_SCSI_MULTIPATH
+ struct sd_mpath_disk *sd_mpath_disk;
+ #endif
/*
* disk_dev is used to show attributes in /sys/class/scsi_disk/,
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 14/18] scsi: sd: support multipath disk
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (12 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 13/18] scsi: sd: add multipath disk class John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 15/18] scsi: sd: add multipath disk attr groups John Garry
` (3 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add support to attach a multipath disk.
We still allocate the gendisk per path, and this is required for the
per-path submission. However, those gendisks are marked as hidden. Those
disks are named sdX:Y, where X is the multipath disk index and Y is the
per-path index.
A global list of sd_mpath_disks is kept for matching scsi_device's.
The multipath gendisk has the name and disk->major/minor set to mimic a
scsi_disk.
The following is an example of relevant scsi_disk and block sysfs
directories:
$ ls -l /sys/block/ | grep sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc -> ../devices/virtual/scsi_mpath_disk/0/sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc:0 -> ../devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
lrwxrwxrwx 1 root root 0 Feb 24 16:02 sdc:1 -> ../devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
$ ls -l /sys/class/scsi_mpath_disk/scsi_mpath_disk0/
total 0
drwxr-xr-x 2 root root 0 Feb 24 16:03 power
drwxr-xr-x 11 root root 0 Feb 24 16:01 sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 subsystem -> ../../../../class/scsi_mpath_disk
-rw-r--r-- 1 root root 4096 Feb 24 16:01 uevent
$ ls -l /sys/class/scsi_mpath_disk/scsi_mpath_disk0/sdc/multipath/
total 0
lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:0 -> ../../../../../platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:1 -> ../../../../../platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
$ ls -l /dev/sdc*
brw-rw---- 1 root disk 8, 32 Feb 24 16:01 /dev/sdc
brw-rw---- 1 root disk 8, 33 Feb 24 16:01 /dev/sdc1
brw-rw---- 1 root disk 8, 34 Feb 24 16:01 /dev/sdc2
$ lsblk /dev/sdc
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
sdc 8:32 0 600M 0 disk
|-sdc1 8:33 0 9M 0 part
`-sdc2 8:34 0 568M 0 part
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 396 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 378 insertions(+), 18 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index c74f336f8cba9..ca20f9430b4ac 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -112,12 +112,30 @@ static DEFINE_IDA(sd_index_ida);
static mempool_t *sd_page_pool;
static struct lock_class_key sd_bio_compl_lkclass;
#ifdef CONFIG_SCSI_MULTIPATH
+static LIST_HEAD(sd_mpath_disks_list);
+static DEFINE_MUTEX(sd_mpath_disks_lock);
+
struct sd_mpath_disk {
+ struct device dev;
+ int disk_index;
+ int disk_count;
+ struct list_head entry;
struct scsi_mpath_head *scsi_mpath_head;
};
static void sd_mpath_disk_release(struct device *dev)
{
+ struct sd_mpath_disk *sd_mpath_disk =
+ container_of(dev, struct sd_mpath_disk, dev);
+ struct scsi_mpath_head *scsi_mpath_head =
+ sd_mpath_disk->scsi_mpath_head;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+
+ mpath_put_disk(mpath_head);
+ ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
+ scsi_mpath_put_head(scsi_mpath_head);
+
+ kfree(sd_mpath_disk);
}
static const struct class sd_mpath_disk_class = {
@@ -787,7 +805,8 @@ static void scsi_disk_release(struct device *dev)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
- ida_free(&sd_index_ida, sdkp->index);
+ if (sdkp->index >= 0)
+ ida_free(&sd_index_ida, sdkp->index);
put_device(&sdkp->device->sdev_gendev);
free_opal_dev(sdkp->opal_dev);
@@ -3964,6 +3983,322 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
return 0;
}
+#ifdef CONFIG_SCSI_MULTIPATH
+static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
+{
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ struct gendisk *disk = mpath_head->disk;
+ struct queue_limits *sdkp_lim = &sdkp->disk->queue->limits;
+ struct queue_limits lim;
+ unsigned int memflags;
+ int ret;
+
+ lim = queue_limits_start_update(disk->queue);
+ memflags = blk_mq_freeze_queue(disk->queue);
+
+ lim.logical_block_size = sdkp_lim->logical_block_size;
+ lim.physical_block_size = sdkp_lim->physical_block_size;
+ lim.io_min = sdkp_lim->io_min;
+ lim.io_opt = sdkp_lim->io_opt;
+
+ queue_limits_stack_bdev(&lim, sdkp->disk->part0, 0,
+ disk->disk_name);
+
+ /* TODO: setup integrity limits */
+ lim.max_write_streams = sdkp_lim->max_write_streams;
+ lim.write_stream_granularity = sdkp_lim->write_stream_granularity;
+ ret = queue_limits_commit_update(disk->queue, &lim);
+
+ set_capacity_and_notify(disk, get_capacity(sdkp->disk));
+
+ blk_mq_unfreeze_queue(disk->queue, memflags);
+
+ return ret;
+}
+static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk)
+{
+ if (!get_device(&sd_mpath_disk->dev))
+ return -ENXIO;
+ return 0;
+}
+
+static void sd_mpath_put_disk(struct sd_mpath_disk *sd_mpath_disk)
+{
+ put_device(&sd_mpath_disk->dev);
+}
+
+static struct sd_mpath_disk *sd_mpath_find_disk(
+ struct scsi_mpath_head *scsi_mpath_head)
+{
+ struct sd_mpath_disk *sd_mpath_disk;
+ int ret;
+
+ list_for_each_entry(sd_mpath_disk, &sd_mpath_disks_list, entry) {
+ ret = sd_mpath_get_disk(sd_mpath_disk);
+ if (ret)
+ continue;
+
+ if (sd_mpath_disk->scsi_mpath_head == scsi_mpath_head)
+ return sd_mpath_disk;
+
+ sd_mpath_put_disk(sd_mpath_disk);
+ }
+
+ return NULL;
+}
+
+static void sd_mpath_add_disk(struct scsi_disk *sdkp)
+{
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+ struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device;
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+
+ mpath_device->disk = sdkp->disk;
+ mpath_device->numa_node = dev_to_node(sdp->host->dma_dev);
+ mpath_add_device(mpath_head, mpath_device);
+ mpath_device_set_live(mpath_device);
+}
+
+static int sd_mpath_probe(struct scsi_disk *sdkp)
+{
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+ struct device *dma_dev = sdp->host->dma_dev;
+ struct scsi_mpath_head *scsi_mpath_head =
+ scsi_mpath_dev->scsi_mpath_head;
+ struct sd_mpath_disk *sd_mpath_disk;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ char disk_name[DISK_NAME_LEN - 2];
+ struct queue_limits lim;
+ struct gendisk *disk;
+ int error;
+
+ /*
+ * sd_mpath_disks_list is kept locked if no disk found.
+ * Otherwise an extra reference is taken.
+ */
+ mutex_lock(&sd_mpath_disks_lock);
+ sd_mpath_disk = sd_mpath_find_disk(scsi_mpath_head);
+ if (sd_mpath_disk) {
+ error = sized_strscpy(disk_name, mpath_head->disk->disk_name,
+ sizeof(disk_name));
+ if (error < 0) {
+ /*
+ * Should not happen as would fail for the same when
+ * allocating the sd_mpath_disk
+ */
+ sd_mpath_put_disk(sd_mpath_disk);
+ mutex_unlock(&sd_mpath_disks_lock);
+ return error;
+ }
+ sd_mpath_disk->disk_count++;
+ mutex_unlock(&sd_mpath_disks_lock);
+
+ goto found;
+ }
+
+ sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL);
+ if (!sd_mpath_disk) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sd_mpath_disk->scsi_mpath_head = scsi_mpath_head;
+ device_initialize(&sd_mpath_disk->dev);
+ sd_mpath_disk->dev.class = &sd_mpath_disk_class;
+
+ blk_set_stacking_limits(&lim);
+ lim.dma_alignment = 3;
+ lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
+ BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
+
+ error = mpath_alloc_head_disk(mpath_head, &lim,
+ dev_to_node(dma_dev));
+ if (error)
+ goto out_free_disk;
+ disk = mpath_head->disk;
+
+ mpath_head->parent = &sd_mpath_disk->dev;
+
+ error = ida_alloc(&sd_index_ida, GFP_KERNEL);
+ if (error < 0) {
+ sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
+ goto out_put_disk;
+ }
+ sd_mpath_disk->disk_index = error;
+ error = sd_format_disk_name("sd", sd_mpath_disk->disk_index,
+ disk->disk_name, DISK_NAME_LEN);
+ if (error)
+ goto out_free_index;
+
+ error = sized_strscpy(disk_name, mpath_head->disk->disk_name,
+ sizeof(disk_name));
+ if (error < 0)
+ goto out_free_index;
+
+ error = dev_set_name(&sd_mpath_disk->dev, "scsi_mpath_disk%d",
+ scsi_mpath_head->index);
+ if (error)
+ goto out_free_index;
+
+ /* undone in sd_mpath_disk_release() */
+ scsi_mpath_get_head(scsi_mpath_head);
+ scsi_mpath_head->mpath_head->drv_module = THIS_MODULE;
+
+ error = device_add(&sd_mpath_disk->dev);
+ if (error) {
+ put_device(&sd_mpath_disk->dev);
+ goto out_unlock;
+ }
+
+ list_add_tail(&sd_mpath_disk->entry, &sd_mpath_disks_list);
+ disk->major = sd_major((sd_mpath_disk->disk_index & 0xf0) >> 4);
+ disk->first_minor = ((sd_mpath_disk->disk_index & 0xf) << 4) |
+ (sd_mpath_disk->disk_index & 0xfff00);
+ disk->minors = SD_MINORS;
+
+ sd_mpath_disk->disk_count = 1;
+ mutex_unlock(&sd_mpath_disks_lock);
+found:
+ sdkp->sd_mpath_disk = sd_mpath_disk;
+ sdkp->disk->flags |= GENHD_FL_HIDDEN;
+ snprintf(sdkp->disk->disk_name, DISK_NAME_LEN, "%s:%d",
+ disk_name, scsi_mpath_dev->index);
+
+ sdkp->index = -1;
+ return 0;
+
+out_free_index:
+ ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
+out_put_disk:
+ mpath_put_disk(mpath_head);
+out_free_disk:
+ kfree(sd_mpath_disk);
+out_unlock:
+ mutex_unlock(&sd_mpath_disks_lock);
+ return error;
+}
+
+static void sd_mpath_remove(struct scsi_disk *sdkp)
+{
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+ struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ bool remove = false;
+
+ mpath_synchronize(mpath_head);
+
+ if (mpath_clear_current_path(mpath_device))
+ mpath_synchronize(mpath_head);
+
+ mpath_delete_device(mpath_device);
+
+ mutex_lock(&sd_mpath_disks_lock);
+ sd_mpath_disk->disk_count--;
+ if (!sd_mpath_disk->disk_count && mpath_can_remove_head(mpath_head)) {
+ list_del_init(&sd_mpath_disk->entry);
+ remove = true;
+ }
+ mutex_unlock(&sd_mpath_disks_lock);
+ mpath_remove_sysfs_link(mpath_device);
+ mpath_device->disk = NULL;
+
+ if (remove) {
+ device_del(&sd_mpath_disk->dev);
+ mpath_remove_disk(mpath_head);
+ }
+ sd_mpath_put_disk(sd_mpath_disk);
+}
+
+static void sd_mpath_remove_head(struct scsi_mpath_head *scsi_mpath_head)
+{
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ struct sd_mpath_disk *sd_mpath_disk;
+ struct device *dev = &scsi_mpath_head->dev;
+
+ mutex_lock(&sd_mpath_disks_lock);
+ sd_mpath_disk = sd_mpath_find_disk(scsi_mpath_head);
+ if (!sd_mpath_disk) {
+ dev_warn(dev, "could not find mpath disk\n");
+ mutex_unlock(&sd_mpath_disks_lock);
+ return;
+ }
+
+ list_del_init(&sd_mpath_disk->entry);
+ mutex_unlock(&sd_mpath_disks_lock);
+
+ device_del(&sd_mpath_disk->dev);
+ mpath_remove_disk(mpath_head);
+ sd_mpath_put_disk(sd_mpath_disk);
+}
+
+/*
+ * Always called for a failed probe, so we need to handle that some structures
+ * have not been set up.
+ */
+static void sd_mpath_fail_probe(struct scsi_disk *sdkp)
+{
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_device *scsi_mpath_dev;
+ struct mpath_device *mpath_device;
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_head *scsi_mpath_head;
+ struct mpath_head *mpath_head;
+ bool remove = false;
+
+ if (!sd_mpath_disk)
+ return;
+
+ scsi_mpath_dev = sdp->scsi_mpath_dev;
+ mpath_device = &scsi_mpath_dev->mpath_device;
+ scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ mpath_head = scsi_mpath_head->mpath_head;
+
+ mutex_lock(&sd_mpath_disks_lock);
+ sd_mpath_disk->disk_count--;
+ if (!sd_mpath_disk->disk_count) {
+ list_del_init(&sd_mpath_disk->entry);
+ remove = true;
+ }
+ mutex_unlock(&sd_mpath_disks_lock);
+ mpath_device->disk = NULL;
+
+ if (remove) {
+ device_del(&sd_mpath_disk->dev);
+ mpath_remove_disk(mpath_head);
+ }
+ sd_mpath_put_disk(sd_mpath_disk);
+}
+
+#else /* CONFIG_SCSI_MULTIPATH */
+static int sd_mpath_probe(struct scsi_disk *sdkp)
+{
+ return 0;
+}
+static void sd_mpath_remove(struct scsi_disk *sdkp)
+{
+ return;
+}
+static void sd_mpath_fail_probe(struct scsi_disk *sdkp)
+{
+
+}
+static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
+{
+ return 0;
+}
+static void sd_mpath_add_disk(struct scsi_disk *sdkp)
+{
+}
+#endif
/**
* sd_probe - called during driver initialization and whenever a
* new scsi device is attached to the system. It is called once
@@ -4016,22 +4351,33 @@ static int sd_probe(struct scsi_device *sdp)
&sd_bio_compl_lkclass);
if (!gd)
goto out_free;
+ sdkp->disk = gd;
+ sdkp->device = sdp;
- index = ida_alloc(&sd_index_ida, GFP_KERNEL);
- if (index < 0) {
- sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
- goto out_put;
- }
+ if (sdp->scsi_mpath_dev) {
+ error = sd_mpath_probe(sdkp);
+ if (error)
+ goto out_put;
+ } else {
+ index = ida_alloc(&sd_index_ida, GFP_KERNEL);
+ if (index < 0) {
+ sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
+ goto out_put;
+ }
- error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
- if (error) {
- sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
- goto out_free_index;
+ error = sd_format_disk_name("sd", index, gd->disk_name,
+ DISK_NAME_LEN);
+ if (error) {
+ sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
+ goto out_free_index;
+ }
+ sdkp->index = index;
+
+ gd->major = sd_major((index & 0xf0) >> 4);
+ gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+ gd->minors = SD_MINORS;
}
- sdkp->device = sdp;
- sdkp->disk = gd;
- sdkp->index = index;
sdkp->max_retries = SD_MAX_RETRIES;
atomic_set(&sdkp->openers, 0);
atomic_set(&sdkp->device->ioerr_cnt, 0);
@@ -4051,16 +4397,13 @@ static int sd_probe(struct scsi_device *sdp)
error = device_add(&sdkp->disk_dev);
if (error) {
+ sd_mpath_fail_probe(sdkp);
put_device(&sdkp->disk_dev);
goto out;
}
dev_set_drvdata(dev, sdkp);
- gd->major = sd_major((index & 0xf0) >> 4);
- gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
- gd->minors = SD_MINORS;
-
gd->fops = &sd_fops;
gd->private_data = sdkp;
@@ -4078,6 +4421,12 @@ static int sd_probe(struct scsi_device *sdp)
sd_revalidate_disk(gd);
+ if (sdp->scsi_mpath_dev) {
+ error = sd_mpath_revalidate_head(sdkp);
+ if (error)
+ sdev_printk(KERN_WARNING, sdp, "could not revalidate multipath limits\n");
+ }
+
if (sdp->removable) {
gd->flags |= GENHD_FL_REMOVABLE;
gd->events |= DISK_EVENT_MEDIA_CHANGE;
@@ -4092,11 +4441,15 @@ static int sd_probe(struct scsi_device *sdp)
error = device_add_disk(dev, gd, NULL);
if (error) {
+ sd_mpath_fail_probe(sdkp);
device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
+ if (sdp->scsi_mpath_dev)
+ sd_mpath_add_disk(sdkp);
+
if (sdkp->security) {
sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit);
if (sdkp->opal_dev)
@@ -4110,7 +4463,8 @@ static int sd_probe(struct scsi_device *sdp)
return 0;
out_free_index:
- ida_free(&sd_index_ida, index);
+ if (index >= 0)
+ ida_free(&sd_index_ida, index);
out_put:
put_disk(gd);
out_free:
@@ -4238,6 +4592,9 @@ static void sd_remove(struct scsi_device *sdp)
struct device *dev = &sdp->sdev_gendev;
struct scsi_disk *sdkp = dev_get_drvdata(dev);
+ if (sdp->scsi_mpath_dev)
+ sd_mpath_remove(sdkp);
+
scsi_autopm_get_device(sdkp->device);
device_del(&sdkp->disk_dev);
@@ -4403,6 +4760,9 @@ static struct scsi_driver sd_template = {
.resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
+ #ifdef CONFIG_SCSI_MULTIPATH
+ .mpath_remove_head = sd_mpath_remove_head,
+ #endif
.done = sd_done,
.eh_action = sd_eh_action,
.eh_reset = sd_eh_reset,
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 15/18] scsi: sd: add multipath disk attr groups
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (13 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 14/18] scsi: sd: support multipath disk John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 16/18] scsi: sd: add mpath_dev file John Garry
` (2 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Set multipath disk attr groups, which includes delayed disk removal and
everything from mpath_attr_group.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ca20f9430b4ac..b1cf35194895e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4064,6 +4064,41 @@ static void sd_mpath_add_disk(struct scsi_disk *sdkp)
mpath_device_set_live(mpath_device);
}
+static ssize_t sd_mpath_device_delayed_removal_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct mpath_head *mpath_head = mpath_bd_device_to_head(dev);
+
+ return mpath_delayed_removal_secs_store(mpath_head, buf, count);
+}
+
+static ssize_t sd_mpath_device_delayed_removal_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mpath_head *mpath_head = mpath_bd_device_to_head(dev);
+
+ return mpath_delayed_removal_secs_show(mpath_head, buf);
+}
+
+static DEVICE_ATTR(delayed_removal_secs, S_IRUGO | S_IWUSR,
+ sd_mpath_device_delayed_removal_show,
+ sd_mpath_device_delayed_removal_store);
+
+static struct attribute *sd_mpath_disk_attrs[] = {
+ &dev_attr_delayed_removal_secs.attr,
+ NULL
+};
+
+static const struct attribute_group sd_mpath_disk_attr_group = {
+ .attrs = sd_mpath_disk_attrs,
+};
+
+static const struct attribute_group *sd_mpath_disk_attr_groups[] = {
+ &sd_mpath_disk_attr_group,
+ &mpath_attr_group,
+ NULL
+};
+
static int sd_mpath_probe(struct scsi_disk *sdkp)
{
struct scsi_device *sdp = sdkp->device;
@@ -4149,6 +4184,7 @@ static int sd_mpath_probe(struct scsi_disk *sdkp)
/* undone in sd_mpath_disk_release() */
scsi_mpath_get_head(scsi_mpath_head);
scsi_mpath_head->mpath_head->drv_module = THIS_MODULE;
+ scsi_mpath_head->mpath_head->disk_groups = sd_mpath_disk_attr_groups;
error = device_add(&sd_mpath_disk->dev);
if (error) {
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 16/18] scsi: sd: add mpath_dev file
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (14 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 15/18] scsi: sd: add multipath disk attr groups John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 17/18] scsi: sd: add mpath_numa_nodes dev attribute John Garry
2026-04-28 11:14 ` [PATCH v2 18/18] scsi: sd: add mpath_queue_depth " John Garry
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Since per-path gendisk is hidden, we have no /dev/ file.
Add a mpath_dev file so that the multipath disk can be looked up from
per-path gendisk directory.
The following is an example of this usage:
$ ls -l /dev/sdc
brw-rw---- 1 root disk 8, 32 Feb 24 16:08 /dev/sdc
$ cat /sys/class/scsi_mpath_disk/scsi_mpath_disk0/sdc/multipath/sdc:0/mpath_dev
8:32
This can be used by a util like lsscsi, which would find that the gendisk
for the per-path scsi_device is missing.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b1cf35194895e..380da0b0298bb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4017,6 +4017,52 @@ static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
return ret;
}
+
+static ssize_t sd_mpath_dev_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *gd = dev_to_disk(dev);
+ struct scsi_disk *sdkp = gd->private_data;
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ struct gendisk *disk = mpath_head->disk;
+ struct device *disk_dev = disk_to_dev(disk);
+
+ return print_dev_t(page, disk_dev->devt);
+}
+static DEVICE_ATTR(mpath_dev, 0444, sd_mpath_dev_show, NULL);
+
+static struct attribute *sd_mpath_dev_attrs[] = {
+ &dev_attr_mpath_dev.attr,
+ NULL
+};
+
+static umode_t sd_mpath_dev_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct gendisk *gd = dev_to_disk(dev);
+ struct scsi_disk *sdkp = gd->private_data;
+ struct scsi_device *sdev = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_device = sdev->scsi_mpath_dev;
+
+ if (!scsi_mpath_device)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group sd_mpath_dev_attr_group = {
+ .is_visible = sd_mpath_dev_attr_is_visible,
+ .attrs = sd_mpath_dev_attrs,
+};
+
+static const struct attribute_group *sd_mpath_dev_groups[] = {
+ &sd_mpath_dev_attr_group,
+ NULL
+};
+
static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk)
{
if (!get_device(&sd_mpath_disk->dev))
@@ -4334,6 +4380,8 @@ static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
static void sd_mpath_add_disk(struct scsi_disk *sdkp)
{
}
+
+#define sd_mpath_dev_groups NULL
#endif
/**
* sd_probe - called during driver initialization and whenever a
@@ -4475,7 +4523,7 @@ static int sd_probe(struct scsi_device *sdp)
sdp->host->rpm_autosuspend_delay);
}
- error = device_add_disk(dev, gd, NULL);
+ error = device_add_disk(dev, gd, sd_mpath_dev_groups);
if (error) {
sd_mpath_fail_probe(sdkp);
device_unregister(&sdkp->disk_dev);
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 17/18] scsi: sd: add mpath_numa_nodes dev attribute
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (15 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 16/18] scsi: sd: add mpath_dev file John Garry
@ 2026-04-28 11:14 ` John Garry
2026-04-28 11:14 ` [PATCH v2 18/18] scsi: sd: add mpath_queue_depth " John Garry
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add an attribute to show multipath NUMA node per-path (scsi_disk).
The following is an example of reading the file:
$ cat /sys/devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0/mpath_numa_nodes
0-3
$ cat /sys/devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1/mpath_numa_nodes
$
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 380da0b0298bb..ee604f9f8cd20 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4033,8 +4033,25 @@ static ssize_t sd_mpath_dev_show(struct device *dev,
}
static DEVICE_ATTR(mpath_dev, 0444, sd_mpath_dev_show, NULL);
+static ssize_t sd_mpath_numa_nodes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *gd = dev_to_disk(dev);
+ struct scsi_disk *sdkp = gd->private_data;
+ struct scsi_device *sdev = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
+ struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device;
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct mpath_iopolicy *mpath_iopolicy = &scsi_mpath_head->iopolicy;
+
+ return mpath_numa_nodes_show(mpath_device, mpath_iopolicy, buf);
+}
+static DEVICE_ATTR(mpath_numa_nodes, 0444, sd_mpath_numa_nodes_show, NULL);
+
static struct attribute *sd_mpath_dev_attrs[] = {
&dev_attr_mpath_dev.attr,
+ &dev_attr_mpath_numa_nodes.attr,
NULL
};
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread* [PATCH v2 18/18] scsi: sd: add mpath_queue_depth dev attribute
2026-04-28 11:14 [PATCH v2 00/18] Native SCSI multipath support John Garry
` (16 preceding siblings ...)
2026-04-28 11:14 ` [PATCH v2 17/18] scsi: sd: add mpath_numa_nodes dev attribute John Garry
@ 2026-04-28 11:14 ` John Garry
17 siblings, 0 replies; 19+ messages in thread
From: John Garry @ 2026-04-28 11:14 UTC (permalink / raw)
To: hch, kbusch, sagi, axboe, martin.petersen, james.bottomley, hare,
bmarzins, nilay
Cc: jmeneghi, linux-nvme, linux-scsi, michael.christie, snitzer,
dm-devel, linux-kernel, John Garry
Add a mpath_queue_depth file so that the multipath dynamic queue depth can be
looked up from per-path gendisk (scsi_disk) directory.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ee604f9f8cd20..f416457b5a08f 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4049,9 +4049,27 @@ static ssize_t sd_mpath_numa_nodes_show(struct device *dev,
}
static DEVICE_ATTR(mpath_numa_nodes, 0444, sd_mpath_numa_nodes_show, NULL);
+static ssize_t sd_mpath_queue_depth_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *gd = dev_to_disk(dev);
+ struct scsi_disk *sdkp = gd->private_data;
+ struct scsi_device *sdev = sdkp->device;
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head;
+ struct Scsi_Host *shost = sdev->host;
+
+ if (!mpath_qd_iopolicy(&scsi_mpath_head->iopolicy))
+ return 0;
+
+ return sysfs_emit(buf, "%d\n", atomic_read(&shost->mpath_nr_active));
+}
+static DEVICE_ATTR(mpath_queue_depth, 0444, sd_mpath_queue_depth_show, NULL);
+
static struct attribute *sd_mpath_dev_attrs[] = {
&dev_attr_mpath_dev.attr,
&dev_attr_mpath_numa_nodes.attr,
+ &dev_attr_mpath_queue_depth.attr,
NULL
};
--
2.43.5
^ permalink raw reply related [flat|nested] 19+ messages in thread