public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/13] scsi: Core ALUA driver
@ 2026-03-17 12:06 John Garry
  2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
                   ` (13 more replies)
  0 siblings, 14 replies; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Following on the back of the ALUA support for native SCSI multipath
proposal at [0], this is an attempt to move to a SCSI core ALUA driver.

Essentially this series moves the bulk of the ALUA handling from
scsi_dh_alua.c to a core driver. We still need to support ALUA for DH, so
scsi_dh_alua.c is still responsible for driving ALUA support and the
SCSI core ALUA driver just provides a set of library functions for that.

The SCSI core ALUA driver also provides implicit ALUA support when no DH
is attached, as would be the case for native SCSI multipath.

This series is really just RFC-quality work, and its purpose is
to decide on the direction of ALUA support for native SCSI multipath.

I think that this work carries a real risk of regressions for
dm-multipath, so we need to be careful.

[0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m9c054433076812dff464d0e3b50a00620cfe0af1

John Garry (13):
  scsi: scsi_dh_alua: Delete alua_port_group
  scsi: alua: Create a core ALUA driver
  scsi: alua: Add scsi_alua_rtpg()
  scsi: alua: Add scsi_alua_stpg()
  scsi: alua: Add scsi_alua_tur()
  scsi: alua: Add scsi_alua_rtpg_run()
  scsi: alua: Add scsi_alua_stpg_run()
  scsi: alua: Add scsi_alua_check_tpgs()
  scsi: alua: Add scsi_alua_handle_state_transition()
  scsi: alua: Add scsi_alua_prep_fn()
  scsi: alua: Add scsi_device_alua_implicit()
  scsi: scsi_dh_alua: Switch to use core support
  scsi: core: Add implicit ALUA support

 drivers/scsi/Kconfig                       |   10 +-
 drivers/scsi/Makefile                      |    1 +
 drivers/scsi/device_handler/Kconfig        |    1 +
 drivers/scsi/device_handler/scsi_dh_alua.c | 1003 ++------------------
 drivers/scsi/scsi.c                        |    7 +
 drivers/scsi/scsi_alua.c                   |  748 +++++++++++++++
 drivers/scsi/scsi_error.c                  |    7 +
 drivers/scsi/scsi_lib.c                    |    7 +
 drivers/scsi/scsi_scan.c                   |    6 +
 drivers/scsi/scsi_sysfs.c                  |    7 +-
 include/scsi/scsi_alua.h                   |  103 ++
 include/scsi/scsi_device.h                 |    1 +
 12 files changed, 977 insertions(+), 924 deletions(-)
 create mode 100644 drivers/scsi/scsi_alua.c
 create mode 100644 include/scsi/scsi_alua.h

-- 
2.43.5


^ permalink raw reply	[flat|nested] 63+ messages in thread

* [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:44   ` Hannes Reinecke
  2026-03-23  0:08   ` Benjamin Marzinski
  2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
                   ` (12 subsequent siblings)
  13 siblings, 2 replies; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Delete the alua_port_group usage, as it is more accurate to manage the
port group info per SCSI device - see [0].

[0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m4ffc0d07f169b70b8fd2407bae9632aa0f8c1f9a

For now, the handler data will be used to hold the ALUA-related info.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 663 ++++++---------------
 1 file changed, 180 insertions(+), 483 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index efb08b9b145a1..067021fffc16f 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -54,41 +54,27 @@ static uint optimize_stpg;
 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
 
-static LIST_HEAD(port_group_list);
-static DEFINE_SPINLOCK(port_group_lock);
 static struct workqueue_struct *kaluad_wq;
 
-struct alua_port_group {
-	struct kref		kref;
-	struct rcu_head		rcu;
-	struct list_head	node;
-	struct list_head	dh_list;
-	unsigned char		device_id_str[256];
-	int			device_id_len;
+struct alua_dh_data {
 	int			group_id;
-	int			tpgs;
+	struct scsi_device	*sdev;
+	int			init_error;
+	struct mutex		init_mutex;
+	bool			disabled;
+	unsigned		flags; /* used for optimizing STPG */
+	spinlock_t		lock;
+
+	/* alua stuff */
 	int			state;
 	int			pref;
 	int			valid_states;
-	unsigned		flags; /* used for optimizing STPG */
+	int			tpgs;
 	unsigned char		transition_tmo;
 	unsigned long		expiry;
 	unsigned long		interval;
 	struct delayed_work	rtpg_work;
-	spinlock_t		lock;
 	struct list_head	rtpg_list;
-	struct scsi_device	*rtpg_sdev;
-};
-
-struct alua_dh_data {
-	struct list_head	node;
-	struct alua_port_group __rcu *pg;
-	int			group_id;
-	spinlock_t		pg_lock;
-	struct scsi_device	*sdev;
-	int			init_error;
-	struct mutex		init_mutex;
-	bool			disabled;
 };
 
 struct alua_queue_data {
@@ -101,24 +87,10 @@ struct alua_queue_data {
 #define ALUA_POLICY_SWITCH_ALL		1
 
 static void alua_rtpg_work(struct work_struct *work);
-static bool alua_rtpg_queue(struct alua_port_group *pg,
-			    struct scsi_device *sdev,
+static bool alua_rtpg_queue(struct scsi_device *sdev,
 			    struct alua_queue_data *qdata, bool force);
 static void alua_check(struct scsi_device *sdev, bool force);
 
-static void release_port_group(struct kref *kref)
-{
-	struct alua_port_group *pg;
-
-	pg = container_of(kref, struct alua_port_group, kref);
-	if (pg->rtpg_sdev)
-		flush_delayed_work(&pg->rtpg_work);
-	spin_lock(&port_group_lock);
-	list_del(&pg->node);
-	spin_unlock(&port_group_lock);
-	kfree_rcu(pg, rcu);
-}
-
 /*
  * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
  * @sdev: sdev the command should be sent to
@@ -182,88 +154,6 @@ static int submit_stpg(struct scsi_device *sdev, int group_id,
 				ALUA_FAILOVER_RETRIES, &exec_args);
 }
 
-static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
-						int group_id)
-{
-	struct alua_port_group *pg;
-
-	if (!id_str || !id_size || !strlen(id_str))
-		return NULL;
-
-	list_for_each_entry(pg, &port_group_list, node) {
-		if (pg->group_id != group_id)
-			continue;
-		if (!pg->device_id_len || pg->device_id_len != id_size)
-			continue;
-		if (strncmp(pg->device_id_str, id_str, id_size))
-			continue;
-		if (!kref_get_unless_zero(&pg->kref))
-			continue;
-		return pg;
-	}
-
-	return NULL;
-}
-
-/*
- * alua_alloc_pg - Allocate a new port_group structure
- * @sdev: scsi device
- * @group_id: port group id
- * @tpgs: target port group settings
- *
- * Allocate a new port_group structure for a given
- * device.
- */
-static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
-					     int group_id, int tpgs)
-{
-	struct alua_port_group *pg, *tmp_pg;
-
-	pg = kzalloc_obj(struct alua_port_group);
-	if (!pg)
-		return ERR_PTR(-ENOMEM);
-
-	pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
-					    sizeof(pg->device_id_str));
-	if (pg->device_id_len <= 0) {
-		/*
-		 * TPGS supported but no device identification found.
-		 * Generate private device identification.
-		 */
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: No device descriptors found\n",
-			    ALUA_DH_NAME);
-		pg->device_id_str[0] = '\0';
-		pg->device_id_len = 0;
-	}
-	pg->group_id = group_id;
-	pg->tpgs = tpgs;
-	pg->state = SCSI_ACCESS_STATE_OPTIMAL;
-	pg->valid_states = TPGS_SUPPORT_ALL;
-	if (optimize_stpg)
-		pg->flags |= ALUA_OPTIMIZE_STPG;
-	kref_init(&pg->kref);
-	INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
-	INIT_LIST_HEAD(&pg->rtpg_list);
-	INIT_LIST_HEAD(&pg->node);
-	INIT_LIST_HEAD(&pg->dh_list);
-	spin_lock_init(&pg->lock);
-
-	spin_lock(&port_group_lock);
-	tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
-				  group_id);
-	if (tmp_pg) {
-		spin_unlock(&port_group_lock);
-		kfree(pg);
-		return tmp_pg;
-	}
-
-	list_add(&pg->node, &port_group_list);
-	spin_unlock(&port_group_lock);
-
-	return pg;
-}
-
 /*
  * alua_check_tpgs - Evaluate TPGS setting
  * @sdev: device to be checked
@@ -326,13 +216,10 @@ static int alua_check_tpgs(struct scsi_device *sdev)
 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 			  int tpgs)
 {
-	int rel_port = -1, group_id;
-	struct alua_port_group *pg, *old_pg = NULL;
-	bool pg_updated = false;
-	unsigned long flags;
+	int rel_port = -1;
 
-	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
-	if (group_id < 0) {
+	h->group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+	if (h->group_id < 0) {
 		/*
 		 * Internal error; TPGS supported but required
 		 * VPD identification descriptors not present.
@@ -343,51 +230,9 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 			    ALUA_DH_NAME);
 		return SCSI_DH_DEV_UNSUPP;
 	}
+	h->tpgs = tpgs;
 
-	pg = alua_alloc_pg(sdev, group_id, tpgs);
-	if (IS_ERR(pg)) {
-		if (PTR_ERR(pg) == -ENOMEM)
-			return SCSI_DH_NOMEM;
-		return SCSI_DH_DEV_UNSUPP;
-	}
-	if (pg->device_id_len)
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: device %s port group %x rel port %x\n",
-			    ALUA_DH_NAME, pg->device_id_str,
-			    group_id, rel_port);
-	else
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: port group %x rel port %x\n",
-			    ALUA_DH_NAME, group_id, rel_port);
-
-	kref_get(&pg->kref);
-
-	/* Check for existing port group references */
-	spin_lock(&h->pg_lock);
-	old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
-	if (old_pg != pg) {
-		/* port group has changed. Update to new port group */
-		if (h->pg) {
-			spin_lock_irqsave(&old_pg->lock, flags);
-			list_del_rcu(&h->node);
-			spin_unlock_irqrestore(&old_pg->lock, flags);
-		}
-		rcu_assign_pointer(h->pg, pg);
-		pg_updated = true;
-	}
-
-	spin_lock_irqsave(&pg->lock, flags);
-	if (pg_updated)
-		list_add_rcu(&h->node, &pg->dh_list);
-	spin_unlock_irqrestore(&pg->lock, flags);
-
-	spin_unlock(&h->pg_lock);
-
-	alua_rtpg_queue(pg, sdev, NULL, true);
-	kref_put(&pg->kref, release_port_group);
-
-	if (old_pg)
-		kref_put(&old_pg->kref, release_port_group);
+	alua_rtpg_queue(sdev, NULL, true);
 
 	return SCSI_DH_OK;
 }
@@ -417,14 +262,8 @@ static char print_alua_state(unsigned char state)
 static void alua_handle_state_transition(struct scsi_device *sdev)
 {
 	struct alua_dh_data *h = sdev->handler_data;
-	struct alua_port_group *pg;
-
-	rcu_read_lock();
-	pg = rcu_dereference(h->pg);
-	if (pg)
-		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
-	rcu_read_unlock();
-	alua_check(sdev, false);
+
+	h->state = SCSI_ACCESS_STATE_TRANSITIONING;
 }
 
 static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
@@ -532,10 +371,10 @@ static int alua_tur(struct scsi_device *sdev)
  * Returns SCSI_DH_DEV_OFFLINED if the path is
  * found to be unusable.
  */
-static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
+static int alua_rtpg(struct scsi_device *sdev)
 {
 	struct scsi_sense_hdr sense_hdr;
-	struct alua_port_group *tmp_pg;
+	struct alua_dh_data *h = sdev->handler_data;
 	int len, k, off, bufflen = ALUA_RTPG_SIZE;
 	int group_id_old, state_old, pref_old, valid_states_old;
 	unsigned char *desc, *buff;
@@ -545,19 +384,32 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	unsigned char orig_transition_tmo;
 	unsigned long flags;
 	bool transitioning_sense = false;
+	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+
+	if (group_id < 0) {
+		/*
+		 * Internal error; TPGS supported but required
+		 * VPD identification descriptors not present.
+		 * Disable ALUA support
+		 */
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: No target port descriptors found\n",
+			    ALUA_DH_NAME);
+		return SCSI_DH_DEV_UNSUPP;
+	}
 
-	group_id_old = pg->group_id;
-	state_old = pg->state;
-	pref_old = pg->pref;
-	valid_states_old = pg->valid_states;
+	group_id_old = h->group_id;
+	state_old = h->state;
+	pref_old = h->pref;
+	valid_states_old = h->valid_states;
 
-	if (!pg->expiry) {
+	if (!h->expiry) {
 		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
 
-		if (pg->transition_tmo)
-			transition_tmo = pg->transition_tmo * HZ;
+		if (h->transition_tmo)
+			transition_tmo = h->transition_tmo * HZ;
 
-		pg->expiry = round_jiffies_up(jiffies + transition_tmo);
+		h->expiry = round_jiffies_up(jiffies + transition_tmo);
 	}
 
 	buff = kzalloc(bufflen, GFP_KERNEL);
@@ -566,7 +418,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 
  retry:
 	err = 0;
-	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
+	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, h->flags);
 
 	if (retval) {
 		/*
@@ -578,7 +430,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 		 * So ignore any errors to avoid spurious failures during
 		 * path failover.
 		 */
-		if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
+		if ((h->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
 			sdev_printk(KERN_INFO, sdev,
 				    "%s: ignoring rtpg result %d\n",
 				    ALUA_DH_NAME, retval);
@@ -607,9 +459,9 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
 		 * with ASC 00h if they don't support the extended header.
 		 */
-		if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
+		if (!(h->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
 		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
-			pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
+			h->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
 			goto retry;
 		}
 		/*
@@ -628,7 +480,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 		if (sense_hdr.sense_key == UNIT_ATTENTION)
 			err = SCSI_DH_RETRY;
 		if (err == SCSI_DH_RETRY &&
-		    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
+		    h->expiry != 0 && time_before(jiffies, h->expiry)) {
 			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
 				    ALUA_DH_NAME);
 			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
@@ -639,7 +491,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 			    ALUA_DH_NAME);
 		scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 		kfree(buff);
-		pg->expiry = 0;
+		h->expiry = 0;
 		return SCSI_DH_IO;
 	}
 
@@ -654,23 +506,23 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 			sdev_printk(KERN_WARNING, sdev,
 				    "%s: kmalloc buffer failed\n",__func__);
 			/* Temporary failure, bypass */
-			pg->expiry = 0;
+			h->expiry = 0;
 			return SCSI_DH_DEV_TEMP_BUSY;
 		}
 		goto retry;
 	}
 
-	orig_transition_tmo = pg->transition_tmo;
+	orig_transition_tmo = h->transition_tmo;
 	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
-		pg->transition_tmo = buff[5];
+		h->transition_tmo = buff[5];
 	else
-		pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
+		h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
 
-	if (orig_transition_tmo != pg->transition_tmo) {
+	if (orig_transition_tmo != h->transition_tmo) {
 		sdev_printk(KERN_INFO, sdev,
 			    "%s: transition timeout set to %d seconds\n",
-			    ALUA_DH_NAME, pg->transition_tmo);
-		pg->expiry = jiffies + pg->transition_tmo * HZ;
+			    ALUA_DH_NAME, h->transition_tmo);
+		h->expiry = jiffies + h->transition_tmo * HZ;
 	}
 
 	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
@@ -681,95 +533,71 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
 	     k < len;
 	     k += off, desc += off) {
-		u16 group_id = get_unaligned_be16(&desc[2]);
-
-		spin_lock_irqsave(&port_group_lock, flags);
-		tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
-					  group_id);
-		spin_unlock_irqrestore(&port_group_lock, flags);
-		if (tmp_pg) {
-			if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
-				if ((tmp_pg == pg) ||
-				    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
-					struct alua_dh_data *h;
-
-					tmp_pg->state = desc[0] & 0x0f;
-					tmp_pg->pref = desc[0] >> 7;
-					rcu_read_lock();
-					list_for_each_entry_rcu(h,
-						&tmp_pg->dh_list, node) {
-						if (!h->sdev)
-							continue;
-						h->sdev->access_state = desc[0];
-					}
-					rcu_read_unlock();
-				}
-				if (tmp_pg == pg)
-					tmp_pg->valid_states = desc[1];
-				spin_unlock_irqrestore(&tmp_pg->lock, flags);
-			}
-			kref_put(&tmp_pg->kref, release_port_group);
+		u16 group_id_desc = get_unaligned_be16(&desc[2]);
+
+		spin_lock_irqsave(&h->lock, flags);
+		if (group_id_desc == group_id) {
+			h->group_id = group_id;
+			WRITE_ONCE(h->state, desc[0] & 0x0f);
+			h->pref = desc[0] >> 7;
+			WRITE_ONCE(sdev->access_state, desc[0]);
+			h->valid_states = desc[1];
 		}
+		spin_unlock_irqrestore(&h->lock, flags);
 		off = 8 + (desc[7] * 4);
 	}
 
  skip_rtpg:
-	spin_lock_irqsave(&pg->lock, flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (transitioning_sense)
-		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
 
-	if (group_id_old != pg->group_id || state_old != pg->state ||
-		pref_old != pg->pref || valid_states_old != pg->valid_states)
+	if (group_id_old != h->group_id || state_old != h->state ||
+		pref_old != h->pref || valid_states_old != h->valid_states)
 		sdev_printk(KERN_INFO, sdev,
 			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
-			ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
-			pg->pref ? "preferred" : "non-preferred",
-			pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
-			pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
-			pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
-			pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
-			pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
-			pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
-			pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
-
-	switch (pg->state) {
+			ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
+			h->pref ? "preferred" : "non-preferred",
+			h->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
+			h->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
+			h->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
+			h->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
+			h->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
+			h->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
+			h->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
+
+	switch (h->state) {
 	case SCSI_ACCESS_STATE_TRANSITIONING:
-		if (time_before(jiffies, pg->expiry)) {
+		if (time_before(jiffies, h->expiry)) {
 			/* State transition, retry */
-			pg->interval = ALUA_RTPG_RETRY_DELAY;
+			h->interval = ALUA_RTPG_RETRY_DELAY;
 			err = SCSI_DH_RETRY;
 		} else {
 			struct alua_dh_data *h;
+			unsigned char access_state;
 
 			/* Transitioning time exceeded, set port to standby */
 			err = SCSI_DH_IO;
-			pg->state = SCSI_ACCESS_STATE_STANDBY;
-			pg->expiry = 0;
-			rcu_read_lock();
-			list_for_each_entry_rcu(h, &pg->dh_list, node) {
-				if (!h->sdev)
-					continue;
-				h->sdev->access_state =
-					(pg->state & SCSI_ACCESS_STATE_MASK);
-				if (pg->pref)
-					h->sdev->access_state |=
-						SCSI_ACCESS_STATE_PREFERRED;
-			}
-			rcu_read_unlock();
+			h->state = SCSI_ACCESS_STATE_STANDBY;
+			h->expiry = 0;
+			access_state = h->state & SCSI_ACCESS_STATE_MASK;
+			if (h->pref)
+				access_state |= SCSI_ACCESS_STATE_PREFERRED;
+			WRITE_ONCE(sdev->access_state, access_state);
 		}
 		break;
 	case SCSI_ACCESS_STATE_OFFLINE:
 		/* Path unusable */
 		err = SCSI_DH_DEV_OFFLINED;
-		pg->expiry = 0;
+		h->expiry = 0;
 		break;
 	default:
 		/* Useable path if active */
 		err = SCSI_DH_OK;
-		pg->expiry = 0;
+		h->expiry = 0;
 		break;
 	}
-	spin_unlock_irqrestore(&pg->lock, flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 	kfree(buff);
 	return err;
 }
@@ -782,22 +610,23 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
  * a re-evaluation of the target group state or SCSI_DH_OK
  * if no further action needs to be taken.
  */
-static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
+static unsigned alua_stpg(struct scsi_device *sdev)
 {
 	int retval;
 	struct scsi_sense_hdr sense_hdr;
+	struct alua_dh_data *h = sdev->handler_data;
 
-	if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
+	if (!(h->tpgs & TPGS_MODE_EXPLICIT)) {
 		/* Only implicit ALUA supported, retry */
 		return SCSI_DH_RETRY;
 	}
-	switch (pg->state) {
+	switch (h->state) {
 	case SCSI_ACCESS_STATE_OPTIMAL:
 		return SCSI_DH_OK;
 	case SCSI_ACCESS_STATE_ACTIVE:
-		if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
-		    !pg->pref &&
-		    (pg->tpgs & TPGS_MODE_IMPLICIT))
+		if ((h->flags & ALUA_OPTIMIZE_STPG) &&
+		    !h->pref &&
+		    (h->tpgs & TPGS_MODE_IMPLICIT))
 			return SCSI_DH_OK;
 		break;
 	case SCSI_ACCESS_STATE_STANDBY:
@@ -810,10 +639,10 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	default:
 		sdev_printk(KERN_INFO, sdev,
 			    "%s: stpg failed, unhandled TPGS state %d",
-			    ALUA_DH_NAME, pg->state);
+			    ALUA_DH_NAME, h->state);
 		return SCSI_DH_NOSYS;
 	}
-	retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
+	retval = submit_stpg(sdev, h->group_id, &sense_hdr);
 
 	if (retval) {
 		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
@@ -832,144 +661,75 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	return SCSI_DH_RETRY;
 }
 
-/*
- * The caller must call scsi_device_put() on the returned pointer if it is not
- * NULL.
- */
-static struct scsi_device * __must_check
-alua_rtpg_select_sdev(struct alua_port_group *pg)
-{
-	struct alua_dh_data *h;
-	struct scsi_device *sdev = NULL, *prev_sdev;
-
-	lockdep_assert_held(&pg->lock);
-	if (WARN_ON(!pg->rtpg_sdev))
-		return NULL;
-
-	/*
-	 * RCU protection isn't necessary for dh_list here
-	 * as we hold pg->lock, but for access to h->pg.
-	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(h, &pg->dh_list, node) {
-		if (!h->sdev)
-			continue;
-		if (h->sdev == pg->rtpg_sdev) {
-			h->disabled = true;
-			continue;
-		}
-		if (rcu_dereference(h->pg) == pg &&
-		    !h->disabled &&
-		    !scsi_device_get(h->sdev)) {
-			sdev = h->sdev;
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	if (!sdev) {
-		pr_warn("%s: no device found for rtpg\n",
-			(pg->device_id_len ?
-			 (char *)pg->device_id_str : "(nameless PG)"));
-		return NULL;
-	}
-
-	sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n");
-
-	prev_sdev = pg->rtpg_sdev;
-	pg->rtpg_sdev = sdev;
-
-	return prev_sdev;
-}
-
 static void alua_rtpg_work(struct work_struct *work)
 {
-	struct alua_port_group *pg =
-		container_of(work, struct alua_port_group, rtpg_work.work);
-	struct scsi_device *sdev, *prev_sdev = NULL;
+	struct alua_dh_data *h =
+		container_of(work, struct alua_dh_data, rtpg_work.work);
+	struct scsi_device *sdev = h->sdev;
 	LIST_HEAD(qdata_list);
 	int err = SCSI_DH_OK;
 	struct alua_queue_data *qdata, *tmp;
-	struct alua_dh_data *h;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pg->lock, flags);
-	sdev = pg->rtpg_sdev;
-	if (!sdev) {
-		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
-		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
-		spin_unlock_irqrestore(&pg->lock, flags);
-		kref_put(&pg->kref, release_port_group);
-		return;
-	}
-	pg->flags |= ALUA_PG_RUNNING;
-	if (pg->flags & ALUA_PG_RUN_RTPG) {
-		int state = pg->state;
+	spin_lock_irqsave(&h->lock, flags);
+	h->flags |= ALUA_PG_RUNNING;
+	if (h->flags & ALUA_PG_RUN_RTPG) {
+		int state = h->state;
 
-		pg->flags &= ~ALUA_PG_RUN_RTPG;
-		spin_unlock_irqrestore(&pg->lock, flags);
+		h->flags &= ~ALUA_PG_RUN_RTPG;
+		spin_unlock_irqrestore(&h->lock, flags);
 		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
 			if (alua_tur(sdev) == SCSI_DH_RETRY) {
-				spin_lock_irqsave(&pg->lock, flags);
-				pg->flags &= ~ALUA_PG_RUNNING;
-				pg->flags |= ALUA_PG_RUN_RTPG;
-				if (!pg->interval)
-					pg->interval = ALUA_RTPG_RETRY_DELAY;
-				spin_unlock_irqrestore(&pg->lock, flags);
-				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
-						   pg->interval * HZ);
+				spin_lock_irqsave(&h->lock, flags);
+				h->flags &= ~ALUA_PG_RUNNING;
+				h->flags |= ALUA_PG_RUN_RTPG;
+				if (!h->interval)
+					h->interval = ALUA_RTPG_RETRY_DELAY;
+				spin_unlock_irqrestore(&h->lock, flags);
+				queue_delayed_work(kaluad_wq, &h->rtpg_work,
+						   h->interval * HZ);
 				return;
 			}
 			/* Send RTPG on failure or if TUR indicates SUCCESS */
 		}
-		err = alua_rtpg(sdev, pg);
-		spin_lock_irqsave(&pg->lock, flags);
+		err = alua_rtpg(sdev);
+		spin_lock_irqsave(&h->lock, flags);
 
-		/* If RTPG failed on the current device, try using another */
-		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
-		    (prev_sdev = alua_rtpg_select_sdev(pg)))
-			err = SCSI_DH_IMM_RETRY;
-
-		if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY ||
-		    pg->flags & ALUA_PG_RUN_RTPG) {
-			pg->flags &= ~ALUA_PG_RUNNING;
+		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+			h->flags &= ~ALUA_PG_RUNNING;
 			if (err == SCSI_DH_IMM_RETRY)
-				pg->interval = 0;
-			else if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
-				pg->interval = ALUA_RTPG_RETRY_DELAY;
-			pg->flags |= ALUA_PG_RUN_RTPG;
-			spin_unlock_irqrestore(&pg->lock, flags);
+				h->interval = 0;
+			else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
+				h->interval = ALUA_RTPG_RETRY_DELAY;
+			h->flags |= ALUA_PG_RUN_RTPG;
+			spin_unlock_irqrestore(&h->lock, flags);
 			goto queue_rtpg;
 		}
 		if (err != SCSI_DH_OK)
-			pg->flags &= ~ALUA_PG_RUN_STPG;
+			h->flags &= ~ALUA_PG_RUN_STPG;
 	}
-	if (pg->flags & ALUA_PG_RUN_STPG) {
-		pg->flags &= ~ALUA_PG_RUN_STPG;
-		spin_unlock_irqrestore(&pg->lock, flags);
-		err = alua_stpg(sdev, pg);
-		spin_lock_irqsave(&pg->lock, flags);
-		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
-			pg->flags |= ALUA_PG_RUN_RTPG;
-			pg->interval = 0;
-			pg->flags &= ~ALUA_PG_RUNNING;
-			spin_unlock_irqrestore(&pg->lock, flags);
+	if (h->flags & ALUA_PG_RUN_STPG) {
+		h->flags &= ~ALUA_PG_RUN_STPG;
+		spin_unlock_irqrestore(&h->lock, flags);
+		err = alua_stpg(sdev);
+		spin_lock_irqsave(&h->lock, flags);
+		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+			h->flags |= ALUA_PG_RUN_RTPG;
+			h->interval = 0;
+			h->flags &= ~ALUA_PG_RUNNING;
+			spin_unlock_irqrestore(&h->lock, flags);
 			goto queue_rtpg;
 		}
 	}
 
-	list_splice_init(&pg->rtpg_list, &qdata_list);
+	list_splice_init(&h->rtpg_list, &qdata_list);
 	/*
 	 * We went through an RTPG, for good or bad.
-	 * Re-enable all devices for the next attempt.
+	 * Re-enable the device for the next attempt.
 	 */
-	list_for_each_entry(h, &pg->dh_list, node)
-		h->disabled = false;
-	pg->rtpg_sdev = NULL;
-	spin_unlock_irqrestore(&pg->lock, flags);
+	h->disabled = false;
+	spin_unlock_irqrestore(&h->lock, flags);
 
-	if (prev_sdev)
-		scsi_device_put(prev_sdev);
 
 	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
 		list_del(&qdata->entry);
@@ -977,22 +737,19 @@ static void alua_rtpg_work(struct work_struct *work)
 			qdata->callback_fn(qdata->callback_data, err);
 		kfree(qdata);
 	}
-	spin_lock_irqsave(&pg->lock, flags);
-	pg->flags &= ~ALUA_PG_RUNNING;
-	spin_unlock_irqrestore(&pg->lock, flags);
+	spin_lock_irqsave(&h->lock, flags);
+	h->flags &= ~ALUA_PG_RUNNING;
+	spin_unlock_irqrestore(&h->lock, flags);
 	scsi_device_put(sdev);
-	kref_put(&pg->kref, release_port_group);
+
 	return;
 
 queue_rtpg:
-	if (prev_sdev)
-		scsi_device_put(prev_sdev);
-	queue_delayed_work(kaluad_wq, &pg->rtpg_work, pg->interval * HZ);
+	queue_delayed_work(kaluad_wq, &h->rtpg_work, h->interval * HZ);
 }
 
 /**
  * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
- * @pg: ALUA port group associated with @sdev.
  * @sdev: SCSI device for which to submit an RTPG.
  * @qdata: Information about the callback to invoke after the RTPG.
  * @force: Whether or not to submit an RTPG if a work item that will submit an
@@ -1004,51 +761,34 @@ static void alua_rtpg_work(struct work_struct *work)
  * Context: may be called from atomic context (alua_check()) only if the caller
  *	holds an sdev reference.
  */
-static bool alua_rtpg_queue(struct alua_port_group *pg,
-			    struct scsi_device *sdev,
+static bool alua_rtpg_queue(struct scsi_device *sdev,
 			    struct alua_queue_data *qdata, bool force)
 {
 	int start_queue = 0;
+	struct alua_dh_data *h = sdev->handler_data;
 	unsigned long flags;
 
-	if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
+	if (scsi_device_get(sdev))
 		return false;
 
-	spin_lock_irqsave(&pg->lock, flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (qdata) {
-		list_add_tail(&qdata->entry, &pg->rtpg_list);
-		pg->flags |= ALUA_PG_RUN_STPG;
+		list_add_tail(&qdata->entry, &h->rtpg_list);
+		h->flags |= ALUA_PG_RUN_STPG;
 		force = true;
 	}
-	if (pg->rtpg_sdev == NULL) {
-		struct alua_dh_data *h = sdev->handler_data;
-
-		rcu_read_lock();
-		if (h && rcu_dereference(h->pg) == pg) {
-			pg->interval = 0;
-			pg->flags |= ALUA_PG_RUN_RTPG;
-			kref_get(&pg->kref);
-			pg->rtpg_sdev = sdev;
-			start_queue = 1;
-		}
-		rcu_read_unlock();
-	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
-		pg->flags |= ALUA_PG_RUN_RTPG;
+	if (!(h->flags & ALUA_PG_RUN_RTPG) && force) {
+		h->flags |= ALUA_PG_RUN_RTPG;
 		/* Do not queue if the worker is already running */
-		if (!(pg->flags & ALUA_PG_RUNNING)) {
-			kref_get(&pg->kref);
+		if (!(h->flags & ALUA_PG_RUNNING))
 			start_queue = 1;
-		}
 	}
 
-	spin_unlock_irqrestore(&pg->lock, flags);
-
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (start_queue) {
-		if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
+		if (queue_delayed_work(kaluad_wq, &h->rtpg_work,
 				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
 			sdev = NULL;
-		else
-			kref_put(&pg->kref, release_port_group);
 	}
 	if (sdev)
 		scsi_device_put(sdev);
@@ -1088,7 +828,6 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
 static int alua_set_params(struct scsi_device *sdev, const char *params)
 {
 	struct alua_dh_data *h = sdev->handler_data;
-	struct alua_port_group *pg = NULL;
 	unsigned int optimize = 0, argc;
 	const char *p = params;
 	int result = SCSI_DH_OK;
@@ -1102,19 +841,12 @@ static int alua_set_params(struct scsi_device *sdev, const char *params)
 	if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
 		return -EINVAL;
 
-	rcu_read_lock();
-	pg = rcu_dereference(h->pg);
-	if (!pg) {
-		rcu_read_unlock();
-		return -ENXIO;
-	}
-	spin_lock_irqsave(&pg->lock, flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (optimize)
-		pg->flags |= ALUA_OPTIMIZE_STPG;
+		h->flags |= ALUA_OPTIMIZE_STPG;
 	else
-		pg->flags &= ~ALUA_OPTIMIZE_STPG;
-	spin_unlock_irqrestore(&pg->lock, flags);
-	rcu_read_unlock();
+		h->flags &= ~ALUA_OPTIMIZE_STPG;
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	return result;
 }
@@ -1132,10 +864,8 @@ static int alua_set_params(struct scsi_device *sdev, const char *params)
 static int alua_activate(struct scsi_device *sdev,
 			activate_complete fn, void *data)
 {
-	struct alua_dh_data *h = sdev->handler_data;
 	int err = SCSI_DH_OK;
 	struct alua_queue_data *qdata;
-	struct alua_port_group *pg;
 
 	qdata = kzalloc_obj(*qdata);
 	if (!qdata) {
@@ -1145,26 +875,12 @@ static int alua_activate(struct scsi_device *sdev,
 	qdata->callback_fn = fn;
 	qdata->callback_data = data;
 
-	mutex_lock(&h->init_mutex);
-	rcu_read_lock();
-	pg = rcu_dereference(h->pg);
-	if (!pg || !kref_get_unless_zero(&pg->kref)) {
-		rcu_read_unlock();
-		kfree(qdata);
-		err = h->init_error;
-		mutex_unlock(&h->init_mutex);
-		goto out;
-	}
-	rcu_read_unlock();
-	mutex_unlock(&h->init_mutex);
-
-	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
+	if (alua_rtpg_queue(sdev, qdata, true)) {
 		fn = NULL;
 	} else {
 		kfree(qdata);
 		err = SCSI_DH_DEV_OFFLINED;
 	}
-	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
 		fn(data, err);
@@ -1179,18 +895,7 @@ static int alua_activate(struct scsi_device *sdev,
  */
 static void alua_check(struct scsi_device *sdev, bool force)
 {
-	struct alua_dh_data *h = sdev->handler_data;
-	struct alua_port_group *pg;
-
-	rcu_read_lock();
-	pg = rcu_dereference(h->pg);
-	if (!pg || !kref_get_unless_zero(&pg->kref)) {
-		rcu_read_unlock();
-		return;
-	}
-	rcu_read_unlock();
-	alua_rtpg_queue(pg, sdev, NULL, force);
-	kref_put(&pg->kref, release_port_group);
+	alua_rtpg_queue(sdev, NULL, force);
 }
 
 /*
@@ -1202,14 +907,12 @@ static void alua_check(struct scsi_device *sdev, bool force)
 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
 {
 	struct alua_dh_data *h = sdev->handler_data;
-	struct alua_port_group *pg;
-	unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
+	unsigned long flags;
+	unsigned char state;
 
-	rcu_read_lock();
-	pg = rcu_dereference(h->pg);
-	if (pg)
-		state = pg->state;
-	rcu_read_unlock();
+	spin_lock_irqsave(&h->lock, flags);
+	state = h->state;
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	switch (state) {
 	case SCSI_ACCESS_STATE_OPTIMAL:
@@ -1242,20 +945,26 @@ static int alua_bus_attach(struct scsi_device *sdev)
 	h = kzalloc_obj(*h);
 	if (!h)
 		return SCSI_DH_NOMEM;
-	spin_lock_init(&h->pg_lock);
-	rcu_assign_pointer(h->pg, NULL);
+	spin_lock_init(&h->lock);
 	h->init_error = SCSI_DH_OK;
 	h->sdev = sdev;
-	INIT_LIST_HEAD(&h->node);
+	INIT_DELAYED_WORK(&h->rtpg_work, alua_rtpg_work);
+	INIT_LIST_HEAD(&h->rtpg_list);
 
 	mutex_init(&h->init_mutex);
+
+	h->state = SCSI_ACCESS_STATE_OPTIMAL;
+	h->valid_states = TPGS_SUPPORT_ALL;
+	if (optimize_stpg)
+		h->flags |= ALUA_OPTIMIZE_STPG;
+
+	sdev->handler_data = h;
 	err = alua_initialize(sdev, h);
 	if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
 		goto failed;
-
-	sdev->handler_data = h;
 	return SCSI_DH_OK;
 failed:
+	sdev->handler_data = NULL;
 	kfree(h);
 	return err;
 }
@@ -1267,20 +976,8 @@ static int alua_bus_attach(struct scsi_device *sdev)
 static void alua_bus_detach(struct scsi_device *sdev)
 {
 	struct alua_dh_data *h = sdev->handler_data;
-	struct alua_port_group *pg;
-
-	spin_lock(&h->pg_lock);
-	pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
-	rcu_assign_pointer(h->pg, NULL);
-	spin_unlock(&h->pg_lock);
-	if (pg) {
-		spin_lock_irq(&pg->lock);
-		list_del_rcu(&h->node);
-		spin_unlock_irq(&pg->lock);
-		kref_put(&pg->kref, release_port_group);
-	}
+
 	sdev->handler_data = NULL;
-	synchronize_rcu();
 	kfree(h);
 }
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 02/13] scsi: alua: Create a core ALUA driver
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
  2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:47   ` Hannes Reinecke
                     ` (2 more replies)
  2026-03-17 12:06 ` [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg() John Garry
                   ` (11 subsequent siblings)
  13 siblings, 3 replies; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a dedicated ALUA driver which can be used for native SCSI multipath
and also DH-based ALUA support.

The core driver will provide ALUA support when a scsi_device does not have
a device handler (DH) attached.

The core driver will provide functionality to handle RTPG and STPG, but
the SCSI DH ALUA driver will be responsible for driving these when a DH is
attached.

New structure alua_data holds all ALUA-related scsi_device info.

Hannes Reinecke originally authored the kernel ALUA code.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/Kconfig                | 10 +++-
 drivers/scsi/Makefile               |  1 +
 drivers/scsi/device_handler/Kconfig |  1 +
 drivers/scsi/scsi.c                 |  7 +++
 drivers/scsi/scsi_alua.c            | 78 +++++++++++++++++++++++++++++
 drivers/scsi/scsi_scan.c            |  4 ++
 drivers/scsi/scsi_sysfs.c           |  3 ++
 include/scsi/scsi_alua.h            | 45 +++++++++++++++++
 include/scsi/scsi_device.h          |  1 +
 9 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 drivers/scsi/scsi_alua.c
 create mode 100644 include/scsi/scsi_alua.h

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 19d0884479a24..396cc0fda9fcc 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -76,8 +76,16 @@ config SCSI_LIB_KUNIT_TEST
 
 	  If unsure say N.
 
-comment "SCSI support type (disk, tape, CD-ROM)"
+config SCSI_ALUA
+	tristate "SPC-3 ALUA support"
 	depends on SCSI
+	help
+	  SCSI support for generic SPC-3 Asymmetric Logical Unit
+	  Access (ALUA).
+
+	  If unsure, say Y.
+
+comment "SCSI support type (disk, tape, CD-ROM)"
 
 config BLK_DEV_SD
 	tristate "SCSI disk support"
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 16de3e41f94c4..90c25f36ea3a8 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -153,6 +153,7 @@ obj-$(CONFIG_SCSI_ENCLOSURE)	+= ses.o
 
 obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/
 
+obj-$(CONFIG_SCSI_ALUA) += scsi_alua.o
 # This goes last, so that "real" scsi devices probe earlier
 obj-$(CONFIG_SCSI_DEBUG)	+= scsi_debug.o
 scsi_mod-y			+= scsi.o hosts.o scsi_ioctl.o \
diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig
index 368eb94c24562..ff06aea8c272c 100644
--- a/drivers/scsi/device_handler/Kconfig
+++ b/drivers/scsi/device_handler/Kconfig
@@ -35,6 +35,7 @@ config SCSI_DH_EMC
 config SCSI_DH_ALUA
 	tristate "SPC-3 ALUA Device Handler"
 	depends on SCSI_DH && SCSI
+	select SCSI_ALUA
 	help
 	  SCSI Device handler for generic SPC-3 Asymmetric Logical Unit
 	  Access (ALUA).
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76cdad063f7bc..fc90ee19bb962 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -58,6 +58,7 @@
 #include <linux/unaligned.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_device.h>
@@ -1042,12 +1043,17 @@ static int __init init_scsi(void)
 	error = scsi_sysfs_register();
 	if (error)
 		goto cleanup_sysctl;
+	error = scsi_alua_init();
+	if (error)
+		goto cleanup_sysfs;
 
 	scsi_netlink_init();
 
 	printk(KERN_NOTICE "SCSI subsystem initialized\n");
 	return 0;
 
+cleanup_sysfs:
+	scsi_sysfs_unregister();
 cleanup_sysctl:
 	scsi_exit_sysctl();
 cleanup_hosts:
@@ -1066,6 +1072,7 @@ static int __init init_scsi(void)
 static void __exit exit_scsi(void)
 {
 	scsi_netlink_exit();
+	scsi_exit_alua();
 	scsi_sysfs_unregister();
 	scsi_exit_sysctl();
 	scsi_exit_hosts();
diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
new file mode 100644
index 0000000000000..a5a67c6deff17
--- /dev/null
+++ b/drivers/scsi/scsi_alua.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic SCSI-3 ALUA SCSI driver
+ *
+ * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
+ * All rights reserved.
+ */
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_alua.h>
+
+#define DRV_NAME "alua"
+
+static struct workqueue_struct *kalua_wq;
+
+int scsi_alua_sdev_init(struct scsi_device *sdev)
+{
+	int rel_port, ret, tpgs;
+
+	tpgs = scsi_device_tpgs(sdev);
+	if (!tpgs)
+		return 0;
+
+	sdev->alua = kzalloc(sizeof(*sdev->alua), GFP_KERNEL);
+	if (!sdev->alua)
+		return -ENOMEM;
+
+	sdev->alua->group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+	sdev_printk(KERN_INFO, sdev,
+			    "%s: group_id=%d\n",
+			    DRV_NAME, sdev->alua->group_id);
+	if (sdev->alua->group_id < 0) {
+		/*
+		 * Internal error; TPGS supported but required
+		 * VPD identification descriptors not present.
+		 * Disable ALUA support.
+		 */
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: No target port descriptors found\n",
+			    __func__);
+		ret = -EIO;
+		goto out_free_data;
+	}
+
+	sdev->alua->sdev = sdev;
+	sdev->alua->tpgs = tpgs;
+
+	return 0;
+out_free_data:
+	kfree(sdev->alua);
+	sdev->alua = NULL;
+	return ret;
+}
+
+void scsi_alua_sdev_exit(struct scsi_device *sdev)
+{
+	kfree(sdev->alua);
+	sdev->alua = NULL;
+}
+
+int scsi_alua_init(void)
+{
+	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
+	if (!kalua_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+void scsi_exit_alua(void)
+{
+	destroy_workqueue(kalua_wq);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("scsi_alua");
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 2cfcf1f5d6a46..3af64d1231445 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -38,6 +38,7 @@
 #include <linux/unaligned.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_driver.h>
@@ -1123,6 +1124,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	sdev->max_queue_depth = sdev->queue_depth;
 	WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth);
 
+	if (scsi_device_tpgs(sdev))
+		scsi_alua_sdev_init(sdev);
+
 	/*
 	 * Ok, the device is now all set up, we can
 	 * register it and tell the rest of the kernel
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 6b8c5c05f2944..6c4c3c22f6acf 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -16,6 +16,7 @@
 #include <linux/bsg.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
@@ -480,6 +481,8 @@ static void scsi_device_dev_release(struct device *dev)
 
 	sbitmap_free(&sdev->budget_map);
 
+	scsi_alua_sdev_exit(sdev);
+
 	mutex_lock(&sdev->inquiry_mutex);
 	vpd_pg0 = rcu_replace_pointer(sdev->vpd_pg0, vpd_pg0,
 				       lockdep_is_held(&sdev->inquiry_mutex));
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
new file mode 100644
index 0000000000000..07cdcb4f5b518
--- /dev/null
+++ b/include/scsi/scsi_alua.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Generic SCSI-3 ALUA SCSI Device Handler
+ *
+ * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
+ * All rights reserved.
+ */
+#ifndef _SCSI_ALUA_H
+#define _SCSI_ALUA_H
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+
+#if IS_ENABLED(CONFIG_SCSI_ALUA)
+
+struct alua_data {
+	int			group_id;
+	int			tpgs;
+	struct scsi_device	*sdev;
+};
+
+int scsi_alua_sdev_init(struct scsi_device *sdev);
+void scsi_alua_sdev_exit(struct scsi_device *sdev);
+
+int scsi_alua_init(void);
+void scsi_exit_alua(void);
+#else //CONFIG_SCSI_ALUA
+
+static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
+{
+	return 0;
+}
+static inline void scsi_alua_sdev_exit(struct scsi_device *sdev)
+{
+
+}
+static inline int scsi_alua_init(void)
+{
+	return 0;
+}
+static inline void scsi_exit_alua(void)
+{
+}
+#endif // CONFIG_SCSI_ALUA
+#endif // _SCSI_ALUA_H
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d32f5841f4f85..c439e837dcaa6 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -295,6 +295,7 @@ struct scsi_device {
 	struct mutex		state_mutex;
 	enum scsi_device_state sdev_state;
 	struct task_struct	*quiesced_by;
+	struct alua_data	*alua;
 	unsigned long		sdev_data[];
 } __attribute__((aligned(sizeof(unsigned long))));
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
  2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
  2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:50   ` Hannes Reinecke
  2026-03-17 12:06 ` [PATCH 04/13] scsi: alua: Add scsi_alua_stpg() John Garry
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add scsi_alua_rtpg(), which does the same as alua_rtpg() from
scsi_dh_alua.c.

The per-sdev alua_data structure gains the members which scsi_alua_rtpg()
updates; they mirror the same members in alua_dh_data.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 311 +++++++++++++++++++++++++++++++++++++++
 include/scsi/scsi_alua.h |   8 +
 2 files changed, 319 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index a5a67c6deff17..50c1d17b52dc7 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -6,6 +6,8 @@
  * All rights reserved.
  */
 
+#include <linux/unaligned.h>
+
 #include <scsi/scsi.h>
 #include <scsi/scsi_proto.h>
 #include <scsi/scsi_dbg.h>
@@ -16,6 +18,314 @@
 
 static struct workqueue_struct *kalua_wq;
 
+#define TPGS_SUPPORT_NONE		0x00
+#define TPGS_SUPPORT_OPTIMIZED		0x01
+#define TPGS_SUPPORT_NONOPTIMIZED	0x02
+#define TPGS_SUPPORT_STANDBY		0x04
+#define TPGS_SUPPORT_UNAVAILABLE	0x08
+#define TPGS_SUPPORT_LBA_DEPENDENT	0x10
+#define TPGS_SUPPORT_OFFLINE		0x40
+#define TPGS_SUPPORT_TRANSITION		0x80
+#define TPGS_SUPPORT_ALL		0xdf
+
+#define RTPG_FMT_MASK			0x70
+#define RTPG_FMT_EXT_HDR		0x10
+
+#define ALUA_RTPG_SIZE			128
+#define ALUA_FAILOVER_TIMEOUT		60
+#define ALUA_FAILOVER_RETRIES		5
+#define ALUA_RTPG_DELAY_MSECS		5
+#define ALUA_RTPG_RETRY_DELAY		2
+
+/*
+ * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
+ * @sdev: sdev the command should be sent to
+ */
+static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
+		       int bufflen, struct scsi_sense_hdr *sshdr)
+{
+	u8 cdb[MAX_COMMAND_SIZE];
+	blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
+				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+	const struct scsi_exec_args exec_args = {
+		.sshdr = sshdr,
+	};
+
+	/* Prepare the command. */
+	memset(cdb, 0x0, MAX_COMMAND_SIZE);
+	cdb[0] = MAINTENANCE_IN;
+	if (!sdev->alua->rtpg_ext_hdr_unsupp)
+		cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
+	else
+		cdb[1] = MI_REPORT_TARGET_PGS;
+	put_unaligned_be32(bufflen, &cdb[6]);
+
+	return scsi_execute_cmd(sdev, cdb, opf, buff, bufflen,
+				ALUA_FAILOVER_TIMEOUT * HZ,
+				ALUA_FAILOVER_RETRIES, &exec_args);
+}
+
+static char print_alua_state(unsigned char state)
+{
+	switch (state) {
+	case SCSI_ACCESS_STATE_OPTIMAL:
+		return 'A';
+	case SCSI_ACCESS_STATE_ACTIVE:
+		return 'N';
+	case SCSI_ACCESS_STATE_STANDBY:
+		return 'S';
+	case SCSI_ACCESS_STATE_UNAVAILABLE:
+		return 'U';
+	case SCSI_ACCESS_STATE_LBA:
+		return 'L';
+	case SCSI_ACCESS_STATE_OFFLINE:
+		return 'O';
+	case SCSI_ACCESS_STATE_TRANSITIONING:
+		return 'T';
+	default:
+		return 'X';
+	}
+}
+
+/*
+ * scsi_alua_rtpg - Evaluate REPORT TARGET GROUP STATES
+ * @sdev: the device to be evaluated.
+ *
+ * Evaluate the Target Port Group State.
+ * Returns -ENODEV if the path is
+ * found to be unusable.
+ */
+__maybe_unused
+static int scsi_alua_rtpg(struct scsi_device *sdev)
+{
+	struct alua_data *alua = sdev->alua;
+	struct scsi_sense_hdr sense_hdr;
+	int len, k, off, bufflen = ALUA_RTPG_SIZE;
+	int group_id_old, state_old, pref_old, valid_states_old;
+	unsigned char *desc, *buff;
+	unsigned err;
+	int retval;
+	unsigned int tpg_desc_tbl_off;
+	unsigned char orig_transition_tmo;
+	unsigned long flags;
+	bool transitioning_sense = false;
+	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+
+	if (group_id < 0) {
+		/*
+		 * Internal error; TPGS supported but required
+		 * VPD identification descriptors not present.
+		 * Disable ALUA support
+		 */
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: No target port descriptors found\n",
+			    DRV_NAME);
+		return -EOPNOTSUPP; //SCSI_DH_DEV_UNSUPP;
+	}
+
+	group_id_old = alua->group_id;
+	state_old = alua->state;
+	pref_old = alua->pref;
+	valid_states_old = alua->valid_states;
+
+	if (!alua->expiry) {
+		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
+
+		if (alua->transition_tmo)
+			transition_tmo = alua->transition_tmo * HZ;
+
+		alua->expiry = round_jiffies_up(jiffies + transition_tmo);
+	}
+
+	buff = kzalloc(bufflen, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+ retry:
+	err = 0;
+	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr);
+
+	if (retval) {
+		/*
+		 * Some (broken) implementations have a habit of returning
+		 * an error during things like firmware update etc.
+		 * But if the target only supports active/optimized there's
+		 * not much we can do; it's not that we can switch paths
+		 * or anything.
+		 * So ignore any errors to avoid spurious failures during
+		 * path failover.
+		 */
+		if ((alua->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
+			sdev_printk(KERN_INFO, sdev,
+				    "%s: ignoring rtpg result %d\n",
+				    DRV_NAME, retval);
+			kfree(buff);
+			return 0;//SCSI_DH_OK
+		}
+		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
+			sdev_printk(KERN_INFO, sdev,
+				    "%s: rtpg failed, result %d\n",
+				    DRV_NAME, retval);
+			kfree(buff);
+			if (retval < 0)
+				return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
+			if (host_byte(retval) == DID_NO_CONNECT)
+				return -ENOENT;//SCSI_DH_RES_TEMP_UNAVAIL;
+			return -EIO;//SCSI_DH_IO
+		}
+
+		/*
+		 * submit_rtpg() has failed on existing arrays
+		 * when requesting extended header info, and
+		 * the array doesn't support extended headers,
+		 * even though it shouldn't according to T10.
+		 * The retry with rtpg_ext_hdr_unsupp set (which drops
+		 * the extended header request) handles this.
+		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
+		 * with ASC 00h if they don't support the extended header.
+		 */
+		if (!alua->rtpg_ext_hdr_unsupp &&
+		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
+			alua->rtpg_ext_hdr_unsupp = true;
+			goto retry;
+		}
+		/*
+		 * If the array returns with 'ALUA state transition'
+		 * sense code here it cannot return RTPG data during
+		 * transition. So set the state to 'transitioning' directly.
+		 */
+		if (sense_hdr.sense_key == NOT_READY &&
+		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
+			transitioning_sense = true;
+			goto skip_rtpg;
+		}
+		/*
+		 * Retry if any other UNIT ATTENTION occurred.
+		 */
+		if (sense_hdr.sense_key == UNIT_ATTENTION)
+			err = -EAGAIN;//SCSI_DH_RETRY
+		if (err == -EAGAIN &&
+		    alua->expiry != 0 && time_before(jiffies, alua->expiry)) {
+			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
+				    DRV_NAME);
+			scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
+			kfree(buff);
+			return err;
+		}
+		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
+			    DRV_NAME);
+		scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
+		kfree(buff);
+		alua->expiry = 0;
+		return -EIO;//SCSI_DH_IO
+	}
+
+	len = get_unaligned_be32(&buff[0]) + 4;
+
+	if (len > bufflen) {
+		/* Resubmit with the correct length */
+		kfree(buff);
+		bufflen = len;
+		buff = kmalloc(bufflen, GFP_KERNEL);
+		if (!buff) {
+			sdev_printk(KERN_WARNING, sdev,
+				    "%s: kmalloc buffer failed\n",__func__);
+			/* Temporary failure, bypass */
+			alua->expiry = 0;
+			return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
+		}
+		goto retry;
+	}
+
+	orig_transition_tmo = alua->transition_tmo;
+	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
+		alua->transition_tmo = buff[5];
+	else
+		alua->transition_tmo = ALUA_FAILOVER_TIMEOUT;
+
+	if (orig_transition_tmo != alua->transition_tmo) {
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: transition timeout set to %d seconds\n",
+			    DRV_NAME, alua->transition_tmo);
+		alua->expiry = jiffies + alua->transition_tmo * HZ;
+	}
+
+	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
+		tpg_desc_tbl_off = 8;
+	else
+		tpg_desc_tbl_off = 4;
+
+	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
+	     k < len;
+	     k += off, desc += off) {
+		u16 group_id_desc = get_unaligned_be16(&desc[2]);
+
+		spin_lock_irqsave(&alua->lock, flags);
+		if (group_id_desc == group_id) {
+			alua->group_id = group_id;
+			WRITE_ONCE(alua->state, desc[0] & 0x0f);
+			alua->pref = desc[0] >> 7;
+			WRITE_ONCE(sdev->access_state, desc[0]);
+			alua->valid_states = desc[1];
+		}
+		spin_unlock_irqrestore(&alua->lock, flags);
+		off = 8 + (desc[7] * 4);
+	}
+
+ skip_rtpg:
+	spin_lock_irqsave(&alua->lock, flags);
+	if (transitioning_sense)
+		alua->state = SCSI_ACCESS_STATE_TRANSITIONING;
+
+	if (group_id_old != alua->group_id || state_old != alua->state ||
+		pref_old != alua->pref || valid_states_old != alua->valid_states)
+		sdev_printk(KERN_INFO, sdev,
+			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
+			DRV_NAME, alua->group_id, print_alua_state(alua->state),
+			alua->pref ? "preferred" : "non-preferred",
+			alua->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
+			alua->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
+			alua->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
+			alua->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
+			alua->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
+			alua->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
+			alua->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
+
+	switch (alua->state) {
+	case SCSI_ACCESS_STATE_TRANSITIONING:
+		if (time_before(jiffies, alua->expiry)) {
+			/* State transition, retry */
+			alua->interval = ALUA_RTPG_RETRY_DELAY;
+			err = -EAGAIN;//SCSI_DH_RETRY
+		} else {
+			unsigned char access_state;
+
+			/* Transitioning time exceeded, set port to standby */
+			err = -EIO;//SCSI_DH_IO;
+			alua->state = SCSI_ACCESS_STATE_STANDBY;
+			alua->expiry = 0;
+			access_state = alua->state & SCSI_ACCESS_STATE_MASK;
+			if (alua->pref)
+				access_state |= SCSI_ACCESS_STATE_PREFERRED;
+			WRITE_ONCE(sdev->access_state, access_state);
+		}
+		break;
+	case SCSI_ACCESS_STATE_OFFLINE:
+		/* Path unusable */
+		err = -ENODEV;//SCSI_DH_DEV_OFFLINED;
+		alua->expiry = 0;
+		break;
+	default:
+		/* Useable path if active */
+		err = 0;//SCSI_DH_OK
+		alua->expiry = 0;
+		break;
+	}
+	spin_unlock_irqrestore(&alua->lock, flags);
+	kfree(buff);
+	return err;
+}
+
 int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	int rel_port, ret, tpgs;
@@ -47,6 +357,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
 
 	sdev->alua->sdev = sdev;
 	sdev->alua->tpgs = tpgs;
+	spin_lock_init(&sdev->alua->lock);
 
 	return 0;
 out_free_data:
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 07cdcb4f5b518..068277261ed9d 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -16,7 +16,15 @@
 struct alua_data {
 	int			group_id;
 	int			tpgs;
+	int			state;
+	int			pref;
+	int			valid_states;
+	bool			rtpg_ext_hdr_unsupp;
+	unsigned char		transition_tmo;
+	unsigned long		expiry;
+	unsigned long		interval;
 	struct scsi_device	*sdev;
+	spinlock_t		lock;
 };
 
 int scsi_alua_sdev_init(struct scsi_device *sdev);
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 04/13] scsi: alua: Add scsi_alua_stpg()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (2 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:53   ` Hannes Reinecke
  2026-03-17 12:06 ` [PATCH 05/13] scsi: alua: Add scsi_alua_tur() John Garry
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a core equivalent of alua_stpg() from scsi_dh_alua.c

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 99 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index 50c1d17b52dc7..1045885f74169 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -30,6 +30,9 @@ static struct workqueue_struct *kalua_wq;
 
 #define RTPG_FMT_MASK			0x70
 #define RTPG_FMT_EXT_HDR		0x10
+#define TPGS_MODE_NONE			0x0
+#define TPGS_MODE_IMPLICIT		0x1
+#define TPGS_MODE_EXPLICIT		0x2
 
 #define ALUA_RTPG_SIZE			128
 #define ALUA_FAILOVER_TIMEOUT		60
@@ -65,6 +68,41 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
 				ALUA_FAILOVER_RETRIES, &exec_args);
 }
 
+/*
+ * submit_stpg - Issue a SET TARGET PORT GROUP command
+ *
+ * Currently we're only setting the current target port group state
+ * to 'active/optimized' and let the array firmware figure out
+ * the states of the remaining groups.
+ */
+static int submit_stpg(struct scsi_device *sdev,
+				struct scsi_sense_hdr *sshdr)
+{
+	u8 cdb[MAX_COMMAND_SIZE];
+	unsigned char stpg_data[8];
+	int stpg_len = 8;
+	blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
+				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+	const struct scsi_exec_args exec_args = {
+		.sshdr = sshdr,
+	};
+
+	/* Prepare the data buffer */
+	memset(stpg_data, 0, stpg_len);
+	stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
+	put_unaligned_be16(sdev->alua->group_id, &stpg_data[6]);
+
+	/* Prepare the command. */
+	memset(cdb, 0x0, MAX_COMMAND_SIZE);
+	cdb[0] = MAINTENANCE_OUT;
+	cdb[1] = MO_SET_TARGET_PGS;
+	put_unaligned_be32(stpg_len, &cdb[6]);
+
+	return scsi_execute_cmd(sdev, cdb, opf, stpg_data,
+				stpg_len, ALUA_FAILOVER_TIMEOUT * HZ,
+				ALUA_FAILOVER_RETRIES, &exec_args);
+}
+
 static char print_alua_state(unsigned char state)
 {
 	switch (state) {
@@ -326,6 +364,67 @@ static int scsi_alua_rtpg(struct scsi_device *sdev)
 	return err;
 }
 
+
+/*
+ * scsi_alua_stpg - Issue a SET TARGET PORT GROUP command
+ *
+ * Issue a SET TARGET PORT GROUP command and evaluate the
+ * response. Returns SCSI_DH_RETRY per default to trigger
+ * a re-evaluation of the target group state or SCSI_DH_OK
+ * if no further action needs to be taken.
+ */
+__maybe_unused
+static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
+{
+	struct alua_data *alua = sdev->alua;
+	int retval;
+	struct scsi_sense_hdr sense_hdr;
+
+	if (!(alua->tpgs & TPGS_MODE_EXPLICIT)) {
+		/* Only implicit ALUA supported, retry */
+		return -EAGAIN;//SCSI_DH_RETRY;
+	}
+	switch (alua->state) {
+	case SCSI_ACCESS_STATE_OPTIMAL:
+		return 0;//SCSI_DH_OK;
+	case SCSI_ACCESS_STATE_ACTIVE:
+		if (optimize &&
+		    !alua->pref &&
+		    (alua->tpgs & TPGS_MODE_IMPLICIT))
+			return 0;//SCSI_DH_OK;
+		break;
+	case SCSI_ACCESS_STATE_STANDBY:
+	case SCSI_ACCESS_STATE_UNAVAILABLE:
+		break;
+	case SCSI_ACCESS_STATE_OFFLINE:
+		return -EIO;//SCSI_DH_IO;
+	case SCSI_ACCESS_STATE_TRANSITIONING:
+		break;
+	default:
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: stpg failed, unhandled TPGS state %d",
+			    DRV_NAME, alua->state);
+		return -ENOSYS ;//SCSI_DH_NOSYS;
+	}
+	retval = submit_stpg(sdev, &sense_hdr);
+
+	if (retval) {
+		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
+			sdev_printk(KERN_INFO, sdev,
+				    "%s: stpg failed, result %d",
+				    DRV_NAME, retval);
+			if (retval < 0)
+				return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
+		} else {
+			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
+				    DRV_NAME);
+			scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
+		}
+	}
+	/* Retry RTPG */
+	return -EAGAIN;//SCSI_DH_RETRY;
+}
+
 int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	int rel_port, ret, tpgs;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 05/13] scsi: alua: Add scsi_alua_tur()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (3 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 04/13] scsi: alua: Add scsi_alua_stpg() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:54   ` Hannes Reinecke
  2026-03-17 12:06 ` [PATCH 06/13] scsi: alua: Add scsi_alua_rtpg_run() John Garry
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add scsi_alua_tur(), which does the same as alua_tur() from scsi_dh_alua.c.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index 1045885f74169..d8825ad7a1672 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -40,6 +40,32 @@ static struct workqueue_struct *kalua_wq;
 #define ALUA_RTPG_DELAY_MSECS		5
 #define ALUA_RTPG_RETRY_DELAY		2
 
+/*
+ * scsi_alua_tur - Send a TEST UNIT READY
+ * @sdev: device to which the TEST UNIT READY command should be sent
+ *
+ * Send a TEST UNIT READY to @sdev to figure out the device state
+ * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
+ * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
+ */
+__maybe_unused
+static int scsi_alua_tur(struct scsi_device *sdev)
+{
+	struct scsi_sense_hdr sense_hdr;
+	int retval;
+
+	retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
+				      ALUA_FAILOVER_RETRIES, &sense_hdr);
+	if ((sense_hdr.sense_key == NOT_READY ||
+	     sense_hdr.sense_key == UNIT_ATTENTION) &&
+	    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
+		return -EAGAIN;//SCSI_DH_RETRY;
+	else if (retval)
+		return -EIO;//SCSI_DH_IO;
+	else
+		return 0;//SCSI_DH_OK;
+}
+
 /*
  * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
  * @sdev: sdev the command should be sent to
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 06/13] scsi: alua: Add scsi_alua_rtpg_run()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (4 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 05/13] scsi: alua: Add scsi_alua_tur() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-17 12:06 ` [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run() John Garry
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a function to run RTPG and handle its error codes. The handling is
equivalent to that in alua_rtpg_work() from scsi_dh_alua.c.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 33 +++++++++++++++++++++++++++++++--
 include/scsi/scsi_alua.h |  6 ++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index d8825ad7a1672..e4cb43ba645fa 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -48,7 +48,6 @@ static struct workqueue_struct *kalua_wq;
  * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
  * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
  */
-__maybe_unused
 static int scsi_alua_tur(struct scsi_device *sdev)
 {
 	struct scsi_sense_hdr sense_hdr;
@@ -159,7 +158,6 @@ static char print_alua_state(unsigned char state)
  * Returns -ENODEV if the path is
  * found to be unusable.
  */
-__maybe_unused
 static int scsi_alua_rtpg(struct scsi_device *sdev)
 {
 	struct alua_data *alua = sdev->alua;
@@ -390,6 +388,37 @@ static int scsi_alua_rtpg(struct scsi_device *sdev)
 	return err;
 }
 
+/*
+ * scsi_alua_rtpg_run - Run RTPG and evaluate the result
+ * @sdev: device to issue the RTPG to
+ *
+ * Returns -EAGAIN (with alua->interval set) if the caller should retry,
+ * otherwise the scsi_alua_rtpg() result.
+ */
+int scsi_alua_rtpg_run(struct scsi_device *sdev)
+{
+	struct alua_data *alua = sdev->alua;
+	unsigned long flags;
+	int state, err;
+
+	spin_lock_irqsave(&alua->lock, flags);
+	state = alua->state;
+	spin_unlock_irqrestore(&alua->lock, flags);
+
+	if (state == SCSI_ACCESS_STATE_TRANSITIONING &&
+	    scsi_alua_tur(sdev) == -EAGAIN)
+		err = -EAGAIN;	/* TUR says still transitioning: skip RTPG */
+	else
+		err = scsi_alua_rtpg(sdev);
+
+	if (err == -EAGAIN) {
+		spin_lock_irqsave(&alua->lock, flags);
+		alua->interval = ALUA_RTPG_RETRY_DELAY;
+		spin_unlock_irqrestore(&alua->lock, flags);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(scsi_alua_rtpg_run);
 
 /*
  * scsi_alua_stpg - Issue a SET TARGET PORT GROUP command
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 068277261ed9d..1eb5481f40bd4 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -30,10 +30,16 @@ struct alua_data {
 int scsi_alua_sdev_init(struct scsi_device *sdev);
 void scsi_alua_sdev_exit(struct scsi_device *sdev);
 
+int scsi_alua_rtpg_run(struct scsi_device *sdev);
+
 int scsi_alua_init(void);
 void scsi_exit_alua(void);
 #else //CONFIG_SCSI_ALUA
 
+static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
+{
+	return 0;
+}
 static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (5 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 06/13] scsi: alua: Add scsi_alua_rtpg_run() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:57   ` Hannes Reinecke
  2026-03-17 12:06 ` [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs() John Garry
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a function to run stpg and handle error codes - it does equivalent
handling as in alua_rtpg_work() from scsi_dh_alua.c

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 20 +++++++++++++++++++-
 include/scsi/scsi_alua.h |  5 +++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index e4cb43ba645fa..4e20a537a4ad6 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -428,7 +428,6 @@ EXPORT_SYMBOL_GPL(scsi_alua_rtpg_run);
  * a re-evaluation of the target group state or SCSI_DH_OK
  * if no further action needs to be taken.
  */
-__maybe_unused
 static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
 {
 	struct alua_data *alua = sdev->alua;
@@ -480,6 +479,25 @@ static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
 	return -EAGAIN;//SCSI_DH_RETRY;
 }
 
+int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
+{
+	struct alua_data *alua = sdev->alua;
+	unsigned long flags;
+	int err;
+
+	/* scsi_alua_stpg() signals "retry" as the negative errno -EAGAIN */
+	err = scsi_alua_stpg(sdev, optimize);
+	if (err != -EAGAIN)
+		return 0;
+
+	/* Retry requested: reset the retry interval to zero */
+	spin_lock_irqsave(&alua->lock, flags);
+	alua->interval = 0;
+	spin_unlock_irqrestore(&alua->lock, flags);
+	return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
+
 int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	int rel_port, ret, tpgs;
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 1eb5481f40bd4..6e4f262bbfbc0 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -31,6 +31,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev);
 void scsi_alua_sdev_exit(struct scsi_device *sdev);
 
 int scsi_alua_rtpg_run(struct scsi_device *sdev);
+int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
 
 int scsi_alua_init(void);
 void scsi_exit_alua(void);
@@ -40,6 +41,10 @@ static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
 {
 	return 0;
 }
+static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
+{
+	return 0;
+}
 static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (6 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:57   ` Hannes Reinecke
  2026-03-17 12:06 ` [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition() John Garry
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a core version of alua_check_tpgs() from scsi_dh_alua.c

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 53 ++++++++++++++++++++++++++++++++++++++++
 include/scsi/scsi_alua.h |  6 +++++
 2 files changed, 59 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index 4e20a537a4ad6..9c317e60d031e 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -128,6 +128,59 @@ static int submit_stpg(struct scsi_device *sdev,
 				ALUA_FAILOVER_RETRIES, &exec_args);
 }
 
+/*
+ * scsi_alua_check_tpgs - Evaluate TPGS setting
+ * @sdev: device to be checked
+ *
+ * Look up the TPGS mode of @sdev and log which flavour of ALUA
+ * (implicit, explicit, both or none) it advertises.
+ *
+ * ALUA is only evaluated for TYPE_DISK devices; for any other
+ * device type the TPGS field is not consulted at all.
+ *
+ * Returns the TPGS mode, or TPGS_MODE_NONE for non-disk devices
+ * and for values that are not a known TPGS_MODE_* combination.
+ */
+int scsi_alua_check_tpgs(struct scsi_device *sdev)
+{
+	int mode;
+
+	/*
+	 * ALUA support for non-disk devices is fraught with
+	 * difficulties, so disable it for now.
+	 */
+	if (sdev->type != TYPE_DISK) {
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: disable for non-disk devices\n",
+			    DRV_NAME);
+		return TPGS_MODE_NONE;
+	}
+
+	mode = scsi_device_tpgs(sdev);
+	if (mode == (TPGS_MODE_EXPLICIT | TPGS_MODE_IMPLICIT)) {
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: supports implicit and explicit TPGS\n",
+			    DRV_NAME);
+	} else if (mode == TPGS_MODE_EXPLICIT) {
+		sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
+			    DRV_NAME);
+	} else if (mode == TPGS_MODE_IMPLICIT) {
+		sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
+			    DRV_NAME);
+	} else if (mode == TPGS_MODE_NONE) {
+		sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
+			    DRV_NAME);
+	} else {
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: unsupported TPGS setting %d\n",
+			    DRV_NAME, mode);
+		mode = TPGS_MODE_NONE;
+	}
+
+	return mode;
+}
+EXPORT_SYMBOL_GPL(scsi_alua_check_tpgs);
+
 static char print_alua_state(unsigned char state)
 {
 	switch (state) {
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 6e4f262bbfbc0..2e664f20d9681 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -30,6 +30,8 @@ struct alua_data {
 int scsi_alua_sdev_init(struct scsi_device *sdev);
 void scsi_alua_sdev_exit(struct scsi_device *sdev);
 
+int scsi_alua_check_tpgs(struct scsi_device *sdev);
+
 int scsi_alua_rtpg_run(struct scsi_device *sdev);
 int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
 
@@ -37,6 +39,10 @@ int scsi_alua_init(void);
 void scsi_exit_alua(void);
 #else //CONFIG_SCSI_ALUA
 
+static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
+{
+	return 0;
+}
 static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (7 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs() John Garry
@ 2026-03-17 12:06 ` John Garry
  2026-03-18  7:58   ` Hannes Reinecke
  2026-03-17 12:07 ` [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn() John Garry
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:06 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add an equivalent of alua_handle_state_transition() from scsi_dh_alua.c

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 11 +++++++++++
 include/scsi/scsi_alua.h |  5 +++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index 9c317e60d031e..d19d1845bc324 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -40,6 +40,17 @@ static struct workqueue_struct *kalua_wq;
 #define ALUA_RTPG_DELAY_MSECS		5
 #define ALUA_RTPG_RETRY_DELAY		2
 
+void scsi_alua_handle_state_transition(struct scsi_device *sdev)
+{
+	struct alua_data *ad = sdev->alua;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ad->lock, irqflags);
+	ad->state = SCSI_ACCESS_STATE_TRANSITIONING;	/* under ad->lock */
+	spin_unlock_irqrestore(&ad->lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(scsi_alua_handle_state_transition);
+
 /*
  * alua_tur - Send a TEST UNIT READY
  * @sdev: device to which the TEST UNIT READY command should be send
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 2e664f20d9681..5b3a12861658f 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -30,6 +30,8 @@ struct alua_data {
 int scsi_alua_sdev_init(struct scsi_device *sdev);
 void scsi_alua_sdev_exit(struct scsi_device *sdev);
 
+void scsi_alua_handle_state_transition(struct scsi_device *sdev);
+
 int scsi_alua_check_tpgs(struct scsi_device *sdev);
 
 int scsi_alua_rtpg_run(struct scsi_device *sdev);
@@ -39,6 +41,9 @@ int scsi_alua_init(void);
 void scsi_exit_alua(void);
 #else //CONFIG_SCSI_ALUA
 
+static inline void scsi_alua_handle_state_transition(struct scsi_device *sdev)
+{
+}
 static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (8 preceding siblings ...)
  2026-03-17 12:06 ` [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition() John Garry
@ 2026-03-17 12:07 ` John Garry
  2026-03-18  8:01   ` Hannes Reinecke
  2026-03-17 12:07 ` [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit() John Garry
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:07 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a core version of alua_prep_fn() from scsi_dh_alua.c

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 23 +++++++++++++++++++++++
 include/scsi/scsi_alua.h |  8 ++++++++
 2 files changed, 31 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index d19d1845bc324..c269105dbae4a 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -608,6 +608,29 @@ void scsi_alua_sdev_exit(struct scsi_device *sdev)
 	sdev->alua = NULL;
 }
 
+/* Gate @req on the ALUA state currently recorded for @sdev */
+blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
+{
+	struct alua_data *ad = sdev->alua;
+	unsigned long irqflags;
+	unsigned char cur;
+
+	spin_lock_irqsave(&ad->lock, irqflags);
+	cur = ad->state;
+	spin_unlock_irqrestore(&ad->lock, irqflags);
+
+	if (cur == SCSI_ACCESS_STATE_OPTIMAL ||
+	    cur == SCSI_ACCESS_STATE_ACTIVE ||
+	    cur == SCSI_ACCESS_STATE_LBA ||
+	    cur == SCSI_ACCESS_STATE_TRANSITIONING)
+		return BLK_STS_OK;
+
+	/* Any other state: mark the request RQF_QUIET and fail it */
+	req->rq_flags |= RQF_QUIET;
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(scsi_alua_prep_fn);
+
 int scsi_alua_init(void)
 {
 	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 5b3a12861658f..c16d4adc915ec 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -8,6 +8,7 @@
 #ifndef _SCSI_ALUA_H
 #define _SCSI_ALUA_H
 
+#include <linux/blk-mq.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
 
@@ -37,6 +38,8 @@ int scsi_alua_check_tpgs(struct scsi_device *sdev);
 int scsi_alua_rtpg_run(struct scsi_device *sdev);
 int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
 
+blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
+
 int scsi_alua_init(void);
 void scsi_exit_alua(void);
 #else //CONFIG_SCSI_ALUA
@@ -56,6 +59,11 @@ static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
 {
 	return 0;
 }
+static inline
+blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
+{
+	return BLK_STS_OK;
+}
 static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit()
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (9 preceding siblings ...)
  2026-03-17 12:07 ` [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn() John Garry
@ 2026-03-17 12:07 ` John Garry
  2026-03-18  8:02   ` Hannes Reinecke
  2026-03-17 12:07 ` [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support John Garry
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:07 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add a function to check whether implicit support is available, as this
will be the general check for ALUA support when there is no DH support.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c | 7 +++++++
 include/scsi/scsi_alua.h | 6 ++++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index c269105dbae4a..d3fcd887e5018 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -631,6 +631,13 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
 }
 EXPORT_SYMBOL_GPL(scsi_alua_prep_fn);
 
+/* Report whether @sdev has ALUA data with the implicit TPGS mode bit set */
+bool scsi_device_alua_implicit(struct scsi_device *sdev)
+{
+	/* No ALUA data attached means no implicit support */
+	return sdev->alua && (sdev->alua->tpgs & TPGS_MODE_IMPLICIT);
+}
+
 int scsi_alua_init(void)
 {
 	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index c16d4adc915ec..2d5db944f75b7 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -40,6 +40,8 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
 
 blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
 
+bool scsi_device_alua_implicit(struct scsi_device *sdev);
+
 int scsi_alua_init(void);
 void scsi_exit_alua(void);
 #else //CONFIG_SCSI_ALUA
@@ -64,6 +66,10 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
 {
 	return BLK_STS_OK;
 }
+static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
+{
+	return false;
+}
 static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	return 0;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (10 preceding siblings ...)
  2026-03-17 12:07 ` [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit() John Garry
@ 2026-03-17 12:07 ` John Garry
  2026-03-23  1:47   ` Benjamin Marzinski
  2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
  2026-03-22 17:37 ` [PATCH 00/13] scsi: Core ALUA driver Benjamin Marzinski
  13 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-17 12:07 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Switch to use core scsi ALUA support.

We still need to drive the state machine for explicit ALUA.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 580 +--------------------
 1 file changed, 21 insertions(+), 559 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 067021fffc16f..4d53fab85a7ed 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/unaligned.h>
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_proto.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_eh.h>
@@ -44,7 +45,6 @@
 
 /* device handler flags */
 #define ALUA_OPTIMIZE_STPG		0x01
-#define ALUA_RTPG_EXT_HDR_UNSUPP	0x02
 /* State machine flags */
 #define ALUA_PG_RUN_RTPG		0x10
 #define ALUA_PG_RUN_STPG		0x20
@@ -65,14 +65,6 @@ struct alua_dh_data {
 	unsigned		flags; /* used for optimizing STPG */
 	spinlock_t		lock;
 
-	/* alua stuff */
-	int			state;
-	int			pref;
-	int			valid_states;
-	int			tpgs;
-	unsigned char		transition_tmo;
-	unsigned long		expiry;
-	unsigned long		interval;
 	struct delayed_work	rtpg_work;
 	struct list_head	rtpg_list;
 };
@@ -91,121 +83,6 @@ static bool alua_rtpg_queue(struct scsi_device *sdev,
 			    struct alua_queue_data *qdata, bool force);
 static void alua_check(struct scsi_device *sdev, bool force);
 
-/*
- * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
- * @sdev: sdev the command should be sent to
- */
-static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
-		       int bufflen, struct scsi_sense_hdr *sshdr, int flags)
-{
-	u8 cdb[MAX_COMMAND_SIZE];
-	blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
-				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
-	const struct scsi_exec_args exec_args = {
-		.sshdr = sshdr,
-	};
-
-	/* Prepare the command. */
-	memset(cdb, 0x0, MAX_COMMAND_SIZE);
-	cdb[0] = MAINTENANCE_IN;
-	if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
-		cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
-	else
-		cdb[1] = MI_REPORT_TARGET_PGS;
-	put_unaligned_be32(bufflen, &cdb[6]);
-
-	return scsi_execute_cmd(sdev, cdb, opf, buff, bufflen,
-				ALUA_FAILOVER_TIMEOUT * HZ,
-				ALUA_FAILOVER_RETRIES, &exec_args);
-}
-
-/*
- * submit_stpg - Issue a SET TARGET PORT GROUP command
- *
- * Currently we're only setting the current target port group state
- * to 'active/optimized' and let the array firmware figure out
- * the states of the remaining groups.
- */
-static int submit_stpg(struct scsi_device *sdev, int group_id,
-		       struct scsi_sense_hdr *sshdr)
-{
-	u8 cdb[MAX_COMMAND_SIZE];
-	unsigned char stpg_data[8];
-	int stpg_len = 8;
-	blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
-				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
-	const struct scsi_exec_args exec_args = {
-		.sshdr = sshdr,
-	};
-
-	/* Prepare the data buffer */
-	memset(stpg_data, 0, stpg_len);
-	stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
-	put_unaligned_be16(group_id, &stpg_data[6]);
-
-	/* Prepare the command. */
-	memset(cdb, 0x0, MAX_COMMAND_SIZE);
-	cdb[0] = MAINTENANCE_OUT;
-	cdb[1] = MO_SET_TARGET_PGS;
-	put_unaligned_be32(stpg_len, &cdb[6]);
-
-	return scsi_execute_cmd(sdev, cdb, opf, stpg_data,
-				stpg_len, ALUA_FAILOVER_TIMEOUT * HZ,
-				ALUA_FAILOVER_RETRIES, &exec_args);
-}
-
-/*
- * alua_check_tpgs - Evaluate TPGS setting
- * @sdev: device to be checked
- *
- * Examine the TPGS setting of the sdev to find out if ALUA
- * is supported.
- */
-static int alua_check_tpgs(struct scsi_device *sdev)
-{
-	int tpgs = TPGS_MODE_NONE;
-
-	/*
-	 * ALUA support for non-disk devices is fraught with
-	 * difficulties, so disable it for now.
-	 */
-	if (sdev->type != TYPE_DISK) {
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: disable for non-disk devices\n",
-			    ALUA_DH_NAME);
-		return tpgs;
-	}
-
-	tpgs = scsi_device_tpgs(sdev);
-	switch (tpgs) {
-	case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: supports implicit and explicit TPGS\n",
-			    ALUA_DH_NAME);
-		break;
-	case TPGS_MODE_EXPLICIT:
-		sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
-			    ALUA_DH_NAME);
-		break;
-	case TPGS_MODE_IMPLICIT:
-		sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
-			    ALUA_DH_NAME);
-		break;
-	case TPGS_MODE_NONE:
-		sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
-			    ALUA_DH_NAME);
-		break;
-	default:
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: unsupported TPGS setting %d\n",
-			    ALUA_DH_NAME, tpgs);
-		tpgs = TPGS_MODE_NONE;
-		break;
-	}
-
-	return tpgs;
-}
-
 /*
  * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
  * @sdev: device to be checked
@@ -216,56 +93,11 @@ static int alua_check_tpgs(struct scsi_device *sdev)
 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 			  int tpgs)
 {
-	int rel_port = -1;
-
-	h->group_id = scsi_vpd_tpg_id(sdev, &rel_port);
-	if (h->group_id < 0) {
-		/*
-		 * Internal error; TPGS supported but required
-		 * VPD identification descriptors not present.
-		 * Disable ALUA support
-		 */
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: No target port descriptors found\n",
-			    ALUA_DH_NAME);
-		return SCSI_DH_DEV_UNSUPP;
-	}
-	h->tpgs = tpgs;
-
 	alua_rtpg_queue(sdev, NULL, true);
 
 	return SCSI_DH_OK;
 }
 
-static char print_alua_state(unsigned char state)
-{
-	switch (state) {
-	case SCSI_ACCESS_STATE_OPTIMAL:
-		return 'A';
-	case SCSI_ACCESS_STATE_ACTIVE:
-		return 'N';
-	case SCSI_ACCESS_STATE_STANDBY:
-		return 'S';
-	case SCSI_ACCESS_STATE_UNAVAILABLE:
-		return 'U';
-	case SCSI_ACCESS_STATE_LBA:
-		return 'L';
-	case SCSI_ACCESS_STATE_OFFLINE:
-		return 'O';
-	case SCSI_ACCESS_STATE_TRANSITIONING:
-		return 'T';
-	default:
-		return 'X';
-	}
-}
-
-static void alua_handle_state_transition(struct scsi_device *sdev)
-{
-	struct alua_dh_data *h = sdev->handler_data;
-
-	h->state = SCSI_ACCESS_STATE_TRANSITIONING;
-}
-
 static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
 					      struct scsi_sense_hdr *sense_hdr)
 {
@@ -275,7 +107,7 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
 			/*
 			 * LUN Not Accessible - ALUA state transition
 			 */
-			alua_handle_state_transition(sdev);
+			scsi_alua_handle_state_transition(sdev);
 			return NEEDS_RETRY;
 		}
 		break;
@@ -284,7 +116,7 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
 			/*
 			 * LUN Not Accessible - ALUA state transition
 			 */
-			alua_handle_state_transition(sdev);
+			scsi_alua_handle_state_transition(sdev);
 			return NEEDS_RETRY;
 		}
 		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
@@ -338,329 +170,6 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
 	return SCSI_RETURN_NOT_HANDLED;
 }
 
-/*
- * alua_tur - Send a TEST UNIT READY
- * @sdev: device to which the TEST UNIT READY command should be send
- *
- * Send a TEST UNIT READY to @sdev to figure out the device state
- * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
- * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
- */
-static int alua_tur(struct scsi_device *sdev)
-{
-	struct scsi_sense_hdr sense_hdr;
-	int retval;
-
-	retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
-				      ALUA_FAILOVER_RETRIES, &sense_hdr);
-	if ((sense_hdr.sense_key == NOT_READY ||
-	     sense_hdr.sense_key == UNIT_ATTENTION) &&
-	    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
-		return SCSI_DH_RETRY;
-	else if (retval)
-		return SCSI_DH_IO;
-	else
-		return SCSI_DH_OK;
-}
-
-/*
- * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
- * @sdev: the device to be evaluated.
- *
- * Evaluate the Target Port Group State.
- * Returns SCSI_DH_DEV_OFFLINED if the path is
- * found to be unusable.
- */
-static int alua_rtpg(struct scsi_device *sdev)
-{
-	struct scsi_sense_hdr sense_hdr;
-	struct alua_dh_data *h = sdev->handler_data;
-	int len, k, off, bufflen = ALUA_RTPG_SIZE;
-	int group_id_old, state_old, pref_old, valid_states_old;
-	unsigned char *desc, *buff;
-	unsigned err;
-	int retval;
-	unsigned int tpg_desc_tbl_off;
-	unsigned char orig_transition_tmo;
-	unsigned long flags;
-	bool transitioning_sense = false;
-	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
-
-	if (group_id < 0) {
-		/*
-		 * Internal error; TPGS supported but required
-		 * VPD identification descriptors not present.
-		 * Disable ALUA support
-		 */
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: No target port descriptors found\n",
-			    ALUA_DH_NAME);
-		return SCSI_DH_DEV_UNSUPP;
-	}
-
-	group_id_old = h->group_id;
-	state_old = h->state;
-	pref_old = h->pref;
-	valid_states_old = h->valid_states;
-
-	if (!h->expiry) {
-		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
-
-		if (h->transition_tmo)
-			transition_tmo = h->transition_tmo * HZ;
-
-		h->expiry = round_jiffies_up(jiffies + transition_tmo);
-	}
-
-	buff = kzalloc(bufflen, GFP_KERNEL);
-	if (!buff)
-		return SCSI_DH_DEV_TEMP_BUSY;
-
- retry:
-	err = 0;
-	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, h->flags);
-
-	if (retval) {
-		/*
-		 * Some (broken) implementations have a habit of returning
-		 * an error during things like firmware update etc.
-		 * But if the target only supports active/optimized there's
-		 * not much we can do; it's not that we can switch paths
-		 * or anything.
-		 * So ignore any errors to avoid spurious failures during
-		 * path failover.
-		 */
-		if ((h->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
-			sdev_printk(KERN_INFO, sdev,
-				    "%s: ignoring rtpg result %d\n",
-				    ALUA_DH_NAME, retval);
-			kfree(buff);
-			return SCSI_DH_OK;
-		}
-		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
-			sdev_printk(KERN_INFO, sdev,
-				    "%s: rtpg failed, result %d\n",
-				    ALUA_DH_NAME, retval);
-			kfree(buff);
-			if (retval < 0)
-				return SCSI_DH_DEV_TEMP_BUSY;
-			if (host_byte(retval) == DID_NO_CONNECT)
-				return SCSI_DH_RES_TEMP_UNAVAIL;
-			return SCSI_DH_IO;
-		}
-
-		/*
-		 * submit_rtpg() has failed on existing arrays
-		 * when requesting extended header info, and
-		 * the array doesn't support extended headers,
-		 * even though it shouldn't according to T10.
-		 * The retry without rtpg_ext_hdr_req set
-		 * handles this.
-		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
-		 * with ASC 00h if they don't support the extended header.
-		 */
-		if (!(h->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
-		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
-			h->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
-			goto retry;
-		}
-		/*
-		 * If the array returns with 'ALUA state transition'
-		 * sense code here it cannot return RTPG data during
-		 * transition. So set the state to 'transitioning' directly.
-		 */
-		if (sense_hdr.sense_key == NOT_READY &&
-		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
-			transitioning_sense = true;
-			goto skip_rtpg;
-		}
-		/*
-		 * Retry on any other UNIT ATTENTION occurred.
-		 */
-		if (sense_hdr.sense_key == UNIT_ATTENTION)
-			err = SCSI_DH_RETRY;
-		if (err == SCSI_DH_RETRY &&
-		    h->expiry != 0 && time_before(jiffies, h->expiry)) {
-			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
-				    ALUA_DH_NAME);
-			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
-			kfree(buff);
-			return err;
-		}
-		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
-			    ALUA_DH_NAME);
-		scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
-		kfree(buff);
-		h->expiry = 0;
-		return SCSI_DH_IO;
-	}
-
-	len = get_unaligned_be32(&buff[0]) + 4;
-
-	if (len > bufflen) {
-		/* Resubmit with the correct length */
-		kfree(buff);
-		bufflen = len;
-		buff = kmalloc(bufflen, GFP_KERNEL);
-		if (!buff) {
-			sdev_printk(KERN_WARNING, sdev,
-				    "%s: kmalloc buffer failed\n",__func__);
-			/* Temporary failure, bypass */
-			h->expiry = 0;
-			return SCSI_DH_DEV_TEMP_BUSY;
-		}
-		goto retry;
-	}
-
-	orig_transition_tmo = h->transition_tmo;
-	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
-		h->transition_tmo = buff[5];
-	else
-		h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
-
-	if (orig_transition_tmo != h->transition_tmo) {
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: transition timeout set to %d seconds\n",
-			    ALUA_DH_NAME, h->transition_tmo);
-		h->expiry = jiffies + h->transition_tmo * HZ;
-	}
-
-	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
-		tpg_desc_tbl_off = 8;
-	else
-		tpg_desc_tbl_off = 4;
-
-	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
-	     k < len;
-	     k += off, desc += off) {
-		u16 group_id_desc = get_unaligned_be16(&desc[2]);
-
-		spin_lock_irqsave(&h->lock, flags);
-		if (group_id_desc == group_id) {
-			h->group_id = group_id;
-			WRITE_ONCE(h->state, desc[0] & 0x0f);
-			h->pref = desc[0] >> 7;
-			WRITE_ONCE(sdev->access_state, desc[0]);
-			h->valid_states = desc[1];
-		}
-		spin_unlock_irqrestore(&h->lock, flags);
-		off = 8 + (desc[7] * 4);
-	}
-
- skip_rtpg:
-	spin_lock_irqsave(&h->lock, flags);
-	if (transitioning_sense)
-		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
-
-	if (group_id_old != h->group_id || state_old != h->state ||
-		pref_old != h->pref || valid_states_old != h->valid_states)
-		sdev_printk(KERN_INFO, sdev,
-			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
-			ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
-			h->pref ? "preferred" : "non-preferred",
-			h->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
-			h->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
-			h->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
-			h->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
-			h->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
-			h->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
-			h->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
-
-	switch (h->state) {
-	case SCSI_ACCESS_STATE_TRANSITIONING:
-		if (time_before(jiffies, h->expiry)) {
-			/* State transition, retry */
-			h->interval = ALUA_RTPG_RETRY_DELAY;
-			err = SCSI_DH_RETRY;
-		} else {
-			struct alua_dh_data *h;
-			unsigned char access_state;
-
-			/* Transitioning time exceeded, set port to standby */
-			err = SCSI_DH_IO;
-			h->state = SCSI_ACCESS_STATE_STANDBY;
-			h->expiry = 0;
-			access_state = h->state & SCSI_ACCESS_STATE_MASK;
-			if (h->pref)
-				access_state |= SCSI_ACCESS_STATE_PREFERRED;
-			WRITE_ONCE(sdev->access_state, access_state);
-		}
-		break;
-	case SCSI_ACCESS_STATE_OFFLINE:
-		/* Path unusable */
-		err = SCSI_DH_DEV_OFFLINED;
-		h->expiry = 0;
-		break;
-	default:
-		/* Useable path if active */
-		err = SCSI_DH_OK;
-		h->expiry = 0;
-		break;
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-	kfree(buff);
-	return err;
-}
-
-/*
- * alua_stpg - Issue a SET TARGET PORT GROUP command
- *
- * Issue a SET TARGET PORT GROUP command and evaluate the
- * response. Returns SCSI_DH_RETRY per default to trigger
- * a re-evaluation of the target group state or SCSI_DH_OK
- * if no further action needs to be taken.
- */
-static unsigned alua_stpg(struct scsi_device *sdev)
-{
-	int retval;
-	struct scsi_sense_hdr sense_hdr;
-	struct alua_dh_data *h = sdev->handler_data;
-
-	if (!(h->tpgs & TPGS_MODE_EXPLICIT)) {
-		/* Only implicit ALUA supported, retry */
-		return SCSI_DH_RETRY;
-	}
-	switch (h->state) {
-	case SCSI_ACCESS_STATE_OPTIMAL:
-		return SCSI_DH_OK;
-	case SCSI_ACCESS_STATE_ACTIVE:
-		if ((h->flags & ALUA_OPTIMIZE_STPG) &&
-		    !h->pref &&
-		    (h->tpgs & TPGS_MODE_IMPLICIT))
-			return SCSI_DH_OK;
-		break;
-	case SCSI_ACCESS_STATE_STANDBY:
-	case SCSI_ACCESS_STATE_UNAVAILABLE:
-		break;
-	case SCSI_ACCESS_STATE_OFFLINE:
-		return SCSI_DH_IO;
-	case SCSI_ACCESS_STATE_TRANSITIONING:
-		break;
-	default:
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: stpg failed, unhandled TPGS state %d",
-			    ALUA_DH_NAME, h->state);
-		return SCSI_DH_NOSYS;
-	}
-	retval = submit_stpg(sdev, h->group_id, &sense_hdr);
-
-	if (retval) {
-		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
-			sdev_printk(KERN_INFO, sdev,
-				    "%s: stpg failed, result %d",
-				    ALUA_DH_NAME, retval);
-			if (retval < 0)
-				return SCSI_DH_DEV_TEMP_BUSY;
-		} else {
-			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
-				    ALUA_DH_NAME);
-			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
-		}
-	}
-	/* Retry RTPG */
-	return SCSI_DH_RETRY;
-}
-
 static void alua_rtpg_work(struct work_struct *work)
 {
 	struct alua_dh_data *h =
@@ -670,56 +179,41 @@ static void alua_rtpg_work(struct work_struct *work)
 	int err = SCSI_DH_OK;
 	struct alua_queue_data *qdata, *tmp;
 	unsigned long flags;
+	int ret;
 
 	spin_lock_irqsave(&h->lock, flags);
 	h->flags |= ALUA_PG_RUNNING;
 	if (h->flags & ALUA_PG_RUN_RTPG) {
-		int state = h->state;
 
 		h->flags &= ~ALUA_PG_RUN_RTPG;
 		spin_unlock_irqrestore(&h->lock, flags);
-		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
-			if (alua_tur(sdev) == SCSI_DH_RETRY) {
-				spin_lock_irqsave(&h->lock, flags);
-				h->flags &= ~ALUA_PG_RUNNING;
-				h->flags |= ALUA_PG_RUN_RTPG;
-				if (!h->interval)
-					h->interval = ALUA_RTPG_RETRY_DELAY;
-				spin_unlock_irqrestore(&h->lock, flags);
-				queue_delayed_work(kaluad_wq, &h->rtpg_work,
-						   h->interval * HZ);
-				return;
-			}
-			/* Send RTPG on failure or if TUR indicates SUCCESS */
-		}
-		err = alua_rtpg(sdev);
-		spin_lock_irqsave(&h->lock, flags);
-
-		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+		ret = scsi_alua_rtpg_run(sdev);
+		if (ret == -EAGAIN) {
+			spin_lock_irqsave(&h->lock, flags);
 			h->flags &= ~ALUA_PG_RUNNING;
-			if (err == SCSI_DH_IMM_RETRY)
-				h->interval = 0;
-			else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
-				h->interval = ALUA_RTPG_RETRY_DELAY;
 			h->flags |= ALUA_PG_RUN_RTPG;
 			spin_unlock_irqrestore(&h->lock, flags);
-			goto queue_rtpg;
+			queue_delayed_work(kaluad_wq, &h->rtpg_work,
+							   sdev->alua->interval * HZ);
+			return;
 		}
-		if (err != SCSI_DH_OK)
-			h->flags &= ~ALUA_PG_RUN_STPG;
+		if (err != 0)
+				h->flags &= ~ALUA_PG_RUN_STPG;
 	}
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->flags & ALUA_PG_RUN_STPG) {
 		h->flags &= ~ALUA_PG_RUN_STPG;
 		spin_unlock_irqrestore(&h->lock, flags);
-		err = alua_stpg(sdev);
-		spin_lock_irqsave(&h->lock, flags);
-		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+		ret = scsi_alua_stpg_run(sdev, h->flags & ALUA_OPTIMIZE_STPG);
+		if (err == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) {
+			spin_lock_irqsave(&h->lock, flags);
 			h->flags |= ALUA_PG_RUN_RTPG;
-			h->interval = 0;
 			h->flags &= ~ALUA_PG_RUNNING;
 			spin_unlock_irqrestore(&h->lock, flags);
 			goto queue_rtpg;
 		}
+	} else {
+		spin_unlock_irqrestore(&h->lock, flags);
 	}
 
 	list_splice_init(&h->rtpg_list, &qdata_list);
@@ -728,8 +222,6 @@ static void alua_rtpg_work(struct work_struct *work)
 	 * Re-enable the device for the next attempt.
 	 */
 	h->disabled = false;
-	spin_unlock_irqrestore(&h->lock, flags);
-
 
 	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
 		list_del(&qdata->entry);
@@ -745,7 +237,7 @@ static void alua_rtpg_work(struct work_struct *work)
 	return;
 
 queue_rtpg:
-	queue_delayed_work(kaluad_wq, &h->rtpg_work, h->interval * HZ);
+	queue_delayed_work(kaluad_wq, &h->rtpg_work, sdev->alua->interval * HZ);
 }
 
 /**
@@ -809,7 +301,7 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
 
 	mutex_lock(&h->init_mutex);
 	h->disabled = false;
-	tpgs = alua_check_tpgs(sdev);
+	tpgs = scsi_alua_check_tpgs(sdev);
 	if (tpgs != TPGS_MODE_NONE)
 		err = alua_check_vpd(sdev, h, tpgs);
 	h->init_error = err;
@@ -898,34 +390,6 @@ static void alua_check(struct scsi_device *sdev, bool force)
 	alua_rtpg_queue(sdev, NULL, force);
 }
 
-/*
- * alua_prep_fn - request callback
- *
- * Fail I/O to all paths not in state
- * active/optimized or active/non-optimized.
- */
-static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
-{
-	struct alua_dh_data *h = sdev->handler_data;
-	unsigned long flags;
-	unsigned char state;
-
-	spin_lock_irqsave(&h->lock, flags);
-	state = h->state;
-	spin_unlock_irqrestore(&h->lock, flags);
-
-	switch (state) {
-	case SCSI_ACCESS_STATE_OPTIMAL:
-	case SCSI_ACCESS_STATE_ACTIVE:
-	case SCSI_ACCESS_STATE_LBA:
-	case SCSI_ACCESS_STATE_TRANSITIONING:
-		return BLK_STS_OK;
-	default:
-		req->rq_flags |= RQF_QUIET;
-		return BLK_STS_IOERR;
-	}
-}
-
 static void alua_rescan(struct scsi_device *sdev)
 {
 	struct alua_dh_data *h = sdev->handler_data;
@@ -953,8 +417,6 @@ static int alua_bus_attach(struct scsi_device *sdev)
 
 	mutex_init(&h->init_mutex);
 
-	h->state = SCSI_ACCESS_STATE_OPTIMAL;
-	h->valid_states = TPGS_SUPPORT_ALL;
 	if (optimize_stpg)
 		h->flags |= ALUA_OPTIMIZE_STPG;
 
@@ -986,7 +448,7 @@ static struct scsi_device_handler alua_dh = {
 	.module = THIS_MODULE,
 	.attach = alua_bus_attach,
 	.detach = alua_bus_detach,
-	.prep_fn = alua_prep_fn,
+	.prep_fn = scsi_alua_prep_fn,
 	.check_sense = alua_check_sense,
 	.activate = alua_activate,
 	.rescan = alua_rescan,
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (11 preceding siblings ...)
  2026-03-17 12:07 ` [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support John Garry
@ 2026-03-17 12:07 ` John Garry
  2026-03-18  8:08   ` Hannes Reinecke
                     ` (2 more replies)
  2026-03-22 17:37 ` [PATCH 00/13] scsi: Core ALUA driver Benjamin Marzinski
  13 siblings, 3 replies; 63+ messages in thread
From: John Garry @ 2026-03-17 12:07 UTC (permalink / raw)
  To: martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel, John Garry

Add ALUA support for the case where no device handler is attached.

This is equivalent to the case where native SCSI multipathing is used.

Essentially the same handling is available as in the DH ALUA driver for
rescan, request prep and sense handling.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_alua.c  | 93 +++++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_error.c |  7 +++
 drivers/scsi/scsi_lib.c   |  7 +++
 drivers/scsi/scsi_scan.c  |  2 +
 drivers/scsi/scsi_sysfs.c |  4 +-
 include/scsi/scsi_alua.h  | 14 ++++++
 6 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
index d3fcd887e5018..ee0229b1a9d12 100644
--- a/drivers/scsi/scsi_alua.c
+++ b/drivers/scsi/scsi_alua.c
@@ -562,6 +562,90 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
 }
 EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
 
+enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
+					      struct scsi_sense_hdr *sense_hdr)
+{
+	switch (sense_hdr->sense_key) {
+	case NOT_READY:
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
+			/*
+			 * LUN Not Accessible - ALUA state transition
+			 */
+			scsi_alua_handle_state_transition(sdev);
+			return NEEDS_RETRY;
+		}
+		break;
+	case UNIT_ATTENTION:
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
+			/*
+			 * LUN Not Accessible - ALUA state transition
+			 */
+			scsi_alua_handle_state_transition(sdev);
+			return NEEDS_RETRY;
+		}
+		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
+			/*
+			 * Power On, Reset, or Bus Device Reset.
+			 * Might have obscured a state transition,
+			 * so schedule a recheck.
+			 */
+			scsi_device_alua_rescan(sdev);
+			return ADD_TO_MLQUEUE;
+		}
+		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
+			/*
+			 * Device internal reset
+			 */
+			return ADD_TO_MLQUEUE;
+		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
+			/*
+			 * Mode Parameters Changed
+			 */
+			return ADD_TO_MLQUEUE;
+		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
+			/*
+			 * ALUA state changed
+			 */
+			scsi_device_alua_rescan(sdev);
+			return ADD_TO_MLQUEUE;
+		}
+		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
+			/*
+			 * Implicit ALUA state transition failed
+			 */
+			scsi_device_alua_rescan(sdev);
+			return ADD_TO_MLQUEUE;
+		}
+		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
+			/*
+			 * Inquiry data has changed
+			 */
+			return ADD_TO_MLQUEUE;
+		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
+			/*
+			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
+			 * when switching controllers on targets like
+			 * Intel Multi-Flex. We can just retry.
+			 */
+			return ADD_TO_MLQUEUE;
+		break;
+	}
+
+	return SCSI_RETURN_NOT_HANDLED;
+}
+
+static void alua_rtpg_work(struct work_struct *work)
+{
+	struct alua_data *alua =
+		container_of(work, struct alua_data, work.work);
+	int ret;
+
+	ret = scsi_alua_rtpg_run(alua->sdev);
+
+	if (ret == -EAGAIN)
+		queue_delayed_work(kalua_wq, &alua->work, alua->interval * HZ);
+}
+
 int scsi_alua_sdev_init(struct scsi_device *sdev)
 {
 	int rel_port, ret, tpgs;
@@ -591,6 +675,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
 		goto out_free_data;
 	}
 
+	INIT_DELAYED_WORK(&sdev->alua->work, alua_rtpg_work);
 	sdev->alua->sdev = sdev;
 	sdev->alua->tpgs = tpgs;
 	spin_lock_init(&sdev->alua->lock);
@@ -638,6 +723,14 @@ bool scsi_device_alua_implicit(struct scsi_device *sdev)
 	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
 }
 
+void scsi_device_alua_rescan(struct scsi_device *sdev)
+{
+	struct alua_data *alua = sdev->alua;
+
+	queue_delayed_work(kalua_wq, &alua->work,
+				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS));
+}
+
 int scsi_alua_init(void)
 {
 	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 147127fb4db9c..a542e7a85a24d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -29,6 +29,7 @@
 #include <linux/jiffies.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_device.h>
@@ -578,6 +579,12 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
 		if (rc != SCSI_RETURN_NOT_HANDLED)
 			return rc;
 		/* handler does not care. Drop down to default handling */
+	} else if (scsi_device_alua_implicit(sdev)) {
+		enum scsi_disposition rc;
+
+		rc = scsi_alua_check_sense(sdev, &sshdr);
+		if (rc != SCSI_RETURN_NOT_HANDLED)
+			return rc;
 	}
 
 	if (scmd->cmnd[0] == TEST_UNIT_READY &&
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d3a8cd4166f92..e5bcee555ea10 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -26,6 +26,7 @@
 #include <linux/unaligned.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_device.h>
@@ -1719,6 +1720,12 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
 	if (sdev->handler && sdev->handler->prep_fn) {
 		blk_status_t ret = sdev->handler->prep_fn(sdev, req);
 
+		if (ret != BLK_STS_OK)
+			return ret;
+	} else if (scsi_device_alua_implicit(sdev)) {
+		/* We should be able to make this common for ALUA DH as well */
+		blk_status_t ret = scsi_alua_prep_fn(sdev, req);
+
 		if (ret != BLK_STS_OK)
 			return ret;
 	}
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 3af64d1231445..73caf83bd1097 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1744,6 +1744,8 @@ int scsi_rescan_device(struct scsi_device *sdev)
 
 	if (sdev->handler && sdev->handler->rescan)
 		sdev->handler->rescan(sdev);
+	else if (scsi_device_alua_implicit(sdev))
+		scsi_device_alua_rescan(sdev);
 
 	if (dev->driver && try_module_get(dev->driver->owner)) {
 		struct scsi_driver *drv = to_scsi_driver(dev->driver);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 6c4c3c22f6acf..71a9613898cfc 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1152,7 +1152,7 @@ sdev_show_access_state(struct device *dev,
 	unsigned char access_state;
 	const char *access_state_name;
 
-	if (!sdev->handler)
+	if (!sdev->handler && !scsi_device_alua_implicit(sdev))
 		return -EINVAL;
 
 	access_state = (sdev->access_state & SCSI_ACCESS_STATE_MASK);
@@ -1409,6 +1409,8 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 	scsi_autopm_get_device(sdev);
 
 	scsi_dh_add_device(sdev);
+	if (!sdev->handler && scsi_device_alua_implicit(sdev))
+		scsi_device_alua_rescan(sdev);
 
 	error = device_add(&sdev->sdev_gendev);
 	if (error) {
diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
index 2d5db944f75b7..8e506d1d66cce 100644
--- a/include/scsi/scsi_alua.h
+++ b/include/scsi/scsi_alua.h
@@ -24,6 +24,7 @@ struct alua_data {
 	unsigned char		transition_tmo;
 	unsigned long		expiry;
 	unsigned long		interval;
+	struct delayed_work	work;
 	struct scsi_device	*sdev;
 	spinlock_t		lock;
 };
@@ -35,11 +36,15 @@ void scsi_alua_handle_state_transition(struct scsi_device *sdev);
 
 int scsi_alua_check_tpgs(struct scsi_device *sdev);
 
+enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
+				struct scsi_sense_hdr *sense_hdr);
+
 int scsi_alua_rtpg_run(struct scsi_device *sdev);
 int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
 
 blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
 
+void scsi_device_alua_rescan(struct scsi_device *sdev);
 bool scsi_device_alua_implicit(struct scsi_device *sdev);
 
 int scsi_alua_init(void);
@@ -53,6 +58,12 @@ static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
 {
 	return 0;
 }
+static inline
+enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
+				struct scsi_sense_hdr *sense_hdr)
+{
+	return SCSI_RETURN_NOT_HANDLED;
+}
 static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
 {
 	return 0;
@@ -66,6 +77,9 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
 {
 	return BLK_STS_OK;
 }
+static inline void scsi_device_alua_rescan(struct scsi_device *sdev)
+{
+}
 static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
 {
 	return false;
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
@ 2026-03-18  7:44   ` Hannes Reinecke
  2026-03-18  8:53     ` John Garry
  2026-03-23  0:08   ` Benjamin Marzinski
  1 sibling, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:44 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Delete the alua_port_group usage, as it is more accurate to manage the
> port group info per-scsi device - see [0]
> 
> [0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m4ffc0d07f169b70b8fd2407bae9632aa0f8c1f9a
> 
> For now, the handler data will be used to hold the ALUA-related info.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/device_handler/scsi_dh_alua.c | 663 ++++++---------------
>   1 file changed, 180 insertions(+), 483 deletions(-)
> 
In principle, yes, but I would put this at the end after the patches to
move the alua functionality to the scsi core.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 02/13] scsi: alua: Create a core ALUA driver
  2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
@ 2026-03-18  7:47   ` Hannes Reinecke
  2026-03-23 12:56     ` John Garry
  2026-03-18 17:17   ` kernel test robot
  2026-03-18 22:54   ` kernel test robot
  2 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:47 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add a dedicated ALUA driver which can be used for native SCSI multipath
> and also DH-based ALUA support.
> 
Is this really a 'driver'? It's more additional functionality for a SCSI
device, and not really a driver.
At least I _think_ it is ...

> The core driver will provide ALUA support for when a scsi_device does not
> have a DH attachment.
> 
> The core driver will provide functionality to handle RTPG and STPG, but
> the scsi DH ALUA driver will be responsible for driving these when DH
> attached.
> 
> New structure alua_data holds all ALUA-related scsi_device info.
> 
> Hannes Reinecke originally authored the kernel ALUA code.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/Kconfig                | 10 +++-
>   drivers/scsi/Makefile               |  1 +
>   drivers/scsi/device_handler/Kconfig |  1 +
>   drivers/scsi/scsi.c                 |  7 +++
>   drivers/scsi/scsi_alua.c            | 78 +++++++++++++++++++++++++++++
>   drivers/scsi/scsi_scan.c            |  4 ++
>   drivers/scsi/scsi_sysfs.c           |  3 ++
>   include/scsi/scsi_alua.h            | 45 +++++++++++++++++
>   include/scsi/scsi_device.h          |  1 +
>   9 files changed, 149 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/scsi/scsi_alua.c
>   create mode 100644 include/scsi/scsi_alua.h
> 
> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> index 19d0884479a24..396cc0fda9fcc 100644
> --- a/drivers/scsi/Kconfig
> +++ b/drivers/scsi/Kconfig
> @@ -76,8 +76,16 @@ config SCSI_LIB_KUNIT_TEST
>   
>   	  If unsure say N.
>   
> -comment "SCSI support type (disk, tape, CD-ROM)"
> +config SCSI_ALUA
> +	tristate "SPC-3 ALUA support"
>   	depends on SCSI
> +	help
> +	  SCSI support for generic SPC-3 Asymmetric Logical Unit
> +	  Access (ALUA).
> +
> +	  If unsure, say Y.
> +
> +comment "SCSI support type (disk, tape, CD-ROM)"
>   
>   config BLK_DEV_SD
>   	tristate "SCSI disk support"
> diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
> index 16de3e41f94c4..90c25f36ea3a8 100644
> --- a/drivers/scsi/Makefile
> +++ b/drivers/scsi/Makefile
> @@ -153,6 +153,7 @@ obj-$(CONFIG_SCSI_ENCLOSURE)	+= ses.o
>   
>   obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/
>   
> +obj-$(CONFIG_SCSI_ALUA) += scsi_alua.o
>   # This goes last, so that "real" scsi devices probe earlier
>   obj-$(CONFIG_SCSI_DEBUG)	+= scsi_debug.o
>   scsi_mod-y			+= scsi.o hosts.o scsi_ioctl.o \
> diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig
> index 368eb94c24562..ff06aea8c272c 100644
> --- a/drivers/scsi/device_handler/Kconfig
> +++ b/drivers/scsi/device_handler/Kconfig
> @@ -35,6 +35,7 @@ config SCSI_DH_EMC
>   config SCSI_DH_ALUA
>   	tristate "SPC-3 ALUA Device Handler"
>   	depends on SCSI_DH && SCSI
> +	select SCSI_ALUA
>   	help
>   	  SCSI Device handler for generic SPC-3 Asymmetric Logical Unit
>   	  Access (ALUA).
> diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
> index 76cdad063f7bc..fc90ee19bb962 100644
> --- a/drivers/scsi/scsi.c
> +++ b/drivers/scsi/scsi.c
> @@ -58,6 +58,7 @@
>   #include <linux/unaligned.h>
>   
>   #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>   #include <scsi/scsi_cmnd.h>
>   #include <scsi/scsi_dbg.h>
>   #include <scsi/scsi_device.h>
> @@ -1042,12 +1043,17 @@ static int __init init_scsi(void)
>   	error = scsi_sysfs_register();
>   	if (error)
>   		goto cleanup_sysctl;
> +	error = scsi_alua_init();
> +	if (error)
> +		goto cleanup_sysfs;
>   
>   	scsi_netlink_init();
>   
>   	printk(KERN_NOTICE "SCSI subsystem initialized\n");
>   	return 0;
>   
> +cleanup_sysfs:
> +	scsi_sysfs_unregister();
>   cleanup_sysctl:
>   	scsi_exit_sysctl();
>   cleanup_hosts:
> @@ -1066,6 +1072,7 @@ static int __init init_scsi(void)
>   static void __exit exit_scsi(void)
>   {
>   	scsi_netlink_exit();
> +	scsi_exit_alua();
>   	scsi_sysfs_unregister();
>   	scsi_exit_sysctl();
>   	scsi_exit_hosts();
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> new file mode 100644
> index 0000000000000..a5a67c6deff17
> --- /dev/null
> +++ b/drivers/scsi/scsi_alua.c
> @@ -0,0 +1,78 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Generic SCSI-3 ALUA SCSI driver
> + *
> + * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
> + * All rights reserved.
> + */
> +
> +#include <scsi/scsi.h>
> +#include <scsi/scsi_proto.h>
> +#include <scsi/scsi_dbg.h>
> +#include <scsi/scsi_eh.h>
> +#include <scsi/scsi_alua.h>
> +
> +#define DRV_NAME "alua"
> +
> +static struct workqueue_struct *kalua_wq;
> +
> +int scsi_alua_sdev_init(struct scsi_device *sdev)
> +{
> +	int rel_port, ret, tpgs;
> +
> +	tpgs = scsi_device_tpgs(sdev);
> +	if (!tpgs)
> +		return 0;
> +
> +	sdev->alua = kzalloc(sizeof(*sdev->alua), GFP_KERNEL);
> +	if (!sdev->alua)
> +		return -ENOMEM;
> +

Why do you allocate a separate structure?
Is this structure shared with something?
Wouldn't it be better to just add some field to the scsi_device?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg()
  2026-03-17 12:06 ` [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg() John Garry
@ 2026-03-18  7:50   ` Hannes Reinecke
  2026-03-23 12:58     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:50 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add scsi_alua_rtpg(), which does the same as alua_rtpg() from
> scsi_dh_alua.c
> 
> Members of the per-sdev alua_data structure are updated from same in
> alua_dh_data.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 311 +++++++++++++++++++++++++++++++++++++++
>   include/scsi/scsi_alua.h |   8 +
>   2 files changed, 319 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index a5a67c6deff17..50c1d17b52dc7 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -6,6 +6,8 @@
>    * All rights reserved.
>    */
>   
> +#include <linux/unaligned.h>
> +
>   #include <scsi/scsi.h>
>   #include <scsi/scsi_proto.h>
>   #include <scsi/scsi_dbg.h>
> @@ -16,6 +18,314 @@
>   
>   static struct workqueue_struct *kalua_wq;
>   
> +#define TPGS_SUPPORT_NONE		0x00
> +#define TPGS_SUPPORT_OPTIMIZED		0x01
> +#define TPGS_SUPPORT_NONOPTIMIZED	0x02
> +#define TPGS_SUPPORT_STANDBY		0x04
> +#define TPGS_SUPPORT_UNAVAILABLE	0x08
> +#define TPGS_SUPPORT_LBA_DEPENDENT	0x10
> +#define TPGS_SUPPORT_OFFLINE		0x40
> +#define TPGS_SUPPORT_TRANSITION		0x80
> +#define TPGS_SUPPORT_ALL		0xdf
> +
> +#define RTPG_FMT_MASK			0x70
> +#define RTPG_FMT_EXT_HDR		0x10
> +
> +#define ALUA_RTPG_SIZE			128
> +#define ALUA_FAILOVER_TIMEOUT		60
> +#define ALUA_FAILOVER_RETRIES		5
> +#define ALUA_RTPG_DELAY_MSECS		5
> +#define ALUA_RTPG_RETRY_DELAY		2
> +
> +/*
> + * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
> + * @sdev: sdev the command should be sent to
> + */
> +static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
> +		       int bufflen, struct scsi_sense_hdr *sshdr)
> +{
> +	u8 cdb[MAX_COMMAND_SIZE];
> +	blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
> +				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
> +	const struct scsi_exec_args exec_args = {
> +		.sshdr = sshdr,
> +	};
> +
> +	/* Prepare the command. */
> +	memset(cdb, 0x0, MAX_COMMAND_SIZE);
> +	cdb[0] = MAINTENANCE_IN;
> +	if (!sdev->alua->rtpg_ext_hdr_unsupp)
> +		cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
> +	else
> +		cdb[1] = MI_REPORT_TARGET_PGS;
> +	put_unaligned_be32(bufflen, &cdb[6]);
> +
> +	return scsi_execute_cmd(sdev, cdb, opf, buff, bufflen,
> +				ALUA_FAILOVER_TIMEOUT * HZ,
> +				ALUA_FAILOVER_RETRIES, &exec_args);
> +}
> +
> +static char print_alua_state(unsigned char state)
> +{
> +	switch (state) {
> +	case SCSI_ACCESS_STATE_OPTIMAL:
> +		return 'A';
> +	case SCSI_ACCESS_STATE_ACTIVE:
> +		return 'N';
> +	case SCSI_ACCESS_STATE_STANDBY:
> +		return 'S';
> +	case SCSI_ACCESS_STATE_UNAVAILABLE:
> +		return 'U';
> +	case SCSI_ACCESS_STATE_LBA:
> +		return 'L';
> +	case SCSI_ACCESS_STATE_OFFLINE:
> +		return 'O';
> +	case SCSI_ACCESS_STATE_TRANSITIONING:
> +		return 'T';
> +	default:
> +		return 'X';
> +	}
> +}
> +
> +/*
> + * scsi_alua_rtpg - Evaluate REPORT TARGET GROUP STATES
> + * @sdev: the device to be evaluated.
> + *
> + * Evaluate the Target Port Group State.
> + * Returns -ENODEV if the path is
> + * found to be unusable.
> + */
> +__maybe_unused
> +static int scsi_alua_rtpg(struct scsi_device *sdev)
> +{
> +	struct alua_data *alua = sdev->alua;
> +	struct scsi_sense_hdr sense_hdr;
> +	int len, k, off, bufflen = ALUA_RTPG_SIZE;
> +	int group_id_old, state_old, pref_old, valid_states_old;
> +	unsigned char *desc, *buff;
> +	unsigned err;
> +	int retval;
> +	unsigned int tpg_desc_tbl_off;
> +	unsigned char orig_transition_tmo;
> +	unsigned long flags;
> +	bool transitioning_sense = false;
> +	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> +
> +	if (group_id < 0) {
> +		/*
> +		 * Internal error; TPGS supported but required
> +		 * VPD identification descriptors not present.
> +		 * Disable ALUA support
> +		 */
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: No target port descriptors found\n",
> +			    DRV_NAME);
> +		return -EOPNOTSUPP; //SCSI_DH_DEV_UNSUPP;
> +	}
> +
> +	group_id_old = alua->group_id;
> +	state_old = alua->state;
> +	pref_old = alua->pref;
> +	valid_states_old = alua->valid_states;
> +
> +	if (!alua->expiry) {
> +		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
> +
> +		if (alua->transition_tmo)
> +			transition_tmo = alua->transition_tmo * HZ;
> +
> +		alua->expiry = round_jiffies_up(jiffies + transition_tmo);
> +	}
> +
> +	buff = kzalloc(bufflen, GFP_KERNEL);
> +	if (!buff)
> +		return -ENOMEM;
> +
> + retry:
> +	err = 0;
> +	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr);
> +
> +	if (retval) {
> +		/*
> +		 * Some (broken) implementations have a habit of returning
> +		 * an error during things like firmware update etc.
> +		 * But if the target only supports active/optimized there's
> +		 * not much we can do; it's not that we can switch paths
> +		 * or anything.
> +		 * So ignore any errors to avoid spurious failures during
> +		 * path failover.
> +		 */
> +		if ((alua->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
> +			sdev_printk(KERN_INFO, sdev,
> +				    "%s: ignoring rtpg result %d\n",
> +				    DRV_NAME, retval);
> +			kfree(buff);
> +			return 0;//SCSI_DH_OK
> +		}
> +		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
> +			sdev_printk(KERN_INFO, sdev,
> +				    "%s: rtpg failed, result %d\n",
> +				    DRV_NAME, retval);
> +			kfree(buff);
> +			if (retval < 0)
> +				return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
> +			if (host_byte(retval) == DID_NO_CONNECT)
> +				return -ENOENT;//SCSI_DH_RES_TEMP_UNAVAIL;
> +			return -EIO;//SCSI_DH_IO
> +		}
> +
> +		/*
> +		 * submit_rtpg() has failed on existing arrays
> +		 * when requesting extended header info, and
> +		 * the array doesn't support extended headers,
> +		 * even though it shouldn't according to T10.
> +		 * The retry without rtpg_ext_hdr_req set
> +		 * handles this.
> +		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
> +		 * with ASC 00h if they don't support the extended header.
> +		 */
> +		if (!alua->rtpg_ext_hdr_unsupp &&
> +		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
> +			alua->rtpg_ext_hdr_unsupp = true;
> +			goto retry;
> +		}
> +		/*
> +		 * If the array returns with 'ALUA state transition'
> +		 * sense code here it cannot return RTPG data during
> +		 * transition. So set the state to 'transitioning' directly.
> +		 */
> +		if (sense_hdr.sense_key == NOT_READY &&
> +		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
> +			transitioning_sense = true;
> +			goto skip_rtpg;
> +		}
> +		/*
> +		 * Retry on any other UNIT ATTENTION occurred.
> +		 */
> +		if (sense_hdr.sense_key == UNIT_ATTENTION)
> +			err = -EAGAIN;//SCSI_DH_RETRY
> +		if (err == -EAGAIN &&
> +		    alua->expiry != 0 && time_before(jiffies, alua->expiry)) {
> +			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
> +				    DRV_NAME);
> +			scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
> +			kfree(buff);
> +			return err;
> +		}
> +		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
> +			    DRV_NAME);
> +		scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
> +		kfree(buff);
> +		alua->expiry = 0;
> +		return -EIO;//SCSI_DH_IO
> +	}
> +
> +	len = get_unaligned_be32(&buff[0]) + 4;
> +
> +	if (len > bufflen) {
> +		/* Resubmit with the correct length */
> +		kfree(buff);
> +		bufflen = len;
> +		buff = kmalloc(bufflen, GFP_KERNEL);
> +		if (!buff) {
> +			sdev_printk(KERN_WARNING, sdev,
> +				    "%s: kmalloc buffer failed\n",__func__);
> +			/* Temporary failure, bypass */
> +			alua->expiry = 0;
> +			return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
> +		}
> +		goto retry;
> +	}
> +
> +	orig_transition_tmo = alua->transition_tmo;
> +	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
> +		alua->transition_tmo = buff[5];
> +	else
> +		alua->transition_tmo = ALUA_FAILOVER_TIMEOUT;
> +
> +	if (orig_transition_tmo != alua->transition_tmo) {
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: transition timeout set to %d seconds\n",
> +			    DRV_NAME, alua->transition_tmo);
> +		alua->expiry = jiffies + alua->transition_tmo * HZ;
> +	}
> +
> +	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> +		tpg_desc_tbl_off = 8;
> +	else
> +		tpg_desc_tbl_off = 4;
> +
> +	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
> +	     k < len;
> +	     k += off, desc += off) {
> +		u16 group_id_desc = get_unaligned_be16(&desc[2]);
> +
> +		spin_lock_irqsave(&alua->lock, flags);
> +		if (group_id_desc == group_id) {
> +			alua->group_id = group_id;
> +			WRITE_ONCE(alua->state, desc[0] & 0x0f);
> +			alua->pref = desc[0] >> 7;
> +			WRITE_ONCE(sdev->access_state, desc[0]);
> +			alua->valid_states = desc[1];
> +		}
> +		spin_unlock_irqrestore(&alua->lock, flags);
> +		off = 8 + (desc[7] * 4);
> +	}
> +
> + skip_rtpg:
> +	spin_lock_irqsave(&alua->lock, flags);
> +	if (transitioning_sense)
> +		alua->state = SCSI_ACCESS_STATE_TRANSITIONING;
> +
> +	if (group_id_old != alua->group_id || state_old != alua->state ||
> +		pref_old != alua->pref || valid_states_old != alua->valid_states)
> +		sdev_printk(KERN_INFO, sdev,
> +			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
> +			DRV_NAME, alua->group_id, print_alua_state(alua->state),
> +			alua->pref ? "preferred" : "non-preferred",
> +			alua->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
> +			alua->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
> +			alua->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
> +			alua->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
> +			alua->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
> +			alua->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
> +			alua->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
> +
> +	switch (alua->state) {
> +	case SCSI_ACCESS_STATE_TRANSITIONING:
> +		if (time_before(jiffies, alua->expiry)) {
> +			/* State transition, retry */
> +			alua->interval = ALUA_RTPG_RETRY_DELAY;
> +			err = -EAGAIN;//SCSI_DH_RETRY
> +		} else {
> +			unsigned char access_state;
> +
> +			/* Transitioning time exceeded, set port to standby */
> +			err = -EIO;//SCSI_DH_IO;
> +			alua->state = SCSI_ACCESS_STATE_STANDBY;
> +			alua->expiry = 0;
> +			access_state = alua->state & SCSI_ACCESS_STATE_MASK;
> +			if (alua->pref)
> +				access_state |= SCSI_ACCESS_STATE_PREFERRED;
> +			WRITE_ONCE(sdev->access_state, access_state);
> +		}
> +		break;
> +	case SCSI_ACCESS_STATE_OFFLINE:
> +		/* Path unusable */
> +		err = -ENODEV;//SCSI_DH_DEV_OFFLINED;
> +		alua->expiry = 0;
> +		break;
> +	default:
> +		/* Useable path if active */
> +		err = 0;//SCSI_DH_OK
> +		alua->expiry = 0;
> +		break;
> +	}
> +	spin_unlock_irqrestore(&alua->lock, flags);
> +	kfree(buff);
> +	return err;
> +}
> +
>   int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	int rel_port, ret, tpgs;
> @@ -47,6 +357,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
>   
>   	sdev->alua->sdev = sdev;
>   	sdev->alua->tpgs = tpgs;
> +	spin_lock_init(&sdev->alua->lock);
>   
>   	return 0;
>   out_free_data:
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 07cdcb4f5b518..068277261ed9d 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -16,7 +16,15 @@
>   struct alua_data {
>   	int			group_id;
>   	int			tpgs;
> +	int			state;
> +	int			pref;
> +	int			valid_states;
> +	bool			rtpg_ext_hdr_unsupp;
> +	unsigned char		transition_tmo;
> +	unsigned long		expiry;
> +	unsigned long		interval;
>   	struct scsi_device	*sdev;
> +	spinlock_t		lock;
>   };
>   
>   int scsi_alua_sdev_init(struct scsi_device *sdev);

Ah, right. Now I see where you want to go with the separate
structure. Still wonder why you need the 'sdev' back link in
there, though.

Other than that:

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 04/13] scsi: alua: Add scsi_alua_stpg()
  2026-03-17 12:06 ` [PATCH 04/13] scsi: alua: Add scsi_alua_stpg() John Garry
@ 2026-03-18  7:53   ` Hannes Reinecke
  0 siblings, 0 replies; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:53 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add a core equivalent of alua_stpg() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 99 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 99 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index 50c1d17b52dc7..1045885f74169 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -30,6 +30,9 @@ static struct workqueue_struct *kalua_wq;
>   
>   #define RTPG_FMT_MASK			0x70
>   #define RTPG_FMT_EXT_HDR		0x10
> +#define TPGS_MODE_NONE			0x0
> +#define TPGS_MODE_IMPLICIT		0x1
> +#define TPGS_MODE_EXPLICIT		0x2
>   
>   #define ALUA_RTPG_SIZE			128
>   #define ALUA_FAILOVER_TIMEOUT		60
> @@ -65,6 +68,41 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
>   				ALUA_FAILOVER_RETRIES, &exec_args);
>   }
>   
> +/*
> + * submit_stpg - Issue a SET TARGET PORT GROUP command
> + *
> + * Currently we're only setting the current target port group state
> + * to 'active/optimized' and let the array firmware figure out
> + * the states of the remaining groups.
> + */
> +static int submit_stpg(struct scsi_device *sdev,
> +				struct scsi_sense_hdr *sshdr)
> +{
> +	u8 cdb[MAX_COMMAND_SIZE];
> +	unsigned char stpg_data[8];
> +	int stpg_len = 8;
> +	blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
> +				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
> +	const struct scsi_exec_args exec_args = {
> +		.sshdr = sshdr,
> +	};
> +
> +	/* Prepare the data buffer */
> +	memset(stpg_data, 0, stpg_len);
> +	stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
> +	put_unaligned_be16(sdev->alua->group_id, &stpg_data[6]);
> +
> +	/* Prepare the command. */
> +	memset(cdb, 0x0, MAX_COMMAND_SIZE);
> +	cdb[0] = MAINTENANCE_OUT;
> +	cdb[1] = MO_SET_TARGET_PGS;
> +	put_unaligned_be32(stpg_len, &cdb[6]);
> +
> +	return scsi_execute_cmd(sdev, cdb, opf, stpg_data,
> +				stpg_len, ALUA_FAILOVER_TIMEOUT * HZ,
> +				ALUA_FAILOVER_RETRIES, &exec_args);
> +}
> +
>   static char print_alua_state(unsigned char state)
>   {
>   	switch (state) {
> @@ -326,6 +364,67 @@ static int scsi_alua_rtpg(struct scsi_device *sdev)
>   	return err;
>   }
>   
> +
> +/*
> + * scsi_alua_stpg - Issue a SET TARGET PORT GROUP command
> + *
> + * Issue a SET TARGET PORT GROUP command and evaluate the
> + * response. Returns SCSI_DH_RETRY per default to trigger
> + * a re-evaluation of the target group state or SCSI_DH_OK
> + * if no further action needs to be taken.
> + */
> +__maybe_unused
> +static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
> +{
> +	struct alua_data *alua = sdev->alua;
> +	int retval;
> +	struct scsi_sense_hdr sense_hdr;
> +
> +	if (!(alua->tpgs & TPGS_MODE_EXPLICIT)) {
> +		/* Only implicit ALUA supported, retry */
> +		return -EAGAIN;//SCSI_DH_RETRY;
> +	}
> +	switch (alua->state) {
> +	case SCSI_ACCESS_STATE_OPTIMAL:
> +		return 0;//SCSI_DH_OK;
> +	case SCSI_ACCESS_STATE_ACTIVE:
> +		if (optimize &&
> +		    !alua->pref &&
> +		    (alua->tpgs & TPGS_MODE_IMPLICIT))
> +			return 0;//SCSI_DH_OK;
> +		break;
> +	case SCSI_ACCESS_STATE_STANDBY:
> +	case SCSI_ACCESS_STATE_UNAVAILABLE:
> +		break;
> +	case SCSI_ACCESS_STATE_OFFLINE:
> +		return -EIO;//SCSI_DH_IO;
> +	case SCSI_ACCESS_STATE_TRANSITIONING:
> +		break;
> +	default:
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: stpg failed, unhandled TPGS state %d",
> +			    DRV_NAME, alua->state);
> +		return -ENOSYS ;//SCSI_DH_NOSYS;
> +	}
> +	retval = submit_stpg(sdev, &sense_hdr);
> +
> +	if (retval) {
> +		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
> +			sdev_printk(KERN_INFO, sdev,
> +				    "%s: stpg failed, result %d",
> +				    DRV_NAME, retval);
> +			if (retval < 0)
> +				return -EBUSY;//SCSI_DH_DEV_TEMP_BUSY;
> +		} else {
> +			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
> +				    DRV_NAME);
> +			scsi_print_sense_hdr(sdev, DRV_NAME, &sense_hdr);
> +		}
> +	}
> +	/* Retry RTPG */
> +	return -EAGAIN;//SCSI_DH_RETRY;
> +}
> +
>   int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	int rel_port, ret, tpgs;

Hmm. The return code from alua_stpg() was really an internal thing in
scsi_dh_alua to drive the state machine.
I'd rather have _this_ function use the normal convention (i.e. return
'0' on success), and modify the state machine in scsi_dh_alua accordingly.

Note: stpg handling should be done _only_ in scsi_dh_alua. The scsi
core should not attempt anything clever here.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 05/13] scsi: alua: Add scsi_alua_tur()
  2026-03-17 12:06 ` [PATCH 05/13] scsi: alua: Add scsi_alua_tur() John Garry
@ 2026-03-18  7:54   ` Hannes Reinecke
  2026-03-23 13:42     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:54 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add same as alua_tur() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 26 ++++++++++++++++++++++++++
>   1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index 1045885f74169..d8825ad7a1672 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -40,6 +40,32 @@ static struct workqueue_struct *kalua_wq;
>   #define ALUA_RTPG_DELAY_MSECS		5
>   #define ALUA_RTPG_RETRY_DELAY		2
>   
> +/*
> + * alua_tur - Send a TEST UNIT READY
> + * @sdev: device to which the TEST UNIT READY command should be send
> + *
> + * Send a TEST UNIT READY to @sdev to figure out the device state
> + * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
> + * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
> + */
> +__maybe_unused
> +static int scsi_alua_tur(struct scsi_device *sdev)
> +{
> +	struct scsi_sense_hdr sense_hdr;
> +	int retval;
> +
> +	retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
> +				      ALUA_FAILOVER_RETRIES, &sense_hdr);
> +	if ((sense_hdr.sense_key == NOT_READY ||
> +	     sense_hdr.sense_key == UNIT_ATTENTION) &&
> +	    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> +		return -EAGAIN;//SCSI_DH_RETRY;
> +	else if (retval)
> +		return -EIO;//SCSI_DH_IO;
> +	else
> +		return 0;//SCSI_DH_OK;
> +}
> +
>   /*
>    * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
>    * @sdev: sdev the command should be sent to

???
And this function is useful _why_?
We're just sending a normal 'TEST UNIT READY', it has nothing to
do with ALUA. Why do we have a special function here?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run()
  2026-03-17 12:06 ` [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run() John Garry
@ 2026-03-18  7:57   ` Hannes Reinecke
  2026-03-18  8:59     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:57 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add a function to run stpg and handle error codes - it does equivalent
> handling as in alua_rtpg_work() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 20 +++++++++++++++++++-
>   include/scsi/scsi_alua.h |  5 +++++
>   2 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index e4cb43ba645fa..4e20a537a4ad6 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -428,7 +428,6 @@ EXPORT_SYMBOL_GPL(scsi_alua_rtpg_run);
>    * a re-evaluation of the target group state or SCSI_DH_OK
>    * if no further action needs to be taken.
>    */
> -__maybe_unused
>   static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
>   {
>   	struct alua_data *alua = sdev->alua;
> @@ -480,6 +479,25 @@ static int scsi_alua_stpg(struct scsi_device *sdev, bool optimize)
>   	return -EAGAIN;//SCSI_DH_RETRY;
>   }
>   
> +int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
> +{
> +	struct alua_data *alua = sdev->alua;
> +	unsigned long flags;
> +	int err;
> +
> +	err = scsi_alua_stpg(sdev, optimize);
> +	spin_lock_irqsave(&alua->lock, flags);
> +	if (err == EAGAIN) {
> +		alua->interval = 0;
> +		spin_unlock_irqrestore(&alua->lock, flags);
> +		return -EAGAIN;
> +	}
> +	spin_unlock_irqrestore(&alua->lock, flags);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
> +
>   int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	int rel_port, ret, tpgs;
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 1eb5481f40bd4..6e4f262bbfbc0 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -31,6 +31,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev);
>   void scsi_alua_sdev_exit(struct scsi_device *sdev);
>   
>   int scsi_alua_rtpg_run(struct scsi_device *sdev);
> +int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
>   
>   int scsi_alua_init(void);
>   void scsi_exit_alua(void);
> @@ -40,6 +41,10 @@ static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
>   {
>   	return 0;
>   }
> +static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
> +{
> +	return 0;
> +}
>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	return 0;

No. STPG handling should be done in scsi_dh_alua _only_. We really
should not attempt this in the scsi core.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs()
  2026-03-17 12:06 ` [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs() John Garry
@ 2026-03-18  7:57   ` Hannes Reinecke
  0 siblings, 0 replies; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:57 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add a core version of alua_check_tpgs() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 53 ++++++++++++++++++++++++++++++++++++++++
>   include/scsi/scsi_alua.h |  6 +++++
>   2 files changed, 59 insertions(+)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition()
  2026-03-17 12:06 ` [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition() John Garry
@ 2026-03-18  7:58   ` Hannes Reinecke
  2026-03-23 13:43     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  7:58 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:06, John Garry wrote:
> Add an equivalent of alua_handle_state_transition() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 11 +++++++++++
>   include/scsi/scsi_alua.h |  5 +++++
>   2 files changed, 16 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index 9c317e60d031e..d19d1845bc324 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -40,6 +40,17 @@ static struct workqueue_struct *kalua_wq;
>   #define ALUA_RTPG_DELAY_MSECS		5
>   #define ALUA_RTPG_RETRY_DELAY		2
>   
> +void scsi_alua_handle_state_transition(struct scsi_device *sdev)
> +{
> +	struct alua_data *alua = sdev->alua;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&alua->lock, flags);
> +	alua->state = SCSI_ACCESS_STATE_TRANSITIONING;
> +	spin_unlock_irqrestore(&alua->lock, flags);
> +}
> +EXPORT_SYMBOL_GPL(scsi_alua_handle_state_transition);
> +
>   /*
>    * alua_tur - Send a TEST UNIT READY
>    * @sdev: device to which the TEST UNIT READY command should be send
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 2e664f20d9681..5b3a12861658f 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -30,6 +30,8 @@ struct alua_data {
>   int scsi_alua_sdev_init(struct scsi_device *sdev);
>   void scsi_alua_sdev_exit(struct scsi_device *sdev);
>   
> +void scsi_alua_handle_state_transition(struct scsi_device *sdev);
> +
>   int scsi_alua_check_tpgs(struct scsi_device *sdev);
>   
>   int scsi_alua_rtpg_run(struct scsi_device *sdev);
> @@ -39,6 +41,9 @@ int scsi_alua_init(void);
>   void scsi_exit_alua(void);
>   #else //CONFIG_SCSI_ALUA
>   
> +static inline void scsi_alua_handle_state_transition(struct scsi_device *sdev)
> +{
> +}
>   static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
>   {
>   	return 0;

???
This doesn't handle a state transition, it just _sets_ the state to
'transitioning'. Please fold it into the patch where the state transition
is actually handled.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn()
  2026-03-17 12:07 ` [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn() John Garry
@ 2026-03-18  8:01   ` Hannes Reinecke
  2026-03-23 13:49     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  8:01 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:07, John Garry wrote:
> Add a core version of alua_prep_fn() from scsi_dh_alua.c
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 23 +++++++++++++++++++++++
>   include/scsi/scsi_alua.h |  8 ++++++++
>   2 files changed, 31 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index d19d1845bc324..c269105dbae4a 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -608,6 +608,29 @@ void scsi_alua_sdev_exit(struct scsi_device *sdev)
>   	sdev->alua = NULL;
>   }
>   
> +blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
> +{
> +	struct alua_data *alua = sdev->alua;
> +	unsigned long flags;
> +	unsigned char state;
> +
> +	spin_lock_irqsave(&alua->lock, flags);
> +	state = alua->state;
> +	spin_unlock_irqrestore(&alua->lock, flags);
> +
> +	switch (state) {
> +	case SCSI_ACCESS_STATE_OPTIMAL:
> +	case SCSI_ACCESS_STATE_ACTIVE:
> +	case SCSI_ACCESS_STATE_LBA:
> +	case SCSI_ACCESS_STATE_TRANSITIONING:
> +		return BLK_STS_OK;
> +	default:
> +		req->rq_flags |= RQF_QUIET;
> +		return BLK_STS_IOERR;
> +	}
> +}
> +EXPORT_SYMBOL_GPL(scsi_alua_prep_fn);
> +
>   int scsi_alua_init(void)
>   {
>   	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 5b3a12861658f..c16d4adc915ec 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -8,6 +8,7 @@
>   #ifndef _SCSI_ALUA_H
>   #define _SCSI_ALUA_H
>   
> +#include <linux/blk-mq.h>
>   #include <scsi/scsi.h>
>   #include <scsi/scsi_device.h>
>   
> @@ -37,6 +38,8 @@ int scsi_alua_check_tpgs(struct scsi_device *sdev);
>   int scsi_alua_rtpg_run(struct scsi_device *sdev);
>   int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
>   
> +blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
> +
>   int scsi_alua_init(void);
>   void scsi_exit_alua(void);
>   #else //CONFIG_SCSI_ALUA
> @@ -56,6 +59,11 @@ static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
>   {
>   	return 0;
>   }
> +static inline
> +blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
> +{
> +	return BLK_STS_OK;
> +}
>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	return 0;

Hmm. The 'prep_fn' thingie was implemented such that other drivers (like 
scsi_dh) could intercept the scsi prep function and inject their own
stuff. But now with this patchset the functionality is in the scsi core,
so really we should do away with the prep_fn here and call the functions
directly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit()
  2026-03-17 12:07 ` [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit() John Garry
@ 2026-03-18  8:02   ` Hannes Reinecke
  2026-03-23 13:50     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  8:02 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:07, John Garry wrote:
> Add a function to check whether implicit support is available, as this
> will be the general check for ALUA support without DH support.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c | 7 +++++++
>   include/scsi/scsi_alua.h | 6 ++++++
>   2 files changed, 13 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index c269105dbae4a..d3fcd887e5018 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -631,6 +631,13 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
>   }
>   EXPORT_SYMBOL_GPL(scsi_alua_prep_fn);
>   
> +bool scsi_device_alua_implicit(struct scsi_device *sdev)
> +{
> +	if (!sdev->alua)
> +		return false;
> +	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
> +}
> +
>   int scsi_alua_init(void)
>   {
>   	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index c16d4adc915ec..2d5db944f75b7 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -40,6 +40,8 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
>   
>   blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
>   
> +bool scsi_device_alua_implicit(struct scsi_device *sdev);
> +
>   int scsi_alua_init(void);
>   void scsi_exit_alua(void);
>   #else //CONFIG_SCSI_ALUA
> @@ -64,6 +66,10 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
>   {
>   	return BLK_STS_OK;
>   }
> +static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
> +{
> +	return false;
> +}
>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	return 0;

Hmm. Can you fold it into the patch where it's actually called?
It's getting hard to review without that.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
@ 2026-03-18  8:08   ` Hannes Reinecke
  2026-03-18 23:08   ` kernel test robot
  2026-03-23  1:58   ` Benjamin Marzinski
  2 siblings, 0 replies; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  8:08 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/17/26 13:07, John Garry wrote:
> For when no device handler is used, add ALUA support.
> 
> This will be equivalent to when native SCSI multipathing is used.
> 
> Essentially all the same handling is available as DH alua driver for
> rescan, request prep, sense handling.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_alua.c  | 93 +++++++++++++++++++++++++++++++++++++++
>   drivers/scsi/scsi_error.c |  7 +++
>   drivers/scsi/scsi_lib.c   |  7 +++
>   drivers/scsi/scsi_scan.c  |  2 +
>   drivers/scsi/scsi_sysfs.c |  4 +-
>   include/scsi/scsi_alua.h  | 14 ++++++
>   6 files changed, 126 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index d3fcd887e5018..ee0229b1a9d12 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -562,6 +562,90 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
>   }
>   EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
>   
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +					      struct scsi_sense_hdr *sense_hdr)
> +{
> +	switch (sense_hdr->sense_key) {
> +	case NOT_READY:
> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> +			/*
> +			 * LUN Not Accessible - ALUA state transition
> +			 */
> +			scsi_alua_handle_state_transition(sdev);
> +			return NEEDS_RETRY;
> +		}
> +		break;
> +	case UNIT_ATTENTION:
> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> +			/*
> +			 * LUN Not Accessible - ALUA state transition
> +			 */
> +			scsi_alua_handle_state_transition(sdev);
> +			return NEEDS_RETRY;
> +		}
> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
> +			/*
> +			 * Power On, Reset, or Bus Device Reset.
> +			 * Might have obscured a state transition,
> +			 * so schedule a recheck.
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
> +			/*
> +			 * Device internal reset
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
> +			/*
> +			 * Mode Parameters Changed
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
> +			/*
> +			 * ALUA state changed
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
> +			/*
> +			 * Implicit ALUA state transition failed
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
> +			/*
> +			 * Inquiry data has changed
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
> +			/*
> +			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
> +			 * when switching controllers on targets like
> +			 * Intel Multi-Flex. We can just retry.
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		break;
> +	}
> +
> +	return SCSI_RETURN_NOT_HANDLED;
> +}
> +
> +static void alua_rtpg_work(struct work_struct *work)
> +{
> +	struct alua_data *alua =
> +		container_of(work, struct alua_data, work.work);
> +	int ret;
> +
> +	ret = scsi_alua_rtpg_run(alua->sdev);
> +
> +	if (ret == -EAGAIN)
> +		queue_delayed_work(kalua_wq, &alua->work, alua->interval * HZ);
> +}
> +
>   int scsi_alua_sdev_init(struct scsi_device *sdev)
>   {
>   	int rel_port, ret, tpgs;
> @@ -591,6 +675,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
>   		goto out_free_data;
>   	}
>   
> +	INIT_DELAYED_WORK(&sdev->alua->work, alua_rtpg_work);
>   	sdev->alua->sdev = sdev;
>   	sdev->alua->tpgs = tpgs;
>   	spin_lock_init(&sdev->alua->lock);
> @@ -638,6 +723,14 @@ bool scsi_device_alua_implicit(struct scsi_device *sdev)
>   	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
>   }
>   
> +void scsi_device_alua_rescan(struct scsi_device *sdev)
> +{
> +	struct alua_data *alua = sdev->alua;
> +
> +	queue_delayed_work(kalua_wq, &alua->work,
> +				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS));
> +}
> +
>   int scsi_alua_init(void)
>   {
>   	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 147127fb4db9c..a542e7a85a24d 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -29,6 +29,7 @@
>   #include <linux/jiffies.h>
>   
>   #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>   #include <scsi/scsi_cmnd.h>
>   #include <scsi/scsi_dbg.h>
>   #include <scsi/scsi_device.h>
> @@ -578,6 +579,12 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
>   		if (rc != SCSI_RETURN_NOT_HANDLED)
>   			return rc;
>   		/* handler does not care. Drop down to default handling */
> +	} else if (scsi_device_alua_implicit(sdev)) {
> +		enum scsi_disposition rc;
> +
> +		rc = scsi_alua_check_sense(sdev, &sshdr);
> +		if (rc != SCSI_RETURN_NOT_HANDLED)
> +			return rc;
>   	}
>   
>   	if (scmd->cmnd[0] == TEST_UNIT_READY &&
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index d3a8cd4166f92..e5bcee555ea10 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -26,6 +26,7 @@
>   #include <linux/unaligned.h>
>   
>   #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>   #include <scsi/scsi_cmnd.h>
>   #include <scsi/scsi_dbg.h>
>   #include <scsi/scsi_device.h>
> @@ -1719,6 +1720,12 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
>   	if (sdev->handler && sdev->handler->prep_fn) {
>   		blk_status_t ret = sdev->handler->prep_fn(sdev, req);
>   
> +		if (ret != BLK_STS_OK)
> +			return ret;
> +	} else if (scsi_device_alua_implicit(sdev)) {
> +		/* We should be able to make this common for ALUA DH as well */
> +		blk_status_t ret = scsi_alua_prep_fn(sdev, req);
> +
>   		if (ret != BLK_STS_OK)
>   			return ret;
>   	}
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 3af64d1231445..73caf83bd1097 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1744,6 +1744,8 @@ int scsi_rescan_device(struct scsi_device *sdev)
>   
>   	if (sdev->handler && sdev->handler->rescan)
>   		sdev->handler->rescan(sdev);
> +	else if (scsi_device_alua_implicit(sdev))
> +		scsi_device_alua_rescan(sdev);
>   
>   	if (dev->driver && try_module_get(dev->driver->owner)) {
>   		struct scsi_driver *drv = to_scsi_driver(dev->driver);
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 6c4c3c22f6acf..71a9613898cfc 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1152,7 +1152,7 @@ sdev_show_access_state(struct device *dev,
>   	unsigned char access_state;
>   	const char *access_state_name;
>   
> -	if (!sdev->handler)
> +	if (!sdev->handler && !scsi_device_alua_implicit(sdev))
>   		return -EINVAL;
>   
>   	access_state = (sdev->access_state & SCSI_ACCESS_STATE_MASK);
> @@ -1409,6 +1409,8 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>   	scsi_autopm_get_device(sdev);
>   
>   	scsi_dh_add_device(sdev);
> +	if (!sdev->handler && scsi_device_alua_implicit(sdev))
> +		scsi_device_alua_rescan(sdev);
>   
>   	error = device_add(&sdev->sdev_gendev);
>   	if (error) {
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 2d5db944f75b7..8e506d1d66cce 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -24,6 +24,7 @@ struct alua_data {
>   	unsigned char		transition_tmo;
>   	unsigned long		expiry;
>   	unsigned long		interval;
> +	struct delayed_work	work;
>   	struct scsi_device	*sdev;
>   	spinlock_t		lock;
>   };
> @@ -35,11 +36,15 @@ void scsi_alua_handle_state_transition(struct scsi_device *sdev);
>   
>   int scsi_alua_check_tpgs(struct scsi_device *sdev);
>   
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +				struct scsi_sense_hdr *sense_hdr);
> +
>   int scsi_alua_rtpg_run(struct scsi_device *sdev);
>   int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
>   
>   blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
>   
> +void scsi_device_alua_rescan(struct scsi_device *sdev);
>   bool scsi_device_alua_implicit(struct scsi_device *sdev);
>   
>   int scsi_alua_init(void);
> @@ -53,6 +58,12 @@ static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
>   {
>   	return 0;
>   }
> +static inline
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +				struct scsi_sense_hdr *sense_hdr)
> +{
> +	return SCSI_RETURN_NOT_HANDLED;
> +}
>   static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
>   {
>   	return 0;
> @@ -66,6 +77,9 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
>   {
>   	return BLK_STS_OK;
>   }
> +static inline void scsi_device_alua_rescan(struct scsi_device *sdev)
> +{
> +}
>   static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
>   {
>   	return false;

... and this justifies what I mentioned with the previous two patches. 
Please fold the patch for scsi_device_alua_implicit() into this, and 
open-code the prep_fn such that we know what's going on.

The ALUA state machine might be challenging, though.
The scsi_dh_alua driver had this as a workqueue, as it was the only way
we could execute several calls consecutively.
I'm not utterly convinced that we need to have the very same functionality
in the scsi core, but for simplicity let's keep it.
But: we only should keep the 'retry RTPG' logic in the scsi core;
sending STPG should be delegated to scsi_dh_alua.
So you need to adjust scsi_dh_alua for that, too, and cannot just
lift the existing functions into the scsi core.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-18  7:44   ` Hannes Reinecke
@ 2026-03-18  8:53     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-18  8:53 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:44, Hannes Reinecke wrote:
> On 3/17/26 13:06, John Garry wrote:
>> Delete the alua_port_group usage, as it is more accurate to manage the
>> port group info per-scsi device - see [0]
>>
>> [0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m4ffc0d07f169b70b8fd2407bae9632aa0f8c1f9a
>> For now, the handler data will be used to hold the ALUA-related info.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   drivers/scsi/device_handler/scsi_dh_alua.c | 663 ++++++---------------
>>   1 file changed, 180 insertions(+), 483 deletions(-)
>>
> In principle, yes, but I would put this at the end after the patches to
> move the alua functionality to the scsi core.

The alua_port_group functionality is intertwined with all the ALUA 
handling, so it is hard to start to separate out (from scsi_dh_alua.c) 
and then remove it. Furthermore, it would just be duplicating what we 
have in the sdev->alua structure.

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run()
  2026-03-18  7:57   ` Hannes Reinecke
@ 2026-03-18  8:59     ` John Garry
  2026-03-18  9:24       ` Hannes Reinecke
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-18  8:59 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:57, Hannes Reinecke wrote:
>> +static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool 
>> optimize)
>> +{
>> +    return 0;
>> +}
>>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>>   {
>>       return 0;
> 
> No. STPG handling should be done in scsi_dh_alua _only_. We really
> should not attempt this in the scsi core.

It's not so nice to have the functionality spread out. The way I see it 
is that drivers/scsi/scsi_alua.c is mostly a library, but also has 
functionality to "drive" ALUA for native SCSI multipathing.

Anyway, can you confirm which of the following from this series you 
think should be in scsi_dh_alua.c:

- scsi_alua_stpg_run()
- scsi_alua_stpg()
- submit_stpg()

You already said scsi_alua_stpg_run() should be.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run()
  2026-03-18  8:59     ` John Garry
@ 2026-03-18  9:24       ` Hannes Reinecke
  2026-03-23 13:58         ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-18  9:24 UTC (permalink / raw)
  To: John Garry, Hannes Reinecke, martin.petersen, james.bottomley,
	bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 3/18/26 09:59, John Garry wrote:
> On 18/03/2026 07:57, Hannes Reinecke wrote:
>>> +static inline int scsi_alua_stpg_run(struct scsi_device *sdev, bool 
>>> optimize)
>>> +{
>>> +    return 0;
>>> +}
>>>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>>>   {
>>>       return 0;
>>
>> No. STPG handling should be done in scsi_dh_alua _only_. We really
>> should not attempt this in the scsi core.
> 
> It's not so nice to have the functionality spread out. The way I see it 
> is that drivers/scsi/scsi_alua.c is mostly a library, but also has 
> functionality to "drive" ALUA for native SCSI multipathing.
> 
> Anyway, can you confirm which of the following do you think from this 
> series should be in scsi_dh_alua.c:
> 
> - scsi_alua_stpg_run()
> - scsi_alua_stpg()
> - submit_stpg()
> 
> You already said scsi_alua_stpg_run() should be.
> 
Gnaa. Misread that one (blame lack of coffee).
stpg should be handled in scsi_dh_alua. Arguably
we could move the utility functions (submit_stpg
and maybe scsi_alua_stpg) into the core alua code,
but scsi_alua_stpg_run() should be kept in
scsi_dh_alua.

If that makes sense ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 02/13] scsi: alua: Create a core ALUA driver
  2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
  2026-03-18  7:47   ` Hannes Reinecke
@ 2026-03-18 17:17   ` kernel test robot
  2026-03-18 22:54   ` kernel test robot
  2 siblings, 0 replies; 63+ messages in thread
From: kernel test robot @ 2026-03-18 17:17 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: oe-kbuild-all, jmeneghi, linux-scsi, michael.christie, snitzer,
	dm-devel, linux-kernel, John Garry

Hi John,

kernel test robot noticed the following build errors:

[auto build test ERROR on mkp-scsi/for-next]
[also build test ERROR on jejb-scsi/for-next linus/master v7.0-rc4 next-20260317]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/John-Garry/scsi-scsi_dh_alua-Delete-alua_port_group/20260318-105207
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
patch link:    https://lore.kernel.org/r/20260317120703.3702387-3-john.g.garry%40oracle.com
patch subject: [PATCH 02/13] scsi: alua: Create a core ALUA driver
config: s390-randconfig-001-20260318 (https://download.01.org/0day-ci/archive/20260319/202603190113.JzA11dmp-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 11.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260319/202603190113.JzA11dmp-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603190113.JzA11dmp-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "scsi_exit_alua" [drivers/scsi/scsi_mod.ko] undefined!
>> ERROR: modpost: "scsi_alua_init" [drivers/scsi/scsi_mod.ko] undefined!
>> ERROR: modpost: "scsi_alua_sdev_exit" [drivers/scsi/scsi_mod.ko] undefined!
>> ERROR: modpost: "scsi_alua_sdev_init" [drivers/scsi/scsi_mod.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 02/13] scsi: alua: Create a core ALUA driver
  2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
  2026-03-18  7:47   ` Hannes Reinecke
  2026-03-18 17:17   ` kernel test robot
@ 2026-03-18 22:54   ` kernel test robot
  2 siblings, 0 replies; 63+ messages in thread
From: kernel test robot @ 2026-03-18 22:54 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: oe-kbuild-all, jmeneghi, linux-scsi, michael.christie, snitzer,
	dm-devel, linux-kernel, John Garry

Hi John,

kernel test robot noticed the following build errors:

[auto build test ERROR on mkp-scsi/for-next]
[also build test ERROR on jejb-scsi/for-next linus/master v7.0-rc4 next-20260318]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/John-Garry/scsi-scsi_dh_alua-Delete-alua_port_group/20260318-105207
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
patch link:    https://lore.kernel.org/r/20260317120703.3702387-3-john.g.garry%40oracle.com
patch subject: [PATCH 02/13] scsi: alua: Create a core ALUA driver
config: um-randconfig-001-20260319 (https://download.01.org/0day-ci/archive/20260319/202603190613.weiFq4ac-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260319/202603190613.weiFq4ac-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603190613.weiFq4ac-lkp@intel.com/

All errors (new ones prefixed by >>):

   /usr/bin/ld: drivers/scsi/scsi.o: in function `exit_scsi':
>> scsi.c:(.exit.text+0x1b): undefined reference to `scsi_exit_alua'
   /usr/bin/ld: drivers/scsi/scsi.o: in function `init_scsi':
>> scsi.c:(.init.text+0x8c): undefined reference to `scsi_alua_init'
   /usr/bin/ld: drivers/scsi/scsi_scan.o: in function `scsi_add_lun':
>> scsi_scan.c:(.text+0x1797): undefined reference to `scsi_alua_sdev_init'
   /usr/bin/ld: drivers/scsi/scsi_sysfs.o: in function `scsi_device_dev_release':
>> scsi_sysfs.c:(.text+0x1e8a): undefined reference to `scsi_alua_sdev_exit'
   collect2: error: ld returned 1 exit status

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
  2026-03-18  8:08   ` Hannes Reinecke
@ 2026-03-18 23:08   ` kernel test robot
  2026-03-23  1:58   ` Benjamin Marzinski
  2 siblings, 0 replies; 63+ messages in thread
From: kernel test robot @ 2026-03-18 23:08 UTC (permalink / raw)
  To: John Garry, martin.petersen, james.bottomley, hare, bmarzins
  Cc: oe-kbuild-all, jmeneghi, linux-scsi, michael.christie, snitzer,
	dm-devel, linux-kernel, John Garry

Hi John,

kernel test robot noticed the following build errors:

[auto build test ERROR on mkp-scsi/for-next]
[also build test ERROR on jejb-scsi/for-next linus/master v7.0-rc4 next-20260318]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/John-Garry/scsi-scsi_dh_alua-Delete-alua_port_group/20260318-105207
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
patch link:    https://lore.kernel.org/r/20260317120703.3702387-14-john.g.garry%40oracle.com
patch subject: [PATCH 13/13] scsi: core: Add implicit ALUA support
config: s390-randconfig-001-20260318 (https://download.01.org/0day-ci/archive/20260319/202603190739.QIFfPfdg-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 11.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260319/202603190739.QIFfPfdg-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603190739.QIFfPfdg-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "scsi_device_alua_rescan" [drivers/scsi/scsi_mod.ko] undefined!
>> ERROR: modpost: "scsi_alua_check_sense" [drivers/scsi/scsi_mod.ko] undefined!
ERROR: modpost: "scsi_exit_alua" [drivers/scsi/scsi_mod.ko] undefined!
ERROR: modpost: "scsi_alua_init" [drivers/scsi/scsi_mod.ko] undefined!
ERROR: modpost: "scsi_alua_sdev_exit" [drivers/scsi/scsi_mod.ko] undefined!
ERROR: modpost: "scsi_alua_sdev_init" [drivers/scsi/scsi_mod.ko] undefined!
>> ERROR: modpost: "scsi_device_alua_implicit" [drivers/scsi/scsi_mod.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
                   ` (12 preceding siblings ...)
  2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
@ 2026-03-22 17:37 ` Benjamin Marzinski
  2026-03-23  9:57   ` John Garry
  13 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-22 17:37 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 17, 2026 at 12:06:50PM +0000, John Garry wrote:
> Following on the back of the ALUA support for native SCSI multipath
> proposal at [0], this is an attempt to move to a SCSI core ALUA driver.
> 
> Essentially this series move the bulk of the ALUA handling from
> scsi_dh_alua.c to a core driver. We still need to support ALUA for DH, so
> the scsi_dh_alua.c is still responsible for driving ALUA support and the
> SCSI core ALUA driver just provides a set of library functions for that.
> 
> The SCSI core ALUA driver also provides implicit ALUA support for no DH,
> like when we would be native SCSI multipath.
> 
> This series is just really an RFC quality work and its purpose is
> to decide on the direction of ALUA support for native SCSI multipath.
> 
> I think that this work is a real regression possibility for
> dm-multipath, so we need to be careful.

At the risk of showing just how limited my SCSI knowledge is, I need to
ask: is any of this actually necessary to get native scsi multipath
working with Implicit ALUA?

If the goal is to limit this to IMPLICIT ALUA only, I was expecting that
you could just leave the scsi_dh_alua code completely alone. If native
scsi multipathing didn't disable the device handler, it seemed that this
would basically just work. With the device handler attached, when the
array updates the ALUA state, that should, at least I believe, trigger a
unit attention that will fire off a RTPG command. That should update the
sdev->access_state, which the multipath code could use to pick the
correct path. Right? What am I missing here? Is this just a parallel
exercise to overhaul the ALUA code?

-Ben

> 
> [0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m9c054433076812dff464d0e3b50a00620cfe0af1
> 
> John Garry (13):
>   scsi: scsi_dh_alua: Delete alua_port_group
>   scsi: alua: Create a core ALUA driver
>   scsi: alua: Add scsi_alua_rtpg()
>   scsi: alua: Add scsi_alua_stpg()
>   scsi: alua: Add scsi_alua_tur()
>   scsi: alua: Add scsi_alua_rtpg_run()
>   scsi: alua: Add scsi_alua_stpg_run()
>   scsi: alua: Add scsi_alua_check_tpgs()
>   scsi: alua: Add scsi_alua_handle_state_transition()
>   scsi: alua: Add scsi_alua_prep_fn()
>   scsi: alua: Add scsi_device_alua_implicit()
>   scsi: scsi_dh_alua: Switch to use core support
>   scsi: core: Add implicit ALUA support
> 
>  drivers/scsi/Kconfig                       |   10 +-
>  drivers/scsi/Makefile                      |    1 +
>  drivers/scsi/device_handler/Kconfig        |    1 +
>  drivers/scsi/device_handler/scsi_dh_alua.c | 1003 ++------------------
>  drivers/scsi/scsi.c                        |    7 +
>  drivers/scsi/scsi_alua.c                   |  748 +++++++++++++++
>  drivers/scsi/scsi_error.c                  |    7 +
>  drivers/scsi/scsi_lib.c                    |    7 +
>  drivers/scsi/scsi_scan.c                   |    6 +
>  drivers/scsi/scsi_sysfs.c                  |    7 +-
>  include/scsi/scsi_alua.h                   |  103 ++
>  include/scsi/scsi_device.h                 |    1 +
>  12 files changed, 977 insertions(+), 924 deletions(-)
>  create mode 100644 drivers/scsi/scsi_alua.c
>  create mode 100644 include/scsi/scsi_alua.h
> 
> -- 
> 2.43.5


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
  2026-03-18  7:44   ` Hannes Reinecke
@ 2026-03-23  0:08   ` Benjamin Marzinski
  2026-03-23 10:33     ` John Garry
  1 sibling, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23  0:08 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 17, 2026 at 12:06:51PM +0000, John Garry wrote:
> Delete the alua_port_group usage, as it is more accurate to manage the
> port group info per-scsi device - see [0]
> 
> [0] https://lore.kernel.org/linux-scsi/20260310114925.1222263-1-john.g.garry@oracle.com/T/#m4ffc0d07f169b70b8fd2407bae9632aa0f8c1f9a
> 
> For now, the handler data will be used to hold the ALUA-related info.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 663 ++++++---------------
>  1 file changed, 180 insertions(+), 483 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
> index efb08b9b145a1..067021fffc16f 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -54,41 +54,27 @@ static uint optimize_stpg;
>  module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
>  MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
>  
> -static LIST_HEAD(port_group_list);
> -static DEFINE_SPINLOCK(port_group_lock);
>  static struct workqueue_struct *kaluad_wq;
>  
> -struct alua_port_group {
> -	struct kref		kref;
> -	struct rcu_head		rcu;
> -	struct list_head	node;
> -	struct list_head	dh_list;
> -	unsigned char		device_id_str[256];
> -	int			device_id_len;
> +struct alua_dh_data {
>  	int			group_id;
> -	int			tpgs;
> +	struct scsi_device	*sdev;
> +	int			init_error;
> +	struct mutex		init_mutex;
> +	bool			disabled;

disabled doesn't have a use anymore, since you aren't retrying RTPGs on
different devices.

> +	unsigned		flags; /* used for optimizing STPG */
> +	spinlock_t		lock;
> +
> +	/* alua stuff */
>  	int			state;
>  	int			pref;
>  	int			valid_states;
> -	unsigned		flags; /* used for optimizing STPG */
> +	int			tpgs;
>  	unsigned char		transition_tmo;
>  	unsigned long		expiry;
>  	unsigned long		interval;
>  	struct delayed_work	rtpg_work;
> -	spinlock_t		lock;
>  	struct list_head	rtpg_list;
> -	struct scsi_device	*rtpg_sdev;
> -};
> -
> -struct alua_dh_data {
> -	struct list_head	node;
> -	struct alua_port_group __rcu *pg;
> -	int			group_id;
> -	spinlock_t		pg_lock;
> -	struct scsi_device	*sdev;
> -	int			init_error;
> -	struct mutex		init_mutex;
> -	bool			disabled;
>  };
>  
>  struct alua_queue_data {
> @@ -101,24 +87,10 @@ struct alua_queue_data {
>  #define ALUA_POLICY_SWITCH_ALL		1
>  
>  static void alua_rtpg_work(struct work_struct *work);
> -static bool alua_rtpg_queue(struct alua_port_group *pg,
> -			    struct scsi_device *sdev,
> +static bool alua_rtpg_queue(struct scsi_device *sdev,
>  			    struct alua_queue_data *qdata, bool force);
>  static void alua_check(struct scsi_device *sdev, bool force);
>  
> -static void release_port_group(struct kref *kref)
> -{
> -	struct alua_port_group *pg;
> -
> -	pg = container_of(kref, struct alua_port_group, kref);
> -	if (pg->rtpg_sdev)
> -		flush_delayed_work(&pg->rtpg_work);
> -	spin_lock(&port_group_lock);
> -	list_del(&pg->node);
> -	spin_unlock(&port_group_lock);
> -	kfree_rcu(pg, rcu);
> -}
> -
>  /*
>   * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
>   * @sdev: sdev the command should be sent to
> @@ -182,88 +154,6 @@ static int submit_stpg(struct scsi_device *sdev, int group_id,
>  				ALUA_FAILOVER_RETRIES, &exec_args);
>  }
>  
> -static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
> -						int group_id)
> -{
> -	struct alua_port_group *pg;
> -
> -	if (!id_str || !id_size || !strlen(id_str))
> -		return NULL;
> -
> -	list_for_each_entry(pg, &port_group_list, node) {
> -		if (pg->group_id != group_id)
> -			continue;
> -		if (!pg->device_id_len || pg->device_id_len != id_size)
> -			continue;
> -		if (strncmp(pg->device_id_str, id_str, id_size))
> -			continue;
> -		if (!kref_get_unless_zero(&pg->kref))
> -			continue;
> -		return pg;
> -	}
> -
> -	return NULL;
> -}
> -
> -/*
> - * alua_alloc_pg - Allocate a new port_group structure
> - * @sdev: scsi device
> - * @group_id: port group id
> - * @tpgs: target port group settings
> - *
> - * Allocate a new port_group structure for a given
> - * device.
> - */
> -static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
> -					     int group_id, int tpgs)
> -{
> -	struct alua_port_group *pg, *tmp_pg;
> -
> -	pg = kzalloc_obj(struct alua_port_group);
> -	if (!pg)
> -		return ERR_PTR(-ENOMEM);
> -
> -	pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
> -					    sizeof(pg->device_id_str));
> -	if (pg->device_id_len <= 0) {
> -		/*
> -		 * TPGS supported but no device identification found.
> -		 * Generate private device identification.
> -		 */
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: No device descriptors found\n",
> -			    ALUA_DH_NAME);
> -		pg->device_id_str[0] = '\0';
> -		pg->device_id_len = 0;
> -	}
> -	pg->group_id = group_id;
> -	pg->tpgs = tpgs;
> -	pg->state = SCSI_ACCESS_STATE_OPTIMAL;
> -	pg->valid_states = TPGS_SUPPORT_ALL;
> -	if (optimize_stpg)
> -		pg->flags |= ALUA_OPTIMIZE_STPG;
> -	kref_init(&pg->kref);
> -	INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
> -	INIT_LIST_HEAD(&pg->rtpg_list);
> -	INIT_LIST_HEAD(&pg->node);
> -	INIT_LIST_HEAD(&pg->dh_list);
> -	spin_lock_init(&pg->lock);
> -
> -	spin_lock(&port_group_lock);
> -	tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
> -				  group_id);
> -	if (tmp_pg) {
> -		spin_unlock(&port_group_lock);
> -		kfree(pg);
> -		return tmp_pg;
> -	}
> -
> -	list_add(&pg->node, &port_group_list);
> -	spin_unlock(&port_group_lock);
> -
> -	return pg;
> -}
> -
>  /*
>   * alua_check_tpgs - Evaluate TPGS setting
>   * @sdev: device to be checked
> @@ -326,13 +216,10 @@ static int alua_check_tpgs(struct scsi_device *sdev)
>  static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
>  			  int tpgs)
>  {
> -	int rel_port = -1, group_id;
> -	struct alua_port_group *pg, *old_pg = NULL;
> -	bool pg_updated = false;
> -	unsigned long flags;
> +	int rel_port = -1;
>  
> -	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> -	if (group_id < 0) {
> +	h->group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> +	if (h->group_id < 0) {
>  		/*
>  		 * Internal error; TPGS supported but required
>  		 * VPD identification descriptors not present.
> @@ -343,51 +230,9 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
>  			    ALUA_DH_NAME);
>  		return SCSI_DH_DEV_UNSUPP;
>  	}
> +	h->tpgs = tpgs;
>  
> -	pg = alua_alloc_pg(sdev, group_id, tpgs);
> -	if (IS_ERR(pg)) {
> -		if (PTR_ERR(pg) == -ENOMEM)
> -			return SCSI_DH_NOMEM;
> -		return SCSI_DH_DEV_UNSUPP;
> -	}
> -	if (pg->device_id_len)
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: device %s port group %x rel port %x\n",
> -			    ALUA_DH_NAME, pg->device_id_str,
> -			    group_id, rel_port);
> -	else
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: port group %x rel port %x\n",
> -			    ALUA_DH_NAME, group_id, rel_port);
> -
> -	kref_get(&pg->kref);
> -
> -	/* Check for existing port group references */
> -	spin_lock(&h->pg_lock);
> -	old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
> -	if (old_pg != pg) {
> -		/* port group has changed. Update to new port group */
> -		if (h->pg) {
> -			spin_lock_irqsave(&old_pg->lock, flags);
> -			list_del_rcu(&h->node);
> -			spin_unlock_irqrestore(&old_pg->lock, flags);
> -		}
> -		rcu_assign_pointer(h->pg, pg);
> -		pg_updated = true;
> -	}
> -
> -	spin_lock_irqsave(&pg->lock, flags);
> -	if (pg_updated)
> -		list_add_rcu(&h->node, &pg->dh_list);
> -	spin_unlock_irqrestore(&pg->lock, flags);
> -
> -	spin_unlock(&h->pg_lock);
> -
> -	alua_rtpg_queue(pg, sdev, NULL, true);
> -	kref_put(&pg->kref, release_port_group);
> -
> -	if (old_pg)
> -		kref_put(&old_pg->kref, release_port_group);
> +	alua_rtpg_queue(sdev, NULL, true);
>  
>  	return SCSI_DH_OK;
>  }
> @@ -417,14 +262,8 @@ static char print_alua_state(unsigned char state)
>  static void alua_handle_state_transition(struct scsi_device *sdev)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
> -	struct alua_port_group *pg;
> -
> -	rcu_read_lock();
> -	pg = rcu_dereference(h->pg);
> -	if (pg)
> -		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
> -	rcu_read_unlock();
> -	alua_check(sdev, false);
> +
> +	h->state = SCSI_ACCESS_STATE_TRANSITIONING;
>  }
>  
>  static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
> @@ -532,10 +371,10 @@ static int alua_tur(struct scsi_device *sdev)
>   * Returns SCSI_DH_DEV_OFFLINED if the path is
>   * found to be unusable.
>   */
> -static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
> +static int alua_rtpg(struct scsi_device *sdev)
>  {
>  	struct scsi_sense_hdr sense_hdr;
> -	struct alua_port_group *tmp_pg;
> +	struct alua_dh_data *h = sdev->handler_data;
>  	int len, k, off, bufflen = ALUA_RTPG_SIZE;
>  	int group_id_old, state_old, pref_old, valid_states_old;
>  	unsigned char *desc, *buff;
> @@ -545,19 +384,32 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  	unsigned char orig_transition_tmo;
>  	unsigned long flags;
>  	bool transitioning_sense = false;
> +	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> +
> +	if (group_id < 0) {
> +		/*
> +		 * Internal error; TPGS supported but required
> +		 * VPD identification descriptors not present.
> +		 * Disable ALUA support
> +		 */
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: No target port descriptors found\n",
> +			    ALUA_DH_NAME);
> +		return SCSI_DH_DEV_UNSUPP;
> +	}
>  
> -	group_id_old = pg->group_id;
> -	state_old = pg->state;
> -	pref_old = pg->pref;
> -	valid_states_old = pg->valid_states;
> +	group_id_old = h->group_id;
> +	state_old = h->state;
> +	pref_old = h->pref;
> +	valid_states_old = h->valid_states;
>  
> -	if (!pg->expiry) {
> +	if (!h->expiry) {
>  		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
>  
> -		if (pg->transition_tmo)
> -			transition_tmo = pg->transition_tmo * HZ;
> +		if (h->transition_tmo)
> +			transition_tmo = h->transition_tmo * HZ;
>  
> -		pg->expiry = round_jiffies_up(jiffies + transition_tmo);
> +		h->expiry = round_jiffies_up(jiffies + transition_tmo);
>  	}
>  
>  	buff = kzalloc(bufflen, GFP_KERNEL);
> @@ -566,7 +418,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  
>   retry:
>  	err = 0;
> -	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
> +	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, h->flags);
>  
>  	if (retval) {
>  		/*
> @@ -578,7 +430,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  		 * So ignore any errors to avoid spurious failures during
>  		 * path failover.
>  		 */
> -		if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
> +		if ((h->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
>  			sdev_printk(KERN_INFO, sdev,
>  				    "%s: ignoring rtpg result %d\n",
>  				    ALUA_DH_NAME, retval);
> @@ -607,9 +459,9 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
>  		 * with ASC 00h if they don't support the extended header.
>  		 */
> -		if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
> +		if (!(h->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
>  		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
> -			pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
> +			h->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
>  			goto retry;
>  		}
>  		/*
> @@ -628,7 +480,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  		if (sense_hdr.sense_key == UNIT_ATTENTION)
>  			err = SCSI_DH_RETRY;
>  		if (err == SCSI_DH_RETRY &&
> -		    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
> +		    h->expiry != 0 && time_before(jiffies, h->expiry)) {
>  			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
>  				    ALUA_DH_NAME);
>  			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> @@ -639,7 +491,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  			    ALUA_DH_NAME);
>  		scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
>  		kfree(buff);
> -		pg->expiry = 0;
> +		h->expiry = 0;
>  		return SCSI_DH_IO;
>  	}
>  
> @@ -654,23 +506,23 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  			sdev_printk(KERN_WARNING, sdev,
>  				    "%s: kmalloc buffer failed\n",__func__);
>  			/* Temporary failure, bypass */
> -			pg->expiry = 0;
> +			h->expiry = 0;
>  			return SCSI_DH_DEV_TEMP_BUSY;
>  		}
>  		goto retry;
>  	}
>  
> -	orig_transition_tmo = pg->transition_tmo;
> +	orig_transition_tmo = h->transition_tmo;
>  	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
> -		pg->transition_tmo = buff[5];
> +		h->transition_tmo = buff[5];
>  	else
> -		pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
> +		h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
>  
> -	if (orig_transition_tmo != pg->transition_tmo) {
> +	if (orig_transition_tmo != h->transition_tmo) {
>  		sdev_printk(KERN_INFO, sdev,
>  			    "%s: transition timeout set to %d seconds\n",
> -			    ALUA_DH_NAME, pg->transition_tmo);
> -		pg->expiry = jiffies + pg->transition_tmo * HZ;
> +			    ALUA_DH_NAME, h->transition_tmo);
> +		h->expiry = jiffies + h->transition_tmo * HZ;
>  	}
>  
>  	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> @@ -681,95 +533,71 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
>  	     k < len;
>  	     k += off, desc += off) {
> -		u16 group_id = get_unaligned_be16(&desc[2]);
> -
> -		spin_lock_irqsave(&port_group_lock, flags);
> -		tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
> -					  group_id);
> -		spin_unlock_irqrestore(&port_group_lock, flags);
> -		if (tmp_pg) {
> -			if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
> -				if ((tmp_pg == pg) ||
> -				    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
> -					struct alua_dh_data *h;
> -
> -					tmp_pg->state = desc[0] & 0x0f;
> -					tmp_pg->pref = desc[0] >> 7;
> -					rcu_read_lock();
> -					list_for_each_entry_rcu(h,
> -						&tmp_pg->dh_list, node) {
> -						if (!h->sdev)
> -							continue;
> -						h->sdev->access_state = desc[0];
> -					}
> -					rcu_read_unlock();
> -				}
> -				if (tmp_pg == pg)
> -					tmp_pg->valid_states = desc[1];
> -				spin_unlock_irqrestore(&tmp_pg->lock, flags);
> -			}
> -			kref_put(&tmp_pg->kref, release_port_group);
> +		u16 group_id_desc = get_unaligned_be16(&desc[2]);
> +
> +		spin_lock_irqsave(&h->lock, flags);
> +		if (group_id_desc == group_id) {
> +			h->group_id = group_id;
> +			WRITE_ONCE(h->state, desc[0] & 0x0f);
> +			h->pref = desc[0] >> 7;
> +			WRITE_ONCE(sdev->access_state, desc[0]);
> +			h->valid_states = desc[1];

Instead of alua_rtpg() updating the access_state of all of the devices in
all the port groups, and the state and pref of all the port groups, it
now just sets these for one device. It seems like it's wasting a lot of
information that it used to use. For instance, now when a scsi command
returns a unit attention that the ALUA state has changed, it won't get
updated on all the devices, just the one that got the unit attention.

>  		}
> +		spin_unlock_irqrestore(&h->lock, flags);
>  		off = 8 + (desc[7] * 4);
>  	}
>  
>   skip_rtpg:
> -	spin_lock_irqsave(&pg->lock, flags);
> +	spin_lock_irqsave(&h->lock, flags);
>  	if (transitioning_sense)
> -		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
> +		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
>  
> -	if (group_id_old != pg->group_id || state_old != pg->state ||
> -		pref_old != pg->pref || valid_states_old != pg->valid_states)
> +	if (group_id_old != h->group_id || state_old != h->state ||
> +		pref_old != h->pref || valid_states_old != h->valid_states)
>  		sdev_printk(KERN_INFO, sdev,
>  			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
> -			ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
> -			pg->pref ? "preferred" : "non-preferred",
> -			pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
> -			pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
> -			pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
> -			pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
> -			pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
> -			pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
> -			pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
> -
> -	switch (pg->state) {
> +			ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
> +			h->pref ? "preferred" : "non-preferred",
> +			h->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
> +			h->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
> +			h->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
> +			h->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
> +			h->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
> +			h->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
> +			h->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
> +
> +	switch (h->state) {
>  	case SCSI_ACCESS_STATE_TRANSITIONING:
> -		if (time_before(jiffies, pg->expiry)) {
> +		if (time_before(jiffies, h->expiry)) {
>  			/* State transition, retry */
> -			pg->interval = ALUA_RTPG_RETRY_DELAY;
> +			h->interval = ALUA_RTPG_RETRY_DELAY;
>  			err = SCSI_DH_RETRY;
>  		} else {
>  			struct alua_dh_data *h;
> +			unsigned char access_state;
>  
>  			/* Transitioning time exceeded, set port to standby */
>  			err = SCSI_DH_IO;
> -			pg->state = SCSI_ACCESS_STATE_STANDBY;
> -			pg->expiry = 0;
> -			rcu_read_lock();
> -			list_for_each_entry_rcu(h, &pg->dh_list, node) {
> -				if (!h->sdev)
> -					continue;
> -				h->sdev->access_state =
> -					(pg->state & SCSI_ACCESS_STATE_MASK);
> -				if (pg->pref)
> -					h->sdev->access_state |=
> -						SCSI_ACCESS_STATE_PREFERRED;
> -			}
> -			rcu_read_unlock();
> +			h->state = SCSI_ACCESS_STATE_STANDBY;
> +			h->expiry = 0;
> +			access_state = h->state & SCSI_ACCESS_STATE_MASK;
> +			if (h->pref)
> +				access_state |= SCSI_ACCESS_STATE_PREFERRED;
> +			WRITE_ONCE(sdev->access_state, access_state);
>  		}
>  		break;
>  	case SCSI_ACCESS_STATE_OFFLINE:
>  		/* Path unusable */
>  		err = SCSI_DH_DEV_OFFLINED;
> -		pg->expiry = 0;
> +		h->expiry = 0;
>  		break;
>  	default:
>  		/* Useable path if active */
>  		err = SCSI_DH_OK;
> -		pg->expiry = 0;
> +		h->expiry = 0;
>  		break;
>  	}
> -	spin_unlock_irqrestore(&pg->lock, flags);
> +	spin_unlock_irqrestore(&h->lock, flags);
>  	kfree(buff);
>  	return err;
>  }
> @@ -782,22 +610,23 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
>   * a re-evaluation of the target group state or SCSI_DH_OK
>   * if no further action needs to be taken.
>   */
> -static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
> +static unsigned alua_stpg(struct scsi_device *sdev)
>  {
>  	int retval;
>  	struct scsi_sense_hdr sense_hdr;
> +	struct alua_dh_data *h = sdev->handler_data;
>  
> -	if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
> +	if (!(h->tpgs & TPGS_MODE_EXPLICIT)) {
>  		/* Only implicit ALUA supported, retry */
>  		return SCSI_DH_RETRY;
>  	}
> -	switch (pg->state) {
> +	switch (h->state) {
>  	case SCSI_ACCESS_STATE_OPTIMAL:
>  		return SCSI_DH_OK;
>  	case SCSI_ACCESS_STATE_ACTIVE:
> -		if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
> -		    !pg->pref &&
> -		    (pg->tpgs & TPGS_MODE_IMPLICIT))
> +		if ((h->flags & ALUA_OPTIMIZE_STPG) &&
> +		    !h->pref &&
> +		    (h->tpgs & TPGS_MODE_IMPLICIT))
>  			return SCSI_DH_OK;
>  		break;
>  	case SCSI_ACCESS_STATE_STANDBY:
> @@ -810,10 +639,10 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  	default:
>  		sdev_printk(KERN_INFO, sdev,
>  			    "%s: stpg failed, unhandled TPGS state %d",
> -			    ALUA_DH_NAME, pg->state);
> +			    ALUA_DH_NAME, h->state);
>  		return SCSI_DH_NOSYS;
>  	}
> -	retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
> +	retval = submit_stpg(sdev, h->group_id, &sense_hdr);
>  
>  	if (retval) {
>  		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
> @@ -832,144 +661,75 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
>  	return SCSI_DH_RETRY;
>  }
>  
> -/*
> - * The caller must call scsi_device_put() on the returned pointer if it is not
> - * NULL.
> - */
> -static struct scsi_device * __must_check
> -alua_rtpg_select_sdev(struct alua_port_group *pg)
> -{
> -	struct alua_dh_data *h;
> -	struct scsi_device *sdev = NULL, *prev_sdev;
> -
> -	lockdep_assert_held(&pg->lock);
> -	if (WARN_ON(!pg->rtpg_sdev))
> -		return NULL;
> -
> -	/*
> -	 * RCU protection isn't necessary for dh_list here
> -	 * as we hold pg->lock, but for access to h->pg.
> -	 */
> -	rcu_read_lock();
> -	list_for_each_entry_rcu(h, &pg->dh_list, node) {
> -		if (!h->sdev)
> -			continue;
> -		if (h->sdev == pg->rtpg_sdev) {
> -			h->disabled = true;
> -			continue;
> -		}
> -		if (rcu_dereference(h->pg) == pg &&
> -		    !h->disabled &&
> -		    !scsi_device_get(h->sdev)) {
> -			sdev = h->sdev;
> -			break;
> -		}
> -	}
> -	rcu_read_unlock();
> -
> -	if (!sdev) {
> -		pr_warn("%s: no device found for rtpg\n",
> -			(pg->device_id_len ?
> -			 (char *)pg->device_id_str : "(nameless PG)"));
> -		return NULL;
> -	}
> -
> -	sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n");
> -
> -	prev_sdev = pg->rtpg_sdev;
> -	pg->rtpg_sdev = sdev;
> -
> -	return prev_sdev;
> -}
> -
>  static void alua_rtpg_work(struct work_struct *work)
>  {
> -	struct alua_port_group *pg =
> -		container_of(work, struct alua_port_group, rtpg_work.work);
> -	struct scsi_device *sdev, *prev_sdev = NULL;
> +	struct alua_dh_data *h =
> +		container_of(work, struct alua_dh_data, rtpg_work.work);
> +	struct scsi_device *sdev = h->sdev;
>  	LIST_HEAD(qdata_list);
>  	int err = SCSI_DH_OK;
>  	struct alua_queue_data *qdata, *tmp;
> -	struct alua_dh_data *h;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&pg->lock, flags);
> -	sdev = pg->rtpg_sdev;
> -	if (!sdev) {
> -		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
> -		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
> -		spin_unlock_irqrestore(&pg->lock, flags);
> -		kref_put(&pg->kref, release_port_group);
> -		return;
> -	}
> -	pg->flags |= ALUA_PG_RUNNING;
> -	if (pg->flags & ALUA_PG_RUN_RTPG) {
> -		int state = pg->state;
> +	spin_lock_irqsave(&h->lock, flags);
> +	h->flags |= ALUA_PG_RUNNING;
> +	if (h->flags & ALUA_PG_RUN_RTPG) {
> +		int state = h->state;
>  
> -		pg->flags &= ~ALUA_PG_RUN_RTPG;
> -		spin_unlock_irqrestore(&pg->lock, flags);
> +		h->flags &= ~ALUA_PG_RUN_RTPG;
> +		spin_unlock_irqrestore(&h->lock, flags);
>  		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
>  			if (alua_tur(sdev) == SCSI_DH_RETRY) {
> -				spin_lock_irqsave(&pg->lock, flags);
> -				pg->flags &= ~ALUA_PG_RUNNING;
> -				pg->flags |= ALUA_PG_RUN_RTPG;
> -				if (!pg->interval)
> -					pg->interval = ALUA_RTPG_RETRY_DELAY;
> -				spin_unlock_irqrestore(&pg->lock, flags);
> -				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
> -						   pg->interval * HZ);
> +				spin_lock_irqsave(&h->lock, flags);
> +				h->flags &= ~ALUA_PG_RUNNING;
> +				h->flags |= ALUA_PG_RUN_RTPG;
> +				if (!h->interval)
> +					h->interval = ALUA_RTPG_RETRY_DELAY;
> +				spin_unlock_irqrestore(&h->lock, flags);
> +				queue_delayed_work(kaluad_wq, &h->rtpg_work,
> +						   h->interval * HZ);
>  				return;
>  			}
>  			/* Send RTPG on failure or if TUR indicates SUCCESS */
>  		}
> -		err = alua_rtpg(sdev, pg);
> -		spin_lock_irqsave(&pg->lock, flags);
> +		err = alua_rtpg(sdev);
> +		spin_lock_irqsave(&h->lock, flags);
>  
> -		/* If RTPG failed on the current device, try using another */
> -		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
> -		    (prev_sdev = alua_rtpg_select_sdev(pg)))
> -			err = SCSI_DH_IMM_RETRY;

Previously, if the rtpg failed on a device, another device would be
tried, and the unusable device's alua state would get updated, along
with all the other devices' states. Now I don't see how a failed device
gets its state updated.

-Ben

> -
> -		if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY ||
> -		    pg->flags & ALUA_PG_RUN_RTPG) {
> -			pg->flags &= ~ALUA_PG_RUNNING;
> +		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
> +			h->flags &= ~ALUA_PG_RUNNING;
>  			if (err == SCSI_DH_IMM_RETRY)
> -				pg->interval = 0;
> -			else if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
> -				pg->interval = ALUA_RTPG_RETRY_DELAY;
> -			pg->flags |= ALUA_PG_RUN_RTPG;
> -			spin_unlock_irqrestore(&pg->lock, flags);
> +				h->interval = 0;
> +			else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
> +				h->interval = ALUA_RTPG_RETRY_DELAY;
> +			h->flags |= ALUA_PG_RUN_RTPG;
> +			spin_unlock_irqrestore(&h->lock, flags);
>  			goto queue_rtpg;
>  		}
>  		if (err != SCSI_DH_OK)
> -			pg->flags &= ~ALUA_PG_RUN_STPG;
> +			h->flags &= ~ALUA_PG_RUN_STPG;
>  	}
> -	if (pg->flags & ALUA_PG_RUN_STPG) {
> -		pg->flags &= ~ALUA_PG_RUN_STPG;
> -		spin_unlock_irqrestore(&pg->lock, flags);
> -		err = alua_stpg(sdev, pg);
> -		spin_lock_irqsave(&pg->lock, flags);
> -		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
> -			pg->flags |= ALUA_PG_RUN_RTPG;
> -			pg->interval = 0;
> -			pg->flags &= ~ALUA_PG_RUNNING;
> -			spin_unlock_irqrestore(&pg->lock, flags);
> +	if (h->flags & ALUA_PG_RUN_STPG) {
> +		h->flags &= ~ALUA_PG_RUN_STPG;
> +		spin_unlock_irqrestore(&h->lock, flags);
> +		err = alua_stpg(sdev);
> +		spin_lock_irqsave(&h->lock, flags);
> +		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
> +			h->flags |= ALUA_PG_RUN_RTPG;
> +			h->interval = 0;
> +			h->flags &= ~ALUA_PG_RUNNING;
> +			spin_unlock_irqrestore(&h->lock, flags);
>  			goto queue_rtpg;
>  		}
>  	}
>  
> -	list_splice_init(&pg->rtpg_list, &qdata_list);
> +	list_splice_init(&h->rtpg_list, &qdata_list);
>  	/*
>  	 * We went through an RTPG, for good or bad.
> -	 * Re-enable all devices for the next attempt.
> +	 * Re-enable the device for the next attempt.
>  	 */
> -	list_for_each_entry(h, &pg->dh_list, node)
> -		h->disabled = false;
> -	pg->rtpg_sdev = NULL;
> -	spin_unlock_irqrestore(&pg->lock, flags);
> +	h->disabled = false;
> +	spin_unlock_irqrestore(&h->lock, flags);
>  
> -	if (prev_sdev)
> -		scsi_device_put(prev_sdev);
>  
>  	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
>  		list_del(&qdata->entry);
> @@ -977,22 +737,19 @@ static void alua_rtpg_work(struct work_struct *work)
>  			qdata->callback_fn(qdata->callback_data, err);
>  		kfree(qdata);
>  	}
> -	spin_lock_irqsave(&pg->lock, flags);
> -	pg->flags &= ~ALUA_PG_RUNNING;
> -	spin_unlock_irqrestore(&pg->lock, flags);
> +	spin_lock_irqsave(&h->lock, flags);
> +	h->flags &= ~ALUA_PG_RUNNING;
> +	spin_unlock_irqrestore(&h->lock, flags);
>  	scsi_device_put(sdev);
> -	kref_put(&pg->kref, release_port_group);
> +
>  	return;
>  
>  queue_rtpg:
> -	if (prev_sdev)
> -		scsi_device_put(prev_sdev);
> -	queue_delayed_work(kaluad_wq, &pg->rtpg_work, pg->interval * HZ);
> +	queue_delayed_work(kaluad_wq, &h->rtpg_work, h->interval * HZ);
>  }
>  
>  /**
>   * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
> - * @pg: ALUA port group associated with @sdev.
>   * @sdev: SCSI device for which to submit an RTPG.
>   * @qdata: Information about the callback to invoke after the RTPG.
>   * @force: Whether or not to submit an RTPG if a work item that will submit an
> @@ -1004,51 +761,34 @@ static void alua_rtpg_work(struct work_struct *work)
>   * Context: may be called from atomic context (alua_check()) only if the caller
>   *	holds an sdev reference.
>   */
> -static bool alua_rtpg_queue(struct alua_port_group *pg,
> -			    struct scsi_device *sdev,
> +static bool alua_rtpg_queue(struct scsi_device *sdev,
>  			    struct alua_queue_data *qdata, bool force)
>  {
>  	int start_queue = 0;
> +	struct alua_dh_data *h = sdev->handler_data;
>  	unsigned long flags;
>  
> -	if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
> +	if (scsi_device_get(sdev))
>  		return false;
>  
> -	spin_lock_irqsave(&pg->lock, flags);
> +	spin_lock_irqsave(&h->lock, flags);
>  	if (qdata) {
> -		list_add_tail(&qdata->entry, &pg->rtpg_list);
> -		pg->flags |= ALUA_PG_RUN_STPG;
> +		list_add_tail(&qdata->entry, &h->rtpg_list);
> +		h->flags |= ALUA_PG_RUN_STPG;
>  		force = true;
>  	}
> -	if (pg->rtpg_sdev == NULL) {
> -		struct alua_dh_data *h = sdev->handler_data;
> -
> -		rcu_read_lock();
> -		if (h && rcu_dereference(h->pg) == pg) {
> -			pg->interval = 0;
> -			pg->flags |= ALUA_PG_RUN_RTPG;
> -			kref_get(&pg->kref);
> -			pg->rtpg_sdev = sdev;
> -			start_queue = 1;
> -		}
> -		rcu_read_unlock();
> -	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
> -		pg->flags |= ALUA_PG_RUN_RTPG;
> +	if (!(h->flags & ALUA_PG_RUN_RTPG) && force) {
> +		h->flags |= ALUA_PG_RUN_RTPG;
>  		/* Do not queue if the worker is already running */
> -		if (!(pg->flags & ALUA_PG_RUNNING)) {
> -			kref_get(&pg->kref);
> +		if (!(h->flags & ALUA_PG_RUNNING))
>  			start_queue = 1;
> -		}
>  	}
>  
> -	spin_unlock_irqrestore(&pg->lock, flags);
> -
> +	spin_unlock_irqrestore(&h->lock, flags);
>  	if (start_queue) {
> -		if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
> +		if (queue_delayed_work(kaluad_wq, &h->rtpg_work,
>  				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
>  			sdev = NULL;
> -		else
> -			kref_put(&pg->kref, release_port_group);
>  	}
>  	if (sdev)
>  		scsi_device_put(sdev);
> @@ -1088,7 +828,6 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
>  static int alua_set_params(struct scsi_device *sdev, const char *params)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
> -	struct alua_port_group *pg = NULL;
>  	unsigned int optimize = 0, argc;
>  	const char *p = params;
>  	int result = SCSI_DH_OK;
> @@ -1102,19 +841,12 @@ static int alua_set_params(struct scsi_device *sdev, const char *params)
>  	if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
>  		return -EINVAL;
>  
> -	rcu_read_lock();
> -	pg = rcu_dereference(h->pg);
> -	if (!pg) {
> -		rcu_read_unlock();
> -		return -ENXIO;
> -	}
> -	spin_lock_irqsave(&pg->lock, flags);
> +	spin_lock_irqsave(&h->lock, flags);
>  	if (optimize)
> -		pg->flags |= ALUA_OPTIMIZE_STPG;
> +		h->flags |= ALUA_OPTIMIZE_STPG;
>  	else
> -		pg->flags &= ~ALUA_OPTIMIZE_STPG;
> -	spin_unlock_irqrestore(&pg->lock, flags);
> -	rcu_read_unlock();
> +		h->flags &= ~ALUA_OPTIMIZE_STPG;
> +	spin_unlock_irqrestore(&h->lock, flags);
>  
>  	return result;
>  }
> @@ -1132,10 +864,8 @@ static int alua_set_params(struct scsi_device *sdev, const char *params)
>  static int alua_activate(struct scsi_device *sdev,
>  			activate_complete fn, void *data)
>  {
> -	struct alua_dh_data *h = sdev->handler_data;
>  	int err = SCSI_DH_OK;
>  	struct alua_queue_data *qdata;
> -	struct alua_port_group *pg;
>  
>  	qdata = kzalloc_obj(*qdata);
>  	if (!qdata) {
> @@ -1145,26 +875,12 @@ static int alua_activate(struct scsi_device *sdev,
>  	qdata->callback_fn = fn;
>  	qdata->callback_data = data;
>  
> -	mutex_lock(&h->init_mutex);
> -	rcu_read_lock();
> -	pg = rcu_dereference(h->pg);
> -	if (!pg || !kref_get_unless_zero(&pg->kref)) {
> -		rcu_read_unlock();
> -		kfree(qdata);
> -		err = h->init_error;
> -		mutex_unlock(&h->init_mutex);
> -		goto out;
> -	}
> -	rcu_read_unlock();
> -	mutex_unlock(&h->init_mutex);
> -
> -	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
> +	if (alua_rtpg_queue(sdev, qdata, true)) {
>  		fn = NULL;
>  	} else {
>  		kfree(qdata);
>  		err = SCSI_DH_DEV_OFFLINED;
>  	}
> -	kref_put(&pg->kref, release_port_group);
>  out:
>  	if (fn)
>  		fn(data, err);
> @@ -1179,18 +895,7 @@ static int alua_activate(struct scsi_device *sdev,
>   */
>  static void alua_check(struct scsi_device *sdev, bool force)
>  {
> -	struct alua_dh_data *h = sdev->handler_data;
> -	struct alua_port_group *pg;
> -
> -	rcu_read_lock();
> -	pg = rcu_dereference(h->pg);
> -	if (!pg || !kref_get_unless_zero(&pg->kref)) {
> -		rcu_read_unlock();
> -		return;
> -	}
> -	rcu_read_unlock();
> -	alua_rtpg_queue(pg, sdev, NULL, force);
> -	kref_put(&pg->kref, release_port_group);
> +	alua_rtpg_queue(sdev, NULL, force);
>  }
>  
>  /*
> @@ -1202,14 +907,12 @@ static void alua_check(struct scsi_device *sdev, bool force)
>  static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
> -	struct alua_port_group *pg;
> -	unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
> +	unsigned long flags;
> +	unsigned char state;
>  
> -	rcu_read_lock();
> -	pg = rcu_dereference(h->pg);
> -	if (pg)
> -		state = pg->state;
> -	rcu_read_unlock();
> +	spin_lock_irqsave(&h->lock, flags);
> +	state = h->state;
> +	spin_unlock_irqrestore(&h->lock, flags);
>  
>  	switch (state) {
>  	case SCSI_ACCESS_STATE_OPTIMAL:
> @@ -1242,20 +945,26 @@ static int alua_bus_attach(struct scsi_device *sdev)
>  	h = kzalloc_obj(*h);
>  	if (!h)
>  		return SCSI_DH_NOMEM;
> -	spin_lock_init(&h->pg_lock);
> -	rcu_assign_pointer(h->pg, NULL);
> +	spin_lock_init(&h->lock);
>  	h->init_error = SCSI_DH_OK;
>  	h->sdev = sdev;
> -	INIT_LIST_HEAD(&h->node);
> +	INIT_DELAYED_WORK(&h->rtpg_work, alua_rtpg_work);
> +	INIT_LIST_HEAD(&h->rtpg_list);
>  
>  	mutex_init(&h->init_mutex);
> +
> +	h->state = SCSI_ACCESS_STATE_OPTIMAL;
> +	h->valid_states = TPGS_SUPPORT_ALL;
> +	if (optimize_stpg)
> +		h->flags |= ALUA_OPTIMIZE_STPG;
> +
> +	sdev->handler_data = h;
>  	err = alua_initialize(sdev, h);
>  	if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
>  		goto failed;
> -
> -	sdev->handler_data = h;
>  	return SCSI_DH_OK;
>  failed:
> +	sdev->handler_data = NULL;
>  	kfree(h);
>  	return err;
>  }
> @@ -1267,20 +976,8 @@ static int alua_bus_attach(struct scsi_device *sdev)
>  static void alua_bus_detach(struct scsi_device *sdev)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
> -	struct alua_port_group *pg;
> -
> -	spin_lock(&h->pg_lock);
> -	pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
> -	rcu_assign_pointer(h->pg, NULL);
> -	spin_unlock(&h->pg_lock);
> -	if (pg) {
> -		spin_lock_irq(&pg->lock);
> -		list_del_rcu(&h->node);
> -		spin_unlock_irq(&pg->lock);
> -		kref_put(&pg->kref, release_port_group);
> -	}
> +
>  	sdev->handler_data = NULL;
> -	synchronize_rcu();
>  	kfree(h);
>  }
>  
> -- 
> 2.43.5


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support
  2026-03-17 12:07 ` [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support John Garry
@ 2026-03-23  1:47   ` Benjamin Marzinski
  2026-03-23 11:59     ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23  1:47 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 17, 2026 at 12:07:02PM +0000, John Garry wrote:
> Switch to use core scsi ALUA support.
> 
> We still need to drive the state machine for explicit ALUA.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 580 +--------------------
>  1 file changed, 21 insertions(+), 559 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
> index 067021fffc16f..4d53fab85a7ed 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -10,6 +10,7 @@
>  #include <linux/module.h>
>  #include <linux/unaligned.h>
>  #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>  #include <scsi/scsi_proto.h>
>  #include <scsi/scsi_dbg.h>
>  #include <scsi/scsi_eh.h>
> @@ -44,7 +45,6 @@
>  
>  /* device handler flags */
>  #define ALUA_OPTIMIZE_STPG		0x01
> -#define ALUA_RTPG_EXT_HDR_UNSUPP	0x02
>  /* State machine flags */
>  #define ALUA_PG_RUN_RTPG		0x10
>  #define ALUA_PG_RUN_STPG		0x20
> @@ -65,14 +65,6 @@ struct alua_dh_data {
>  	unsigned		flags; /* used for optimizing STPG */
>  	spinlock_t		lock;
>  
> -	/* alua stuff */
> -	int			state;
> -	int			pref;
> -	int			valid_states;
> -	int			tpgs;
> -	unsigned char		transition_tmo;
> -	unsigned long		expiry;
> -	unsigned long		interval;
>  	struct delayed_work	rtpg_work;
>  	struct list_head	rtpg_list;
>  };
> @@ -91,121 +83,6 @@ static bool alua_rtpg_queue(struct scsi_device *sdev,
>  			    struct alua_queue_data *qdata, bool force);
>  static void alua_check(struct scsi_device *sdev, bool force);
>  
> -/*
> - * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
> - * @sdev: sdev the command should be sent to
> - */
> -static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
> -		       int bufflen, struct scsi_sense_hdr *sshdr, int flags)
> -{
> -	u8 cdb[MAX_COMMAND_SIZE];
> -	blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
> -				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
> -	const struct scsi_exec_args exec_args = {
> -		.sshdr = sshdr,
> -	};
> -
> -	/* Prepare the command. */
> -	memset(cdb, 0x0, MAX_COMMAND_SIZE);
> -	cdb[0] = MAINTENANCE_IN;
> -	if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
> -		cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
> -	else
> -		cdb[1] = MI_REPORT_TARGET_PGS;
> -	put_unaligned_be32(bufflen, &cdb[6]);
> -
> -	return scsi_execute_cmd(sdev, cdb, opf, buff, bufflen,
> -				ALUA_FAILOVER_TIMEOUT * HZ,
> -				ALUA_FAILOVER_RETRIES, &exec_args);
> -}
> -
> -/*
> - * submit_stpg - Issue a SET TARGET PORT GROUP command
> - *
> - * Currently we're only setting the current target port group state
> - * to 'active/optimized' and let the array firmware figure out
> - * the states of the remaining groups.
> - */
> -static int submit_stpg(struct scsi_device *sdev, int group_id,
> -		       struct scsi_sense_hdr *sshdr)
> -{
> -	u8 cdb[MAX_COMMAND_SIZE];
> -	unsigned char stpg_data[8];
> -	int stpg_len = 8;
> -	blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
> -				REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
> -	const struct scsi_exec_args exec_args = {
> -		.sshdr = sshdr,
> -	};
> -
> -	/* Prepare the data buffer */
> -	memset(stpg_data, 0, stpg_len);
> -	stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
> -	put_unaligned_be16(group_id, &stpg_data[6]);
> -
> -	/* Prepare the command. */
> -	memset(cdb, 0x0, MAX_COMMAND_SIZE);
> -	cdb[0] = MAINTENANCE_OUT;
> -	cdb[1] = MO_SET_TARGET_PGS;
> -	put_unaligned_be32(stpg_len, &cdb[6]);
> -
> -	return scsi_execute_cmd(sdev, cdb, opf, stpg_data,
> -				stpg_len, ALUA_FAILOVER_TIMEOUT * HZ,
> -				ALUA_FAILOVER_RETRIES, &exec_args);
> -}
> -
> -/*
> - * alua_check_tpgs - Evaluate TPGS setting
> - * @sdev: device to be checked
> - *
> - * Examine the TPGS setting of the sdev to find out if ALUA
> - * is supported.
> - */
> -static int alua_check_tpgs(struct scsi_device *sdev)
> -{
> -	int tpgs = TPGS_MODE_NONE;
> -
> -	/*
> -	 * ALUA support for non-disk devices is fraught with
> -	 * difficulties, so disable it for now.
> -	 */
> -	if (sdev->type != TYPE_DISK) {
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: disable for non-disk devices\n",
> -			    ALUA_DH_NAME);
> -		return tpgs;
> -	}
> -
> -	tpgs = scsi_device_tpgs(sdev);
> -	switch (tpgs) {
> -	case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: supports implicit and explicit TPGS\n",
> -			    ALUA_DH_NAME);
> -		break;
> -	case TPGS_MODE_EXPLICIT:
> -		sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
> -			    ALUA_DH_NAME);
> -		break;
> -	case TPGS_MODE_IMPLICIT:
> -		sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
> -			    ALUA_DH_NAME);
> -		break;
> -	case TPGS_MODE_NONE:
> -		sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
> -			    ALUA_DH_NAME);
> -		break;
> -	default:
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: unsupported TPGS setting %d\n",
> -			    ALUA_DH_NAME, tpgs);
> -		tpgs = TPGS_MODE_NONE;
> -		break;
> -	}
> -
> -	return tpgs;
> -}
> -
>  /*
>   * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
>   * @sdev: device to be checked
> @@ -216,56 +93,11 @@ static int alua_check_tpgs(struct scsi_device *sdev)
>  static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
>  			  int tpgs)
>  {
> -	int rel_port = -1;
> -
> -	h->group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> -	if (h->group_id < 0) {
> -		/*
> -		 * Internal error; TPGS supported but required
> -		 * VPD identification descriptors not present.
> -		 * Disable ALUA support
> -		 */
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: No target port descriptors found\n",
> -			    ALUA_DH_NAME);
> -		return SCSI_DH_DEV_UNSUPP;
> -	}
> -	h->tpgs = tpgs;
> -
>  	alua_rtpg_queue(sdev, NULL, true);
>  
>  	return SCSI_DH_OK;
>  }
>  
> -static char print_alua_state(unsigned char state)
> -{
> -	switch (state) {
> -	case SCSI_ACCESS_STATE_OPTIMAL:
> -		return 'A';
> -	case SCSI_ACCESS_STATE_ACTIVE:
> -		return 'N';
> -	case SCSI_ACCESS_STATE_STANDBY:
> -		return 'S';
> -	case SCSI_ACCESS_STATE_UNAVAILABLE:
> -		return 'U';
> -	case SCSI_ACCESS_STATE_LBA:
> -		return 'L';
> -	case SCSI_ACCESS_STATE_OFFLINE:
> -		return 'O';
> -	case SCSI_ACCESS_STATE_TRANSITIONING:
> -		return 'T';
> -	default:
> -		return 'X';
> -	}
> -}
> -
> -static void alua_handle_state_transition(struct scsi_device *sdev)
> -{
> -	struct alua_dh_data *h = sdev->handler_data;
> -
> -	h->state = SCSI_ACCESS_STATE_TRANSITIONING;
> -}
> -
>  static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
>  					      struct scsi_sense_hdr *sense_hdr)
>  {
> @@ -275,7 +107,7 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
>  			/*
>  			 * LUN Not Accessible - ALUA state transition
>  			 */
> -			alua_handle_state_transition(sdev);
> +			scsi_alua_handle_state_transition(sdev);
>  			return NEEDS_RETRY;
>  		}
>  		break;
> @@ -284,7 +116,7 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
>  			/*
>  			 * LUN Not Accessible - ALUA state transition
>  			 */
> -			alua_handle_state_transition(sdev);
> +			scsi_alua_handle_state_transition(sdev);
>  			return NEEDS_RETRY;
>  		}
>  		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
> @@ -338,329 +170,6 @@ static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
>  	return SCSI_RETURN_NOT_HANDLED;
>  }
>  
> -/*
> - * alua_tur - Send a TEST UNIT READY
> - * @sdev: device to which the TEST UNIT READY command should be send
> - *
> - * Send a TEST UNIT READY to @sdev to figure out the device state
> - * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
> - * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
> - */
> -static int alua_tur(struct scsi_device *sdev)
> -{
> -	struct scsi_sense_hdr sense_hdr;
> -	int retval;
> -
> -	retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
> -				      ALUA_FAILOVER_RETRIES, &sense_hdr);
> -	if ((sense_hdr.sense_key == NOT_READY ||
> -	     sense_hdr.sense_key == UNIT_ATTENTION) &&
> -	    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> -		return SCSI_DH_RETRY;
> -	else if (retval)
> -		return SCSI_DH_IO;
> -	else
> -		return SCSI_DH_OK;
> -}
> -
> -/*
> - * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
> - * @sdev: the device to be evaluated.
> - *
> - * Evaluate the Target Port Group State.
> - * Returns SCSI_DH_DEV_OFFLINED if the path is
> - * found to be unusable.
> - */
> -static int alua_rtpg(struct scsi_device *sdev)
> -{
> -	struct scsi_sense_hdr sense_hdr;
> -	struct alua_dh_data *h = sdev->handler_data;
> -	int len, k, off, bufflen = ALUA_RTPG_SIZE;
> -	int group_id_old, state_old, pref_old, valid_states_old;
> -	unsigned char *desc, *buff;
> -	unsigned err;
> -	int retval;
> -	unsigned int tpg_desc_tbl_off;
> -	unsigned char orig_transition_tmo;
> -	unsigned long flags;
> -	bool transitioning_sense = false;
> -	int rel_port, group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> -
> -	if (group_id < 0) {
> -		/*
> -		 * Internal error; TPGS supported but required
> -		 * VPD identification descriptors not present.
> -		 * Disable ALUA support
> -		 */
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: No target port descriptors found\n",
> -			    ALUA_DH_NAME);
> -		return SCSI_DH_DEV_UNSUPP;
> -	}
> -
> -	group_id_old = h->group_id;
> -	state_old = h->state;
> -	pref_old = h->pref;
> -	valid_states_old = h->valid_states;
> -
> -	if (!h->expiry) {
> -		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
> -
> -		if (h->transition_tmo)
> -			transition_tmo = h->transition_tmo * HZ;
> -
> -		h->expiry = round_jiffies_up(jiffies + transition_tmo);
> -	}
> -
> -	buff = kzalloc(bufflen, GFP_KERNEL);
> -	if (!buff)
> -		return SCSI_DH_DEV_TEMP_BUSY;
> -
> - retry:
> -	err = 0;
> -	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, h->flags);
> -
> -	if (retval) {
> -		/*
> -		 * Some (broken) implementations have a habit of returning
> -		 * an error during things like firmware update etc.
> -		 * But if the target only supports active/optimized there's
> -		 * not much we can do; it's not that we can switch paths
> -		 * or anything.
> -		 * So ignore any errors to avoid spurious failures during
> -		 * path failover.
> -		 */
> -		if ((h->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
> -			sdev_printk(KERN_INFO, sdev,
> -				    "%s: ignoring rtpg result %d\n",
> -				    ALUA_DH_NAME, retval);
> -			kfree(buff);
> -			return SCSI_DH_OK;
> -		}
> -		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
> -			sdev_printk(KERN_INFO, sdev,
> -				    "%s: rtpg failed, result %d\n",
> -				    ALUA_DH_NAME, retval);
> -			kfree(buff);
> -			if (retval < 0)
> -				return SCSI_DH_DEV_TEMP_BUSY;
> -			if (host_byte(retval) == DID_NO_CONNECT)
> -				return SCSI_DH_RES_TEMP_UNAVAIL;
> -			return SCSI_DH_IO;
> -		}
> -
> -		/*
> -		 * submit_rtpg() has failed on existing arrays
> -		 * when requesting extended header info, and
> -		 * the array doesn't support extended headers,
> -		 * even though it shouldn't according to T10.
> -		 * The retry without rtpg_ext_hdr_req set
> -		 * handles this.
> -		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
> -		 * with ASC 00h if they don't support the extended header.
> -		 */
> -		if (!(h->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
> -		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
> -			h->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
> -			goto retry;
> -		}
> -		/*
> -		 * If the array returns with 'ALUA state transition'
> -		 * sense code here it cannot return RTPG data during
> -		 * transition. So set the state to 'transitioning' directly.
> -		 */
> -		if (sense_hdr.sense_key == NOT_READY &&
> -		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
> -			transitioning_sense = true;
> -			goto skip_rtpg;
> -		}
> -		/*
> -		 * Retry on any other UNIT ATTENTION occurred.
> -		 */
> -		if (sense_hdr.sense_key == UNIT_ATTENTION)
> -			err = SCSI_DH_RETRY;
> -		if (err == SCSI_DH_RETRY &&
> -		    h->expiry != 0 && time_before(jiffies, h->expiry)) {
> -			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
> -				    ALUA_DH_NAME);
> -			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> -			kfree(buff);
> -			return err;
> -		}
> -		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
> -			    ALUA_DH_NAME);
> -		scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> -		kfree(buff);
> -		h->expiry = 0;
> -		return SCSI_DH_IO;
> -	}
> -
> -	len = get_unaligned_be32(&buff[0]) + 4;
> -
> -	if (len > bufflen) {
> -		/* Resubmit with the correct length */
> -		kfree(buff);
> -		bufflen = len;
> -		buff = kmalloc(bufflen, GFP_KERNEL);
> -		if (!buff) {
> -			sdev_printk(KERN_WARNING, sdev,
> -				    "%s: kmalloc buffer failed\n",__func__);
> -			/* Temporary failure, bypass */
> -			h->expiry = 0;
> -			return SCSI_DH_DEV_TEMP_BUSY;
> -		}
> -		goto retry;
> -	}
> -
> -	orig_transition_tmo = h->transition_tmo;
> -	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
> -		h->transition_tmo = buff[5];
> -	else
> -		h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
> -
> -	if (orig_transition_tmo != h->transition_tmo) {
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: transition timeout set to %d seconds\n",
> -			    ALUA_DH_NAME, h->transition_tmo);
> -		h->expiry = jiffies + h->transition_tmo * HZ;
> -	}
> -
> -	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> -		tpg_desc_tbl_off = 8;
> -	else
> -		tpg_desc_tbl_off = 4;
> -
> -	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
> -	     k < len;
> -	     k += off, desc += off) {
> -		u16 group_id_desc = get_unaligned_be16(&desc[2]);
> -
> -		spin_lock_irqsave(&h->lock, flags);
> -		if (group_id_desc == group_id) {
> -			h->group_id = group_id;
> -			WRITE_ONCE(h->state, desc[0] & 0x0f);
> -			h->pref = desc[0] >> 7;
> -			WRITE_ONCE(sdev->access_state, desc[0]);
> -			h->valid_states = desc[1];
> -		}
> -		spin_unlock_irqrestore(&h->lock, flags);
> -		off = 8 + (desc[7] * 4);
> -	}
> -
> - skip_rtpg:
> -	spin_lock_irqsave(&h->lock, flags);
> -	if (transitioning_sense)
> -		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
> -
> -	if (group_id_old != h->group_id || state_old != h->state ||
> -		pref_old != h->pref || valid_states_old != h->valid_states)
> -		sdev_printk(KERN_INFO, sdev,
> -			"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
> -			ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
> -			h->pref ? "preferred" : "non-preferred",
> -			h->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
> -			h->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
> -			h->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
> -			h->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
> -			h->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
> -			h->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
> -			h->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
> -
> -	switch (h->state) {
> -	case SCSI_ACCESS_STATE_TRANSITIONING:
> -		if (time_before(jiffies, h->expiry)) {
> -			/* State transition, retry */
> -			h->interval = ALUA_RTPG_RETRY_DELAY;
> -			err = SCSI_DH_RETRY;
> -		} else {
> -			struct alua_dh_data *h;
> -			unsigned char access_state;
> -
> -			/* Transitioning time exceeded, set port to standby */
> -			err = SCSI_DH_IO;
> -			h->state = SCSI_ACCESS_STATE_STANDBY;
> -			h->expiry = 0;
> -			access_state = h->state & SCSI_ACCESS_STATE_MASK;
> -			if (h->pref)
> -				access_state |= SCSI_ACCESS_STATE_PREFERRED;
> -			WRITE_ONCE(sdev->access_state, access_state);
> -		}
> -		break;
> -	case SCSI_ACCESS_STATE_OFFLINE:
> -		/* Path unusable */
> -		err = SCSI_DH_DEV_OFFLINED;
> -		h->expiry = 0;
> -		break;
> -	default:
> -		/* Useable path if active */
> -		err = SCSI_DH_OK;
> -		h->expiry = 0;
> -		break;
> -	}
> -	spin_unlock_irqrestore(&h->lock, flags);
> -	kfree(buff);
> -	return err;
> -}
> -
> -/*
> - * alua_stpg - Issue a SET TARGET PORT GROUP command
> - *
> - * Issue a SET TARGET PORT GROUP command and evaluate the
> - * response. Returns SCSI_DH_RETRY per default to trigger
> - * a re-evaluation of the target group state or SCSI_DH_OK
> - * if no further action needs to be taken.
> - */
> -static unsigned alua_stpg(struct scsi_device *sdev)
> -{
> -	int retval;
> -	struct scsi_sense_hdr sense_hdr;
> -	struct alua_dh_data *h = sdev->handler_data;
> -
> -	if (!(h->tpgs & TPGS_MODE_EXPLICIT)) {
> -		/* Only implicit ALUA supported, retry */
> -		return SCSI_DH_RETRY;
> -	}
> -	switch (h->state) {
> -	case SCSI_ACCESS_STATE_OPTIMAL:
> -		return SCSI_DH_OK;
> -	case SCSI_ACCESS_STATE_ACTIVE:
> -		if ((h->flags & ALUA_OPTIMIZE_STPG) &&
> -		    !h->pref &&
> -		    (h->tpgs & TPGS_MODE_IMPLICIT))
> -			return SCSI_DH_OK;
> -		break;
> -	case SCSI_ACCESS_STATE_STANDBY:
> -	case SCSI_ACCESS_STATE_UNAVAILABLE:
> -		break;
> -	case SCSI_ACCESS_STATE_OFFLINE:
> -		return SCSI_DH_IO;
> -	case SCSI_ACCESS_STATE_TRANSITIONING:
> -		break;
> -	default:
> -		sdev_printk(KERN_INFO, sdev,
> -			    "%s: stpg failed, unhandled TPGS state %d",
> -			    ALUA_DH_NAME, h->state);
> -		return SCSI_DH_NOSYS;
> -	}
> -	retval = submit_stpg(sdev, h->group_id, &sense_hdr);
> -
> -	if (retval) {
> -		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
> -			sdev_printk(KERN_INFO, sdev,
> -				    "%s: stpg failed, result %d",
> -				    ALUA_DH_NAME, retval);
> -			if (retval < 0)
> -				return SCSI_DH_DEV_TEMP_BUSY;
> -		} else {
> -			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
> -				    ALUA_DH_NAME);
> -			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> -		}
> -	}
> -	/* Retry RTPG */
> -	return SCSI_DH_RETRY;
> -}
> -
>  static void alua_rtpg_work(struct work_struct *work)
>  {
>  	struct alua_dh_data *h =
> @@ -670,56 +179,41 @@ static void alua_rtpg_work(struct work_struct *work)
>  	int err = SCSI_DH_OK;
>  	struct alua_queue_data *qdata, *tmp;
>  	unsigned long flags;
> +	int ret;
>  
>  	spin_lock_irqsave(&h->lock, flags);
>  	h->flags |= ALUA_PG_RUNNING;
>  	if (h->flags & ALUA_PG_RUN_RTPG) {
> -		int state = h->state;
>  
>  		h->flags &= ~ALUA_PG_RUN_RTPG;
>  		spin_unlock_irqrestore(&h->lock, flags);
> -		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
> -			if (alua_tur(sdev) == SCSI_DH_RETRY) {
> -				spin_lock_irqsave(&h->lock, flags);
> -				h->flags &= ~ALUA_PG_RUNNING;
> -				h->flags |= ALUA_PG_RUN_RTPG;
> -				if (!h->interval)
> -					h->interval = ALUA_RTPG_RETRY_DELAY;
> -				spin_unlock_irqrestore(&h->lock, flags);
> -				queue_delayed_work(kaluad_wq, &h->rtpg_work,
> -						   h->interval * HZ);
> -				return;
> -			}
> -			/* Send RTPG on failure or if TUR indicates SUCCESS */
> -		}
> -		err = alua_rtpg(sdev);
> -		spin_lock_irqsave(&h->lock, flags);
> -
> -		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
> +		ret = scsi_alua_rtpg_run(sdev);
> +		if (ret == -EAGAIN) {

This no longer handles the case where you want to trigger a new rtpg as
soon as the running one finishes. I think it should be checking
(ret == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG)
with a spinlock held.

> +			spin_lock_irqsave(&h->lock, flags);
>  			h->flags &= ~ALUA_PG_RUNNING;
> -			if (err == SCSI_DH_IMM_RETRY)
> -				h->interval = 0;
> -			else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
> -				h->interval = ALUA_RTPG_RETRY_DELAY;
>  			h->flags |= ALUA_PG_RUN_RTPG;
>  			spin_unlock_irqrestore(&h->lock, flags);
> -			goto queue_rtpg;
> +			queue_delayed_work(kaluad_wq, &h->rtpg_work,
> +							   sdev->alua->interval * HZ);
> +			return;
>  		}
> -		if (err != SCSI_DH_OK)
> -			h->flags &= ~ALUA_PG_RUN_STPG;
> +		if (err != 0)
> +				h->flags &= ~ALUA_PG_RUN_STPG;
>  	}
> +	spin_lock_irqsave(&h->lock, flags);

If h->flags & ALUA_PG_RUN_RTPG is false above, h->lock will already be
locked.

>  	if (h->flags & ALUA_PG_RUN_STPG) {
>  		h->flags &= ~ALUA_PG_RUN_STPG;
>  		spin_unlock_irqrestore(&h->lock, flags);
> -		err = alua_stpg(sdev);
> -		spin_lock_irqsave(&h->lock, flags);
> -		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
> +		ret = scsi_alua_stpg_run(sdev, h->flags & ALUA_OPTIMIZE_STPG);
> +		if (err == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) {

To avoid a race with resetting ALUA_PG_RUN_RTPG, this check needs to be
done with the spinlock held.

-Ben

> +			spin_lock_irqsave(&h->lock, flags);
>  			h->flags |= ALUA_PG_RUN_RTPG;
> -			h->interval = 0;
>  			h->flags &= ~ALUA_PG_RUNNING;
>  			spin_unlock_irqrestore(&h->lock, flags);
>  			goto queue_rtpg;
>  		}
> +	} else {
> +		spin_unlock_irqrestore(&h->lock, flags);
>  	}
>  
>  	list_splice_init(&h->rtpg_list, &qdata_list);
> @@ -728,8 +222,6 @@ static void alua_rtpg_work(struct work_struct *work)
>  	 * Re-enable the device for the next attempt.
>  	 */
>  	h->disabled = false;
> -	spin_unlock_irqrestore(&h->lock, flags);
> -
>  
>  	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
>  		list_del(&qdata->entry);
> @@ -745,7 +237,7 @@ static void alua_rtpg_work(struct work_struct *work)
>  	return;
>  
>  queue_rtpg:
> -	queue_delayed_work(kaluad_wq, &h->rtpg_work, h->interval * HZ);
> +	queue_delayed_work(kaluad_wq, &h->rtpg_work, sdev->alua->interval * HZ);
>  }
>  
>  /**
> @@ -809,7 +301,7 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
>  
>  	mutex_lock(&h->init_mutex);
>  	h->disabled = false;
> -	tpgs = alua_check_tpgs(sdev);
> +	tpgs = scsi_alua_check_tpgs(sdev);
>  	if (tpgs != TPGS_MODE_NONE)
>  		err = alua_check_vpd(sdev, h, tpgs);
>  	h->init_error = err;
> @@ -898,34 +390,6 @@ static void alua_check(struct scsi_device *sdev, bool force)
>  	alua_rtpg_queue(sdev, NULL, force);
>  }
>  
> -/*
> - * alua_prep_fn - request callback
> - *
> - * Fail I/O to all paths not in state
> - * active/optimized or active/non-optimized.
> - */
> -static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
> -{
> -	struct alua_dh_data *h = sdev->handler_data;
> -	unsigned long flags;
> -	unsigned char state;
> -
> -	spin_lock_irqsave(&h->lock, flags);
> -	state = h->state;
> -	spin_unlock_irqrestore(&h->lock, flags);
> -
> -	switch (state) {
> -	case SCSI_ACCESS_STATE_OPTIMAL:
> -	case SCSI_ACCESS_STATE_ACTIVE:
> -	case SCSI_ACCESS_STATE_LBA:
> -	case SCSI_ACCESS_STATE_TRANSITIONING:
> -		return BLK_STS_OK;
> -	default:
> -		req->rq_flags |= RQF_QUIET;
> -		return BLK_STS_IOERR;
> -	}
> -}
> -
>  static void alua_rescan(struct scsi_device *sdev)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
> @@ -953,8 +417,6 @@ static int alua_bus_attach(struct scsi_device *sdev)
>  
>  	mutex_init(&h->init_mutex);
>  
> -	h->state = SCSI_ACCESS_STATE_OPTIMAL;
> -	h->valid_states = TPGS_SUPPORT_ALL;
>  	if (optimize_stpg)
>  		h->flags |= ALUA_OPTIMIZE_STPG;
>  
> @@ -986,7 +448,7 @@ static struct scsi_device_handler alua_dh = {
>  	.module = THIS_MODULE,
>  	.attach = alua_bus_attach,
>  	.detach = alua_bus_detach,
> -	.prep_fn = alua_prep_fn,
> +	.prep_fn = scsi_alua_prep_fn,
>  	.check_sense = alua_check_sense,
>  	.activate = alua_activate,
>  	.rescan = alua_rescan,
> -- 
> 2.43.5


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
  2026-03-18  8:08   ` Hannes Reinecke
  2026-03-18 23:08   ` kernel test robot
@ 2026-03-23  1:58   ` Benjamin Marzinski
  2026-03-23 12:52     ` John Garry
  2 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23  1:58 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 17, 2026 at 12:07:03PM +0000, John Garry wrote:
> For when no device handler is used, add ALUA support.
> 
> This will be equivalent to when native SCSI multipathing is used.
> 
> Essentially all the same handling is available as DH alua driver for
> rescan, request prep, sense handling.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  drivers/scsi/scsi_alua.c  | 93 +++++++++++++++++++++++++++++++++++++++
>  drivers/scsi/scsi_error.c |  7 +++
>  drivers/scsi/scsi_lib.c   |  7 +++
>  drivers/scsi/scsi_scan.c  |  2 +
>  drivers/scsi/scsi_sysfs.c |  4 +-
>  include/scsi/scsi_alua.h  | 14 ++++++
>  6 files changed, 126 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> index d3fcd887e5018..ee0229b1a9d12 100644
> --- a/drivers/scsi/scsi_alua.c
> +++ b/drivers/scsi/scsi_alua.c
> @@ -562,6 +562,90 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
>  }
>  EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
>  
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +					      struct scsi_sense_hdr *sense_hdr)

This seems like it should be shareable with scsi_dh_alua as well.  It
might need to take a function to call for rescanning and have
alua_check_sense() be a wrapper around it, but since the force argument
to alua_check() is now always set to true in scsi_dh_alua, it's
unnecessary, so both it and scsi_device_alua_rescan() can have the
same arguments.

> +{
> +	switch (sense_hdr->sense_key) {
> +	case NOT_READY:
> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> +			/*
> +			 * LUN Not Accessible - ALUA state transition
> +			 */
> +			scsi_alua_handle_state_transition(sdev);
> +			return NEEDS_RETRY;
> +		}
> +		break;
> +	case UNIT_ATTENTION:
> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> +			/*
> +			 * LUN Not Accessible - ALUA state transition
> +			 */
> +			scsi_alua_handle_state_transition(sdev);
> +			return NEEDS_RETRY;
> +		}
> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
> +			/*
> +			 * Power On, Reset, or Bus Device Reset.
> +			 * Might have obscured a state transition,
> +			 * so schedule a recheck.
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
> +			/*
> +			 * Device internal reset
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
> +			/*
> +			 * Mode Parameters Changed
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
> +			/*
> +			 * ALUA state changed
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
> +			/*
> +			 * Implicit ALUA state transition failed
> +			 */
> +			scsi_device_alua_rescan(sdev);
> +			return ADD_TO_MLQUEUE;
> +		}
> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
> +			/*
> +			 * Inquiry data has changed
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
> +			/*
> +			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
> +			 * when switching controllers on targets like
> +			 * Intel Multi-Flex. We can just retry.
> +			 */
> +			return ADD_TO_MLQUEUE;
> +		break;
> +	}
> +
> +	return SCSI_RETURN_NOT_HANDLED;
> +}
> +
> +static void alua_rtpg_work(struct work_struct *work)
> +{
> +	struct alua_data *alua =
> +		container_of(work, struct alua_data, work.work);
> +	int ret;
> +
> +	ret = scsi_alua_rtpg_run(alua->sdev);
> +
> +	if (ret == -EAGAIN)
> +		queue_delayed_work(kalua_wq, &alua->work, alua->interval * HZ);
> +}
> +
>  int scsi_alua_sdev_init(struct scsi_device *sdev)
>  {
>  	int rel_port, ret, tpgs;
> @@ -591,6 +675,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
>  		goto out_free_data;
>  	}
>  
> +	INIT_DELAYED_WORK(&sdev->alua->work, alua_rtpg_work);
>  	sdev->alua->sdev = sdev;
>  	sdev->alua->tpgs = tpgs;
>  	spin_lock_init(&sdev->alua->lock);
> @@ -638,6 +723,14 @@ bool scsi_device_alua_implicit(struct scsi_device *sdev)
>  	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
>  }
>  
> +void scsi_device_alua_rescan(struct scsi_device *sdev)
> +{
> +	struct alua_data *alua = sdev->alua;
> +
> +	queue_delayed_work(kalua_wq, &alua->work,
> +				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS));

This code doesn't support triggering a new rtpg while the current one is
running.  I'll leave it to people with more scsi expertise to say how
important that is, but the scsi_dh_alua code now will always trigger a
new rtpg in this case (or at least it would, with the issues from patch
12 fixed).

-Ben

> +}
> +
>  int scsi_alua_init(void)
>  {
>  	kalua_wq = alloc_workqueue("kalua", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 147127fb4db9c..a542e7a85a24d 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -29,6 +29,7 @@
>  #include <linux/jiffies.h>
>  
>  #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <scsi/scsi_dbg.h>
>  #include <scsi/scsi_device.h>
> @@ -578,6 +579,12 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
>  		if (rc != SCSI_RETURN_NOT_HANDLED)
>  			return rc;
>  		/* handler does not care. Drop down to default handling */
> +	} else if (scsi_device_alua_implicit(sdev)) {
> +		enum scsi_disposition rc;
> +
> +		rc = scsi_alua_check_sense(sdev, &sshdr);
> +		if (rc != SCSI_RETURN_NOT_HANDLED)
> +			return rc;
>  	}
>  
>  	if (scmd->cmnd[0] == TEST_UNIT_READY &&
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index d3a8cd4166f92..e5bcee555ea10 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -26,6 +26,7 @@
>  #include <linux/unaligned.h>
>  
>  #include <scsi/scsi.h>
> +#include <scsi/scsi_alua.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <scsi/scsi_dbg.h>
>  #include <scsi/scsi_device.h>
> @@ -1719,6 +1720,12 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
>  	if (sdev->handler && sdev->handler->prep_fn) {
>  		blk_status_t ret = sdev->handler->prep_fn(sdev, req);
>  
> +		if (ret != BLK_STS_OK)
> +			return ret;
> +	} else if (scsi_device_alua_implicit(sdev)) {
> +		/* We should be able to make this common for ALUA DH as well */
> +		blk_status_t ret = scsi_alua_prep_fn(sdev, req);
> +
>  		if (ret != BLK_STS_OK)
>  			return ret;
>  	}
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 3af64d1231445..73caf83bd1097 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1744,6 +1744,8 @@ int scsi_rescan_device(struct scsi_device *sdev)
>  
>  	if (sdev->handler && sdev->handler->rescan)
>  		sdev->handler->rescan(sdev);
> +	else if (scsi_device_alua_implicit(sdev))
> +		scsi_device_alua_rescan(sdev);
>  
>  	if (dev->driver && try_module_get(dev->driver->owner)) {
>  		struct scsi_driver *drv = to_scsi_driver(dev->driver);
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 6c4c3c22f6acf..71a9613898cfc 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1152,7 +1152,7 @@ sdev_show_access_state(struct device *dev,
>  	unsigned char access_state;
>  	const char *access_state_name;
>  
> -	if (!sdev->handler)
> +	if (!sdev->handler && !scsi_device_alua_implicit(sdev))
>  		return -EINVAL;
>  
>  	access_state = (sdev->access_state & SCSI_ACCESS_STATE_MASK);
> @@ -1409,6 +1409,8 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>  	scsi_autopm_get_device(sdev);
>  
>  	scsi_dh_add_device(sdev);
> +	if (!sdev->handler && scsi_device_alua_implicit(sdev))
> +		scsi_device_alua_rescan(sdev);
>  
>  	error = device_add(&sdev->sdev_gendev);
>  	if (error) {
> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
> index 2d5db944f75b7..8e506d1d66cce 100644
> --- a/include/scsi/scsi_alua.h
> +++ b/include/scsi/scsi_alua.h
> @@ -24,6 +24,7 @@ struct alua_data {
>  	unsigned char		transition_tmo;
>  	unsigned long		expiry;
>  	unsigned long		interval;
> +	struct delayed_work	work;
>  	struct scsi_device	*sdev;
>  	spinlock_t		lock;
>  };
> @@ -35,11 +36,15 @@ void scsi_alua_handle_state_transition(struct scsi_device *sdev);
>  
>  int scsi_alua_check_tpgs(struct scsi_device *sdev);
>  
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +				struct scsi_sense_hdr *sense_hdr);
> +
>  int scsi_alua_rtpg_run(struct scsi_device *sdev);
>  int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize);
>  
>  blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req);
>  
> +void scsi_device_alua_rescan(struct scsi_device *sdev);
>  bool scsi_device_alua_implicit(struct scsi_device *sdev);
>  
>  int scsi_alua_init(void);
> @@ -53,6 +58,12 @@ static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
>  {
>  	return 0;
>  }
> +static inline
> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> +				struct scsi_sense_hdr *sense_hdr)
> +{
> +	return SCSI_RETURN_NOT_HANDLED;
> +}
>  static inline int scsi_alua_rtpg_run(struct scsi_device *sdev)
>  {
>  	return 0;
> @@ -66,6 +77,9 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct request *req)
>  {
>  	return BLK_STS_OK;
>  }
> +static inline void scsi_device_alua_rescan(struct scsi_device *sdev)
> +{
> +}
>  static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
>  {
>  	return false;
> -- 
> 2.43.5


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-22 17:37 ` [PATCH 00/13] scsi: Core ALUA driver Benjamin Marzinski
@ 2026-03-23  9:57   ` John Garry
  2026-03-23 16:25     ` Benjamin Marzinski
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-23  9:57 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 22/03/2026 17:37, Benjamin Marzinski wrote:
>> I think that this work is a real regression possibility for
>> dm-multipath, so we need to be careful.
> At the risk of showing just how limited my SCSI knowledge is, I need to
> ask: is any of this actually necessary to get native scsi multipath
> working with Implicit ALUA?
> 
> If the goal is to limit this to IMPLICIT ALUA only, I was expecting that
> you could just leave the scsi_dh_alua code completely alone. If native
> scsi multipathing didn't disable the device handler, it seemed that this
> would basically just work. With the device handler attached,

We only get the scsi_dh_activate() -> alua_activate() call from 
dm-mpath.c, and that callchain could not happen for native SCSI 
multipath. But, yes, we do the alua_rtpg_queue() call from a rescan, but 
we should be checking if the path is available first (and not rely on a 
rescan).

> when the
> array updates the ALUA state, that should, at least I believe, trigger a
> unit attention that will fire off a RTPG command. That should update the
> sdev->access_state, which the multipath code could use to pick the
> correct path. Right? What am I missing here?
> Is this just a parallel
> exercise to overhaul the ALUA code?

The SCSI community would rather not see more usage for device handlers.

How we then get ALUA support for native SCSI multipath is the question. 
My original series just really duplicated the scsi_dh_alua.c RTPG 
support for native SCSI multipath into a limited "core" driver. Hannes 
thinks that a core ALUA driver to also support DH would be better 
(IIUC), which I am attempting in this series. I will re-iterate that I 
would rather not touch scsi_dh_alua.c, unless the changes are simple and 
obvious(ly correct).

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-23  0:08   ` Benjamin Marzinski
@ 2026-03-23 10:33     ` John Garry
  2026-03-23 16:15       ` Benjamin Marzinski
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-23 10:33 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 00:08, Benjamin Marzinski wrote:
>>       k += off, desc += off) {
>> -		u16 group_id = get_unaligned_be16(&desc[2]);
>> -
>> -		spin_lock_irqsave(&port_group_lock, flags);
>> -		tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
>> -					  group_id);
>> -		spin_unlock_irqrestore(&port_group_lock, flags);
>> -		if (tmp_pg) {
>> -			if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
>> -				if ((tmp_pg == pg) ||
>> -				    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
>> -					struct alua_dh_data *h;
>> -
>> -					tmp_pg->state = desc[0] & 0x0f;
>> -					tmp_pg->pref = desc[0] >> 7;
>> -					rcu_read_lock();
>> -					list_for_each_entry_rcu(h,
>> -						&tmp_pg->dh_list, node) {
>> -						if (!h->sdev)
>> -							continue;
>> -						h->sdev->access_state = desc[0];
>> -					}
>> -					rcu_read_unlock();
>> -				}
>> -				if (tmp_pg == pg)
>> -					tmp_pg->valid_states = desc[1];
>> -				spin_unlock_irqrestore(&tmp_pg->lock, flags);
>> -			}
>> -			kref_put(&tmp_pg->kref, release_port_group);
>> +		u16 group_id_desc = get_unaligned_be16(&desc[2]);
>> +
>> +		spin_lock_irqsave(&h->lock, flags);
>> +		if (group_id_desc == group_id) {
>> +			h->group_id = group_id;
>> +			WRITE_ONCE(h->state, desc[0] & 0x0f);
>> +			h->pref = desc[0] >> 7;
>> +			WRITE_ONCE(sdev->access_state, desc[0]);
>> +			h->valid_states = desc[1];
> Instead of alua_rtpg() updating the access_state of all of the devices in
> all the port groups, and the state and pref of all the port groups, it
> now just sets these for one device. It seems like it's wasting a lot of
> information that it used to use. For instance, now when a scsi command
> returns a unit attention that the ALUA state has changed, it won't get
> updated on all the devices, just the one that got the unit attention.

The fabric should then trigger this PG info update to be re-scanned 
per-path/sdev (and not just a single sdev in the PG). From testing with 
a linux target, this is what happens - a UA is triggered per path when I 
changed the PG access state.

> 
>>   		}
>> +		spin_unlock_irqrestore(&h->lock, flags);
>>   		off = 8 + (desc[7] * 4);
>>   	}
>>   
>>    skip_rtpg:
>> -	spin_lock_irqsave(&pg->lock, flags);
>> +	spin_lock_irqsave(&h->lock, flags);
>>   	if (transitioning_sense)
>> -		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
>> +		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
>>  

...

>> -
>>   static void alua_rtpg_work(struct work_struct *work)
>>   {
>> -	struct alua_port_group *pg =
>> -		container_of(work, struct alua_port_group, rtpg_work.work);
>> -	struct scsi_device *sdev, *prev_sdev = NULL;
>> +	struct alua_dh_data *h =
>> +		container_of(work, struct alua_dh_data, rtpg_work.work);
>> +	struct scsi_device *sdev = h->sdev;
>>   	LIST_HEAD(qdata_list);
>>   	int err = SCSI_DH_OK;
>>   	struct alua_queue_data *qdata, *tmp;
>> -	struct alua_dh_data *h;
>>   	unsigned long flags;
>>   
>> -	spin_lock_irqsave(&pg->lock, flags);
>> -	sdev = pg->rtpg_sdev;
>> -	if (!sdev) {
>> -		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
>> -		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
>> -		spin_unlock_irqrestore(&pg->lock, flags);
>> -		kref_put(&pg->kref, release_port_group);
>> -		return;
>> -	}
>> -	pg->flags |= ALUA_PG_RUNNING;
>> -	if (pg->flags & ALUA_PG_RUN_RTPG) {
>> -		int state = pg->state;
>> +	spin_lock_irqsave(&h->lock, flags);
>> +	h->flags |= ALUA_PG_RUNNING;
>> +	if (h->flags & ALUA_PG_RUN_RTPG) {
>> +		int state = h->state;
>>   
>> -		pg->flags &= ~ALUA_PG_RUN_RTPG;
>> -		spin_unlock_irqrestore(&pg->lock, flags);
>> +		h->flags &= ~ALUA_PG_RUN_RTPG;
>> +		spin_unlock_irqrestore(&h->lock, flags);
>>   		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
>>   			if (alua_tur(sdev) == SCSI_DH_RETRY) {
>> -				spin_lock_irqsave(&pg->lock, flags);
>> -				pg->flags &= ~ALUA_PG_RUNNING;
>> -				pg->flags |= ALUA_PG_RUN_RTPG;
>> -				if (!pg->interval)
>> -					pg->interval = ALUA_RTPG_RETRY_DELAY;
>> -				spin_unlock_irqrestore(&pg->lock, flags);
>> -				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
>> -						   pg->interval * HZ);
>> +				spin_lock_irqsave(&h->lock, flags);
>> +				h->flags &= ~ALUA_PG_RUNNING;
>> +				h->flags |= ALUA_PG_RUN_RTPG;
>> +				if (!h->interval)
>> +					h->interval = ALUA_RTPG_RETRY_DELAY;
>> +				spin_unlock_irqrestore(&h->lock, flags);
>> +				queue_delayed_work(kaluad_wq, &h->rtpg_work,
>> +						   h->interval * HZ);
>>   				return;
>>   			}
>>   			/* Send RTPG on failure or if TUR indicates SUCCESS */
>>   		}
>> -		err = alua_rtpg(sdev, pg);
>> -		spin_lock_irqsave(&pg->lock, flags);
>> +		err = alua_rtpg(sdev);
>> +		spin_lock_irqsave(&h->lock, flags);
>>   
>> -		/* If RTPG failed on the current device, try using another */
>> -		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
>> -		    (prev_sdev = alua_rtpg_select_sdev(pg)))
>> -			err = SCSI_DH_IMM_RETRY;
> Previously, if the rtpg failed on a device, another device would be
> tried, and the unusable device's alua state would get updated, along
> with all the other device's states.

Where specifically are you referring to here please?

> Now I don't see how a failed device
> gets its state updated.

AFAICS, I have only omitted how we iterate through the devices per-PG, 
as now we just do this work for all paths/scsi devices.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support
  2026-03-23  1:47   ` Benjamin Marzinski
@ 2026-03-23 11:59     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 11:59 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 01:47, Benjamin Marzinski wrote:
>>   static void alua_rtpg_work(struct work_struct *work)
>>   {
>>   	struct alua_dh_data *h =
>> @@ -670,56 +179,41 @@ static void alua_rtpg_work(struct work_struct *work)
>>   	int err = SCSI_DH_OK;
>>   	struct alua_queue_data *qdata, *tmp;
>>   	unsigned long flags;
>> +	int ret;
>>   
>>   	spin_lock_irqsave(&h->lock, flags);
>>   	h->flags |= ALUA_PG_RUNNING;
>>   	if (h->flags & ALUA_PG_RUN_RTPG) {
>> -		int state = h->state;
>>   
>>   		h->flags &= ~ALUA_PG_RUN_RTPG;
>>   		spin_unlock_irqrestore(&h->lock, flags);
>> -		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
>> -			if (alua_tur(sdev) == SCSI_DH_RETRY) {
>> -				spin_lock_irqsave(&h->lock, flags);
>> -				h->flags &= ~ALUA_PG_RUNNING;
>> -				h->flags |= ALUA_PG_RUN_RTPG;
>> -				if (!h->interval)
>> -					h->interval = ALUA_RTPG_RETRY_DELAY;
>> -				spin_unlock_irqrestore(&h->lock, flags);
>> -				queue_delayed_work(kaluad_wq, &h->rtpg_work,
>> -						   h->interval * HZ);
>> -				return;
>> -			}
>> -			/* Send RTPG on failure or if TUR indicates SUCCESS */
>> -		}
>> -		err = alua_rtpg(sdev);
>> -		spin_lock_irqsave(&h->lock, flags);
>> -
>> -		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
>> +		ret = scsi_alua_rtpg_run(sdev);
>> +		if (ret == -EAGAIN) {
> This no longer handles the case where you want to trigger a new rtpg as
> soon as the running one finishes. I think it should be checking
> (ret == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG)
> with a spinlock held.
> 

Yeah, this is all tricky to handle, as the code in scsi_dh_alua.c was 
handling the error codes with the state machine, and I want to move the 
error handling into the core driver.

As for your specific point, I think that ALUA_PG_RUN_RTPG can now only 
be set from outside this work handler, and that should also trigger the 
work (so the check on ALUA_PG_RUN_RTPG was not really required). But, I 
think that I can just have it as before (with the h->flags & 
ALUA_PG_RUN_RTPG check).

>> +			spin_lock_irqsave(&h->lock, flags);
>>   			h->flags &= ~ALUA_PG_RUNNING;
>> -			if (err == SCSI_DH_IMM_RETRY)
>> -				h->interval = 0;
>> -			else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
>> -				h->interval = ALUA_RTPG_RETRY_DELAY;
>>   			h->flags |= ALUA_PG_RUN_RTPG;
>>   			spin_unlock_irqrestore(&h->lock, flags);
>> -			goto queue_rtpg;
>> +			queue_delayed_work(kaluad_wq, &h->rtpg_work,
>> +							   sdev->alua->interval * HZ);
>> +			return;
>>   		}
>> -		if (err != SCSI_DH_OK)
>> -			h->flags &= ~ALUA_PG_RUN_STPG;
>> +		if (err != 0)
>> +				h->flags &= ~ALUA_PG_RUN_STPG;
>>   	}
>> +	spin_lock_irqsave(&h->lock, flags);
> If h->flags & ALUA_PG_RUN_RTPG is false above, h->lock will already be
> locked.
> 

Right, that is a bug

>>   	if (h->flags & ALUA_PG_RUN_STPG) {
>>   		h->flags &= ~ALUA_PG_RUN_STPG;
>>   		spin_unlock_irqrestore(&h->lock, flags);
>> -		err = alua_stpg(sdev);
>> -		spin_lock_irqsave(&h->lock, flags);
>> -		if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
>> +		ret = scsi_alua_stpg_run(sdev, h->flags & ALUA_OPTIMIZE_STPG);
>> +		if (err == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) {
> To avoid a race with resetting ALUA_PG_RUN_RTPG, this check needs to be
> done with the spinlock held.

Yes,

Thanks,
John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-23  1:58   ` Benjamin Marzinski
@ 2026-03-23 12:52     ` John Garry
  2026-03-23 17:29       ` Benjamin Marzinski
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-23 12:52 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 01:58, Benjamin Marzinski wrote:
> On Tue, Mar 17, 2026 at 12:07:03PM +0000, John Garry wrote:
>> For when no device handler is used, add ALUA support.
>>
>> This will be equivalent to when native SCSI multipathing is used.
>>
>> Essentially all the same handling is available as DH alua driver for
>> rescan, request prep, sense handling.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   drivers/scsi/scsi_alua.c  | 93 +++++++++++++++++++++++++++++++++++++++
>>   drivers/scsi/scsi_error.c |  7 +++
>>   drivers/scsi/scsi_lib.c   |  7 +++
>>   drivers/scsi/scsi_scan.c  |  2 +
>>   drivers/scsi/scsi_sysfs.c |  4 +-
>>   include/scsi/scsi_alua.h  | 14 ++++++
>>   6 files changed, 126 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
>> index d3fcd887e5018..ee0229b1a9d12 100644
>> --- a/drivers/scsi/scsi_alua.c
>> +++ b/drivers/scsi/scsi_alua.c
>> @@ -562,6 +562,90 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
>>   }
>>   EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
>>   
>> +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
>> +					      struct scsi_sense_hdr *sense_hdr)
> 
> This seems like it should be shareable with scsi_dh_alua as well.  It
> might need to take a function to call for rescanning and have
> alua_check_sense() be a wrapper around it, but since the force argument
> to alua_check() is now always set to true in scsi_dh_alua, it's
> unnecessary, so both it and scsi_device_alua_rescan() can have the
> same arguments.

Yeah, I tried it and I just thought that adding the rescan callback was 
a bit messy. I can go with the single function if we think it's better.

> 
>> +{
>> +	switch (sense_hdr->sense_key) {
>> +	case NOT_READY:
>> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
>> +			/*
>> +			 * LUN Not Accessible - ALUA state transition
>> +			 */
>> +			scsi_alua_handle_state_transition(sdev);
>> +			return NEEDS_RETRY;
>> +		}
>> +		break;
>> +	case UNIT_ATTENTION:
>> +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
>> +			/*
>> +			 * LUN Not Accessible - ALUA state transition
>> +			 */
>> +			scsi_alua_handle_state_transition(sdev);
>> +			return NEEDS_RETRY;
>> +		}
>> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
>> +			/*
>> +			 * Power On, Reset, or Bus Device Reset.
>> +			 * Might have obscured a state transition,
>> +			 * so schedule a recheck.
>> +			 */
>> +			scsi_device_alua_rescan(sdev);
>> +			return ADD_TO_MLQUEUE;
>> +		}
>> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
>> +			/*
>> +			 * Device internal reset
>> +			 */
>> +			return ADD_TO_MLQUEUE;
>> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
>> +			/*
>> +			 * Mode Parameters Changed
>> +			 */
>> +			return ADD_TO_MLQUEUE;
>> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
>> +			/*
>> +			 * ALUA state changed
>> +			 */
>> +			scsi_device_alua_rescan(sdev);
>> +			return ADD_TO_MLQUEUE;
>> +		}
>> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
>> +			/*
>> +			 * Implicit ALUA state transition failed
>> +			 */
>> +			scsi_device_alua_rescan(sdev);
>> +			return ADD_TO_MLQUEUE;
>> +		}
>> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
>> +			/*
>> +			 * Inquiry data has changed
>> +			 */
>> +			return ADD_TO_MLQUEUE;
>> +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
>> +			/*
>> +			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
>> +			 * when switching controllers on targets like
>> +			 * Intel Multi-Flex. We can just retry.
>> +			 */
>> +			return ADD_TO_MLQUEUE;
>> +		break;
>> +	}
>> +
>> +	return SCSI_RETURN_NOT_HANDLED;
>> +}
>> +
>> +static void alua_rtpg_work(struct work_struct *work)
>> +{
>> +	struct alua_data *alua =
>> +		container_of(work, struct alua_data, work.work);
>> +	int ret;
>> +
>> +	ret = scsi_alua_rtpg_run(alua->sdev);
>> +
>> +	if (ret == -EAGAIN)
>> +		queue_delayed_work(kalua_wq, &alua->work, alua->interval * HZ);
>> +}
>> +
>>   int scsi_alua_sdev_init(struct scsi_device *sdev)
>>   {
>>   	int rel_port, ret, tpgs;
>> @@ -591,6 +675,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
>>   		goto out_free_data;
>>   	}
>>   
>> +	INIT_DELAYED_WORK(&sdev->alua->work, alua_rtpg_work);
>>   	sdev->alua->sdev = sdev;
>>   	sdev->alua->tpgs = tpgs;
>>   	spin_lock_init(&sdev->alua->lock);
>> @@ -638,6 +723,14 @@ bool scsi_device_alua_implicit(struct scsi_device *sdev)
>>   	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
>>   }
>>   
>> +void scsi_device_alua_rescan(struct scsi_device *sdev)
>> +{
>> +	struct alua_data *alua = sdev->alua;
>> +
>> +	queue_delayed_work(kalua_wq, &alua->work,
>> +				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS));
> 
> This code doesn't support triggering a new rtpg while the current one is
> running.  I'll leave it to people with more scsi expertise to say how
> important that is, but the scsi_dh_alua code now will always trigger a
> new rtpg in this case (or at least it would, with the issues from patch
> 12 fixed).
> 

If the work is running and we call queue_delayed_work() on the same 
work_struct, then it is enqueued again. If the work is pending and we 
call queue_delayed_work(), then it is not requeued (as it is already 
queued).

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 02/13] scsi: alua: Create a core ALUA driver
  2026-03-18  7:47   ` Hannes Reinecke
@ 2026-03-23 12:56     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 12:56 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:47, Hannes Reinecke wrote:
> On 3/17/26 13:06, John Garry wrote:
>> Add a dedicated ALUA driver which can be used for native SCSI multipath
>> and also DH-based ALUA support.
>>
> Is this really a 'driver'? It's more additional functionality for a SCSI
> device, and not really a driver.
> At least I _think_ it is ...

Actually it's more of a library than anything :)

>> +
>> +static struct workqueue_struct *kalua_wq;
>> +
>> +int scsi_alua_sdev_init(struct scsi_device *sdev)
>> +{
>> +    int rel_port, ret, tpgs;
>> +
>> +    tpgs = scsi_device_tpgs(sdev);
>> +    if (!tpgs)
>> +        return 0;
>> +
>> +    sdev->alua = kzalloc(sizeof(*sdev->alua), GFP_KERNEL);
>> +    if (!sdev->alua)
>> +        return -ENOMEM;
>> +
> 
> Why do you allocate a separate structure?
> Is this structure shared with something?
> Wouldn't it be better to just add some field to the scsi_device?
>

I could embed the structure in scsi_device, but it's just convenient to 
only ever check sdev->alua to see if alua is supported and also know that the 
members are initialized and hold valid values.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg()
  2026-03-18  7:50   ` Hannes Reinecke
@ 2026-03-23 12:58     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 12:58 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:50, Hannes Reinecke wrote:
>> diff --git a/include/scsi/scsi_alua.h b/include/scsi/scsi_alua.h
>> index 07cdcb4f5b518..068277261ed9d 100644
>> --- a/include/scsi/scsi_alua.h
>> +++ b/include/scsi/scsi_alua.h
>> @@ -16,7 +16,15 @@
>>   struct alua_data {
>>       int            group_id;
>>       int            tpgs;
>> +    int            state;
>> +    int            pref;
>> +    int            valid_states;
>> +    bool            rtpg_ext_hdr_unsupp;
>> +    unsigned char        transition_tmo;
>> +    unsigned long        expiry;
>> +    unsigned long        interval;
>>       struct scsi_device    *sdev;
>> +    spinlock_t        lock;
>>   };
>>   int scsi_alua_sdev_init(struct scsi_device *sdev);
> 
> Ah, right. Now I see where you want to go with the separate
> structure. Still wonder why you need the 'sdev' back link in
> there, though.

at some points we need to look up the sdev from alua data, like 
scsi_alua_rtpg_run()

> 
> Other than that:
> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>

cheers

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 05/13] scsi: alua: Add scsi_alua_tur()
  2026-03-18  7:54   ` Hannes Reinecke
@ 2026-03-23 13:42     ` John Garry
  2026-03-24 10:49       ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-23 13:42 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:54, Hannes Reinecke wrote:
>>   /*
>>    * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
>>    * @sdev: sdev the command should be sent to
> 
> ???
> And this function is useful _why_?
> We're just sending a normal 'TEST UNIT READY', it has nothing to
> do with ALUA. Why do we have a special function here?

This is used in the STPG code, and I added the STPG code to scsi_alua.c

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition()
  2026-03-18  7:58   ` Hannes Reinecke
@ 2026-03-23 13:43     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 13:43 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 07:58, Hannes Reinecke wrote:
>>   int scsi_alua_check_tpgs(struct scsi_device *sdev);
>>   int scsi_alua_rtpg_run(struct scsi_device *sdev);
>> @@ -39,6 +41,9 @@ int scsi_alua_init(void);
>>   void scsi_exit_alua(void);
>>   #else //CONFIG_SCSI_ALUA
>> +static inline void scsi_alua_handle_state_transition(struct 
>> scsi_device *sdev)
>> +{
>> +}
>>   static inline int scsi_alua_check_tpgs(struct scsi_device *sdev)
>>   {
>>       return 0;
> 
> ???
> This doesn't handle a state transition, it just _sets_ the state 
> transition. Please fold it into the patch where the state transition
> is actually handled.

Sure, doing it like this is a bit silly.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn()
  2026-03-18  8:01   ` Hannes Reinecke
@ 2026-03-23 13:49     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 13:49 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 08:01, Hannes Reinecke wrote:
>>   }
>> +static inline
>> +blk_status_t scsi_alua_prep_fn(struct scsi_device *sdev, struct 
>> request *req)
>> +{
>> +    return BLK_STS_OK;
>> +}
>>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>>   {
>>       return 0;
> 
> Hmm. The 'prep_fn' thingie was implemented such that other drivers (like 
> scsi_dh) could intercept the scsi prep function and inject their own
> stuff. But now with this patchset the functionality is in the scsi core,
> so really we should do away with the prep_fn here and call the functions
> directly.

ok, so I can just stop setting alua_dh.prep_fn and then have in 
scsi_prepare_cmd():


if (sdev->handler && sdev->handler->prep_fn) {
	blk_status_t ret = sdev->handler->prep_fn(sdev, req);

	if (ret != BLK_STS_OK)
		return ret;
} else if (sdev->alua) {
	/* We should be able to make this common for ALUA DH as well */
	blk_status_t ret = scsi_alua_prep_fn(sdev, req);

	if (ret != BLK_STS_OK)
		return ret;
}

Or even check sdev->alua first, as that is the most popular DH.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit()
  2026-03-18  8:02   ` Hannes Reinecke
@ 2026-03-23 13:50     ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 13:50 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 08:02, Hannes Reinecke wrote:
>> +
>>   int scsi_alua_init(void);
>>   void scsi_exit_alua(void);
>>   #else //CONFIG_SCSI_ALUA
>> @@ -64,6 +66,10 @@ blk_status_t scsi_alua_prep_fn(struct scsi_device 
>> *sdev, struct request *req)
>>   {
>>       return BLK_STS_OK;
>>   }
>> +static inline bool scsi_device_alua_implicit(struct scsi_device *sdev)
>> +{
>> +    return false;
>> +}
>>   static inline int scsi_alua_sdev_init(struct scsi_device *sdev)
>>   {
>>       return 0;
> 
> Hmm. Can you fold it into the patch where it's actually called?
> It's getting hard to review without that.

Sure, this series is taking baby steps ..

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run()
  2026-03-18  9:24       ` Hannes Reinecke
@ 2026-03-23 13:58         ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 13:58 UTC (permalink / raw)
  To: Hannes Reinecke, Hannes Reinecke, martin.petersen,
	james.bottomley, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 18/03/2026 09:24, Hannes Reinecke wrote:
>>
>> It's not so nice to have the functionality spread out. The way I see 
>> it is that drivers/scsi/scsi_alua.c is mostly a library, but also has 
>> functionality to "drive" ALUA for native SCSI multipathing.
>>
>> Anyway, can you confirm which of the following do you think from this 
>> series should be in scsi_dh_alua.c:
>>
>> - scsi_alua_stpg_run()
>> - scsi_alua_stpg()
>> - submit_stpg()
>>
>> You already said scsi_alua_stpg_run() should be.
>>
> Gnaa. Misread that one (blame lack of coffee).
> stpg should be handled in scsi_dh_alua. Arguable
> we could move the utility functions (submit_stpg
> and maybe scsi_alua_stpg) in the core alua code,
> but scsi_alua_stpg_run() should be kept in
> scsi_dh_alua.
> 
> If that makes sense ...

scsi_alua_stpg_run() hardly does anything - scsi_alua_stpg() has the 
bulk of the functionality. I think that it's nicer to co-locate this 
functionality (with the rtpg code), as when we separate we have 
different code read/writing alua_data structure.

However, I have been hearing that SCSI core code only needs implicit 
support, so I can try that (which is to keep everything STPG in scsi_dh_alua.c)

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-23 10:33     ` John Garry
@ 2026-03-23 16:15       ` Benjamin Marzinski
  2026-03-23 18:07         ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23 16:15 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Mon, Mar 23, 2026 at 10:33:12AM +0000, John Garry wrote:
> On 23/03/2026 00:08, Benjamin Marzinski wrote:
> > >       k += off, desc += off) {
> > > -		u16 group_id = get_unaligned_be16(&desc[2]);
> > > -
> > > -		spin_lock_irqsave(&port_group_lock, flags);
> > > -		tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
> > > -					  group_id);
> > > -		spin_unlock_irqrestore(&port_group_lock, flags);
> > > -		if (tmp_pg) {
> > > -			if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
> > > -				if ((tmp_pg == pg) ||
> > > -				    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
> > > -					struct alua_dh_data *h;
> > > -
> > > -					tmp_pg->state = desc[0] & 0x0f;
> > > -					tmp_pg->pref = desc[0] >> 7;
> > > -					rcu_read_lock();
> > > -					list_for_each_entry_rcu(h,
> > > -						&tmp_pg->dh_list, node) {
> > > -						if (!h->sdev)
> > > -							continue;
> > > -						h->sdev->access_state = desc[0];
> > > -					}
> > > -					rcu_read_unlock();
> > > -				}
> > > -				if (tmp_pg == pg)
> > > -					tmp_pg->valid_states = desc[1];
> > > -				spin_unlock_irqrestore(&tmp_pg->lock, flags);
> > > -			}
> > > -			kref_put(&tmp_pg->kref, release_port_group);
> > > +		u16 group_id_desc = get_unaligned_be16(&desc[2]);
> > > +
> > > +		spin_lock_irqsave(&h->lock, flags);
> > > +		if (group_id_desc == group_id) {
> > > +			h->group_id = group_id;
> > > +			WRITE_ONCE(h->state, desc[0] & 0x0f);
> > > +			h->pref = desc[0] >> 7;
> > > +			WRITE_ONCE(sdev->access_state, desc[0]);
> > > +			h->valid_states = desc[1];
> > instead of alua_rtpg() updating the access_state all of the devices in
> > all the port groups, and the state and pref of all the port groups. It
> > now just sets these for one device. It seems like it's wasting a lot of
> > information that it used to use. For instance, now when a scsi command
> > returns a unit attention that the ALUA state has changed, it won't get
> > updated on all the devices, just the one that got the unit attention.
> 
> The fabric should then trigger this PG info update to be re-scanned
> per-path/sdev (and not just a single sdev in the PG). From testing with a
> linux target, this is what happens - a UA is triggered per path when I
> changed the PG access state.
> 
> > 
> > >   		}
> > > +		spin_unlock_irqrestore(&h->lock, flags);
> > >   		off = 8 + (desc[7] * 4);
> > >   	}
> > >    skip_rtpg:
> > > -	spin_lock_irqsave(&pg->lock, flags);
> > > +	spin_lock_irqsave(&h->lock, flags);
> > >   	if (transitioning_sense)
> > > -		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
> > > +		h->state = SCSI_ACCESS_STATE_TRANSITIONING;
> 
> ...
> 
> > > -
> > >   static void alua_rtpg_work(struct work_struct *work)
> > >   {
> > > -	struct alua_port_group *pg =
> > > -		container_of(work, struct alua_port_group, rtpg_work.work);
> > > -	struct scsi_device *sdev, *prev_sdev = NULL;
> > > +	struct alua_dh_data *h =
> > > +		container_of(work, struct alua_dh_data, rtpg_work.work);
> > > +	struct scsi_device *sdev = h->sdev;
> > >   	LIST_HEAD(qdata_list);
> > >   	int err = SCSI_DH_OK;
> > >   	struct alua_queue_data *qdata, *tmp;
> > > -	struct alua_dh_data *h;
> > >   	unsigned long flags;
> > > -	spin_lock_irqsave(&pg->lock, flags);
> > > -	sdev = pg->rtpg_sdev;
> > > -	if (!sdev) {
> > > -		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
> > > -		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
> > > -		spin_unlock_irqrestore(&pg->lock, flags);
> > > -		kref_put(&pg->kref, release_port_group);
> > > -		return;
> > > -	}
> > > -	pg->flags |= ALUA_PG_RUNNING;
> > > -	if (pg->flags & ALUA_PG_RUN_RTPG) {
> > > -		int state = pg->state;
> > > +	spin_lock_irqsave(&h->lock, flags);
> > > +	h->flags |= ALUA_PG_RUNNING;
> > > +	if (h->flags & ALUA_PG_RUN_RTPG) {
> > > +		int state = h->state;
> > > -		pg->flags &= ~ALUA_PG_RUN_RTPG;
> > > -		spin_unlock_irqrestore(&pg->lock, flags);
> > > +		h->flags &= ~ALUA_PG_RUN_RTPG;
> > > +		spin_unlock_irqrestore(&h->lock, flags);
> > >   		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
> > >   			if (alua_tur(sdev) == SCSI_DH_RETRY) {
> > > -				spin_lock_irqsave(&pg->lock, flags);
> > > -				pg->flags &= ~ALUA_PG_RUNNING;
> > > -				pg->flags |= ALUA_PG_RUN_RTPG;
> > > -				if (!pg->interval)
> > > -					pg->interval = ALUA_RTPG_RETRY_DELAY;
> > > -				spin_unlock_irqrestore(&pg->lock, flags);
> > > -				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
> > > -						   pg->interval * HZ);
> > > +				spin_lock_irqsave(&h->lock, flags);
> > > +				h->flags &= ~ALUA_PG_RUNNING;
> > > +				h->flags |= ALUA_PG_RUN_RTPG;
> > > +				if (!h->interval)
> > > +					h->interval = ALUA_RTPG_RETRY_DELAY;
> > > +				spin_unlock_irqrestore(&h->lock, flags);
> > > +				queue_delayed_work(kaluad_wq, &h->rtpg_work,
> > > +						   h->interval * HZ);
> > >   				return;
> > >   			}
> > >   			/* Send RTPG on failure or if TUR indicates SUCCESS */
> > >   		}
> > > -		err = alua_rtpg(sdev, pg);
> > > -		spin_lock_irqsave(&pg->lock, flags);
> > > +		err = alua_rtpg(sdev);
> > > +		spin_lock_irqsave(&h->lock, flags);
> > > -		/* If RTPG failed on the current device, try using another */
> > > -		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
> > > -		    (prev_sdev = alua_rtpg_select_sdev(pg)))
> > > -			err = SCSI_DH_IMM_RETRY;
> > Previously, if the rtpg failed on a device, another device would be
> > tried, and the unusable device's alua state would get updated, along
> > with all the other device's states.
> 
> Where specifically are you referring to here please?

The removed code above here calls alua_rtpg_select_sdev() to select a
new device to retry the rtpg on, and returns with SCSI_DH_IMM_RETRY, to
retrigger the rtpg on that device. If the rtpg completed on any device,
it would update the state on all the devices. But if we are depending on
each device issuing its own rtpg to update its state, what happens to
the devices that can't complete the rtpg? I assume the correct answer is
to give them some failed state.
 
-Ben

> > Now I don't see how a failed device
> > gets its state updated.
> 
> > AFAICS, I have only omitted how we iterate through the devices per-PG, as
> now we just do this work for all paths/scsi devices.
> 
> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-23  9:57   ` John Garry
@ 2026-03-23 16:25     ` Benjamin Marzinski
  2026-03-23 18:04       ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23 16:25 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Mon, Mar 23, 2026 at 09:57:15AM +0000, John Garry wrote:
> On 22/03/2026 17:37, Benjamin Marzinski wrote:
> > > I think that this work is a real regression possibility for
> > > dm-multipath, so we need to be careful.
> > At the risk of showing just how limited my SCSI knowledge is, I need to
> > ask, Is any of this actually necessary to get native scsi multipath
> > working with Implicit ALUA?
> > 
> > > If the goal is to limit this to IMPLICIT ALUA only, I was expecting that
> > you could just leave the scsi_dh_alua code completely alone. If native
> > scsi multipathing didn't disable the device handler, it seemed that this
> > would basically just work. With the device handler attached,
> 
> We only get the scsi_dh_activate() -> alua_activate() call from dm-mpath.c,
> and that callchain could not happen for native SCSI multipath. But, yes, we
> do the alua_rtpg_queue() call from a rescan, but we should be checking if
> the path is available first (and not rely on a rescan).
> 
> > when the
> > array updates the ALUA state, that should, at least I believe, trigger a
> > unit attention that will fire off a RTPG command. That should update the
> > sdev->access_state, which the multipath code could use to pick the
> > correct path. Right? What am I missing here?
> > Is this just a parallel
> > exercise to overhaul the ALUA code?
> 
> The SCSI community would rather not see more usage for device handlers.

I guess it depends on what you mean by using a device handler. I don't
think the Native SCSI multipath code would need to actively interface
with the device handler code to support IMPLICIT ALUA. IIUC, looking at
sdev->access_state should be enough to pick the correct path. If that's
right, then it doesn't really matter to the multipath code whether this
is getting updated in scsi_dh_alua.c or scsi_alua.c. So refactoring the
scsi ALUA handling code seems orthogonal to adding IMPLICIT ALUA
support to the Native scsi multipathing code.

-Ben

> 
> How we then get ALUA support for native SCSI multipath is the question. My
> original series just really duplicated the scsi_dh_alua.c RTPG support for
> native SCSI multipath into a limited "core" driver. Hannes thinks that a
> core ALUA driver to also support DH would be better (IIUC), which I am
> attempting in this series. I will re-iterate that I would rather not touch
> scsi_dh_alua.c, unless the changes are simple and obvious(ly correct).
> 
> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-23 12:52     ` John Garry
@ 2026-03-23 17:29       ` Benjamin Marzinski
  2026-03-23 18:13         ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23 17:29 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Mon, Mar 23, 2026 at 12:52:18PM +0000, John Garry wrote:
> On 23/03/2026 01:58, Benjamin Marzinski wrote:
> > On Tue, Mar 17, 2026 at 12:07:03PM +0000, John Garry wrote:
> > > For when no device handler is used, add ALUA support.
> > > 
> > > This will be equivalent to when native SCSI multipathing is used.
> > > 
> > > Essentially all the same handling is available as DH alua driver for
> > > rescan, request prep, sense handling.
> > > 
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > > ---
> > >   drivers/scsi/scsi_alua.c  | 93 +++++++++++++++++++++++++++++++++++++++
> > >   drivers/scsi/scsi_error.c |  7 +++
> > >   drivers/scsi/scsi_lib.c   |  7 +++
> > >   drivers/scsi/scsi_scan.c  |  2 +
> > >   drivers/scsi/scsi_sysfs.c |  4 +-
> > >   include/scsi/scsi_alua.h  | 14 ++++++
> > >   6 files changed, 126 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/scsi/scsi_alua.c b/drivers/scsi/scsi_alua.c
> > > index d3fcd887e5018..ee0229b1a9d12 100644
> > > --- a/drivers/scsi/scsi_alua.c
> > > +++ b/drivers/scsi/scsi_alua.c
> > > @@ -562,6 +562,90 @@ int scsi_alua_stpg_run(struct scsi_device *sdev, bool optimize)
> > >   }
> > >   EXPORT_SYMBOL_GPL(scsi_alua_stpg_run);
> > > +enum scsi_disposition scsi_alua_check_sense(struct scsi_device *sdev,
> > > +					      struct scsi_sense_hdr *sense_hdr)
> > 
> > > This seems like it should be shareable with scsi_dh_alua as well.  It
> > might need to take a function to call for rescanning and have
> > alua_check_sense() be a wrapper around it, but since the force argument
> > to alua_check() is now always set to true in scsi_dh_alua, it's
> > unnecessary, so both it and scsi_device_alua_rescan() can have the
> > same arguments.
> 
> Yeah, I tried it and I just thought that adding the rescan callback was a
> bit messy. I can go with the single function if we think it's better.

I would defer to the opinion of an actual SCSI maintainer (which I am
not) on this.

> 
> > 
> > > +{
> > > +	switch (sense_hdr->sense_key) {
> > > +	case NOT_READY:
> > > +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> > > +			/*
> > > +			 * LUN Not Accessible - ALUA state transition
> > > +			 */
> > > +			scsi_alua_handle_state_transition(sdev);
> > > +			return NEEDS_RETRY;
> > > +		}
> > > +		break;
> > > +	case UNIT_ATTENTION:
> > > +		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> > > +			/*
> > > +			 * LUN Not Accessible - ALUA state transition
> > > +			 */
> > > +			scsi_alua_handle_state_transition(sdev);
> > > +			return NEEDS_RETRY;
> > > +		}
> > > +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
> > > +			/*
> > > +			 * Power On, Reset, or Bus Device Reset.
> > > +			 * Might have obscured a state transition,
> > > +			 * so schedule a recheck.
> > > +			 */
> > > +			scsi_device_alua_rescan(sdev);
> > > +			return ADD_TO_MLQUEUE;
> > > +		}
> > > +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
> > > +			/*
> > > +			 * Device internal reset
> > > +			 */
> > > +			return ADD_TO_MLQUEUE;
> > > +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
> > > +			/*
> > > +			 * Mode Parameters Changed
> > > +			 */
> > > +			return ADD_TO_MLQUEUE;
> > > +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
> > > +			/*
> > > +			 * ALUA state changed
> > > +			 */
> > > +			scsi_device_alua_rescan(sdev);
> > > +			return ADD_TO_MLQUEUE;
> > > +		}
> > > +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
> > > +			/*
> > > +			 * Implicit ALUA state transition failed
> > > +			 */
> > > +			scsi_device_alua_rescan(sdev);
> > > +			return ADD_TO_MLQUEUE;
> > > +		}
> > > +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
> > > +			/*
> > > +			 * Inquiry data has changed
> > > +			 */
> > > +			return ADD_TO_MLQUEUE;
> > > +		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
> > > +			/*
> > > +			 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
> > > +			 * when switching controllers on targets like
> > > +			 * Intel Multi-Flex. We can just retry.
> > > +			 */
> > > +			return ADD_TO_MLQUEUE;
> > > +		break;
> > > +	}
> > > +
> > > +	return SCSI_RETURN_NOT_HANDLED;
> > > +}
> > > +
> > > +static void alua_rtpg_work(struct work_struct *work)
> > > +{
> > > +	struct alua_data *alua =
> > > +		container_of(work, struct alua_data, work.work);
> > > +	int ret;
> > > +
> > > +	ret = scsi_alua_rtpg_run(alua->sdev);
> > > +
> > > +	if (ret == -EAGAIN)
> > > +		queue_delayed_work(kalua_wq, &alua->work, alua->interval * HZ);
> > > +}
> > > +
> > >   int scsi_alua_sdev_init(struct scsi_device *sdev)
> > >   {
> > >   	int rel_port, ret, tpgs;
> > > @@ -591,6 +675,7 @@ int scsi_alua_sdev_init(struct scsi_device *sdev)
> > >   		goto out_free_data;
> > >   	}
> > > +	INIT_DELAYED_WORK(&sdev->alua->work, alua_rtpg_work);
> > >   	sdev->alua->sdev = sdev;
> > >   	sdev->alua->tpgs = tpgs;
> > >   	spin_lock_init(&sdev->alua->lock);
> > > @@ -638,6 +723,14 @@ bool scsi_device_alua_implicit(struct scsi_device *sdev)
> > >   	return sdev->alua->tpgs & TPGS_MODE_IMPLICIT;
> > >   }
> > > +void scsi_device_alua_rescan(struct scsi_device *sdev)
> > > +{
> > > +	struct alua_data *alua = sdev->alua;
> > > +
> > > +	queue_delayed_work(kalua_wq, &alua->work,
> > > +				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS));
> > 
> > This code doesn't support triggering a new rtpg while the current one is
> > running.  I'll leave it to people with more scsi expertise to say how
> > important that is, but the scsi_dh_alua code now will always trigger a
> > new rtpg in this case (or at least it would, with the issues from patch
> > 12 fixed).
> > 
> 
> If the work is running and we call queue_delayed_work() on the same
> work_struct, then it is enqueued again. If the work is pending and we call
> queue_delayed_work(), then it is not requeued (as it is already queued).

Oops. You're correct.

-Ben

> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-23 16:25     ` Benjamin Marzinski
@ 2026-03-23 18:04       ` John Garry
  2026-03-23 19:45         ` Benjamin Marzinski
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-23 18:04 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 16:25, Benjamin Marzinski wrote:
>>> If the goal is to limit this to IMPLICT ALUA only, I was expecting that
>>> you could just leave the scsi_dh_alua code completely alone. If native
>>> scsi multipathing didn't disable the device handler, it seemed that this
>>> would basically just work. With the device handler attached,
>> We only get the scsi_dh_activate() -> alua_activate() call from dm-mpath.c,
>> and that callchain could not happen for native SCSI multipath. But, yes, we
>> do the alua_rtpg_queue() call from a rescan, but we should be checking if
>> the path is available first (and not rely on a rescan).
>>
>>> when the
>>> array updates the ALUA state, that should, at least I believe, trigger a
>>> unit attention that will fire off a RTPG command. That should update the
>>> sdev->access_state, which the multipath code could use to pick the
>>> correct path. Right? What am I missing here?
>>> Is this just a parallel
>>> exercise to overhaul the ALUA code?
>> The SCSI community would rather not see more usage for device handlers.
> I guess it depends on what you mean by using a device handler.

My meaning is anything in drivers/scsi/device_handler

> I don't
> think the Native SCSI multipath code would need to actively interface
> with the device handler code to support IMPLICIT ALUA. IIUC, looking at
> sdev->access_state should be enough to pick the correct path.

We also have the functionality from alua_check_sense() to consider.

> If that's
> right, then it doesn't really matter to the multipath code whether this
> is getting updated in scsi_dh_alua.c or scsi_alua.c.
> So refactoring the scsi ALUA handling code seems orthogonal to adding
> IMPLICIT ALUA support to the Native scsi multipathing code.

DH support is considered legacy. As I understand, DH was originally 
added for early explicit ALUA support and other DH-related standards, 
and explicit ALUA is considered flawed. So that is why Martin/Hannes 
doesn't want to see more users (for DH). This is my understanding.

Now I need to try to separate out the ALUA parts we need from 
scsi_dh_alua.c into SCSI core code. I'll talk to Martin about this 
approach again.

Thanks,
John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group
  2026-03-23 16:15       ` Benjamin Marzinski
@ 2026-03-23 18:07         ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 18:07 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 16:15, Benjamin Marzinski wrote:
>>>>    		}
>>>> -		err = alua_rtpg(sdev, pg);
>>>> -		spin_lock_irqsave(&pg->lock, flags);
>>>> +		err = alua_rtpg(sdev);
>>>> +		spin_lock_irqsave(&h->lock, flags);
>>>> -		/* If RTPG failed on the current device, try using another */
>>>> -		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
>>>> -		    (prev_sdev = alua_rtpg_select_sdev(pg)))
>>>> -			err = SCSI_DH_IMM_RETRY;
>>> Previously, if the rtpg failed on a device, another device would be
>>> tried, and the unusable device's alua state would get updated, along
>>> with all the other device's states.
>> Where specifically are you referring to here please?
> The removed code above here calls alua_rtpg_select_sdev() to select a
> new device to retry the rtpg on, and returns with SCSI_DH_IMM_RETRY, to
> retrigger the rtpg on that device. If the rtpg completed on any device,
> it would update the state on all the devices. But if we are depending on
> each device issuing its own rtpg to update its state, what happens to
> the devices that can't complete the rtpg? I assume the correct answer is
> to give them some failed state.
>   

Yes, I am relying on each scsi device to issue the RTPG. If they cannot 
each run it, then they should indeed be given a failed state and be offlined.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 13/13] scsi: core: Add implicit ALUA support
  2026-03-23 17:29       ` Benjamin Marzinski
@ 2026-03-23 18:13         ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-23 18:13 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 17:29, Benjamin Marzinski wrote:
>> Yeah, I tried it and I just thought that adding the rescan callback was a
>> bit messy. I can go with the single function if we think it's better.
> I would defer to the opinion of an actual SCSI maintainer (which I am
> not) on this.

I'll check for a better way to factor out this code so that it does not 
need to be duplicated.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-23 18:04       ` John Garry
@ 2026-03-23 19:45         ` Benjamin Marzinski
  2026-03-24 10:57           ` John Garry
  0 siblings, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-23 19:45 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Mon, Mar 23, 2026 at 06:04:54PM +0000, John Garry wrote:
> On 23/03/2026 16:25, Benjamin Marzinski wrote:
> > > > If the goal is to limit this to IMPLICT ALUA only, I was expecting that
> > > > you could just leave the scsi_dh_alua code completely alone. If native
> > > > scsi multipathing didn't disable the device handler, it seemed that this
> > > > would basically just work. With the device handler attached,
> > > We only get the scsi_dh_activate() -> alua_activate() call from dm-mpath.c,
> > > and that callchain could not happen for native SCSI multipath. But, yes, we
> > > do the alua_rtpg_queue() call from a rescan, but we should be checking if
> > > the path is available first (and not rely on a rescan).
> > > 
> > > > when the
> > > > array updates the ALUA state, that should, at least I believe, trigger a
> > > > unit attention that will fire off a RTPG command. That should update the
> > > > sdev->access_state, which the multipath code could use to pick the
> > > > correct path. Right? What am I missing here?
> > > > Is this just a parallel
> > > > exercise to overhaul the ALUA code?
> > > The SCSI community would rather not see more usage for device handlers.
> > I guess it depends on what you mean by using a device handler.
> 
> My meaning is anything in drivers/scsi/device_handler
> 
> > I don't
> > think the Native SCSI multipath code would need to actively interface
> > with the device handler code to support IMPLICIT ALUA. IIUC, looking at
> > sdev->access_state should be enough to pick the correct path.
> 
> We also have the functionality from alua_check_sense() to consider.

But the multipath code won't call that directly. Right now, the scsi
device handler will, at least for every scsi device except ones using
the Native Multipath code. My point is that this would just work, except
that the Native Multipath code goes out of its way to break it, by
disabling device handlers, and I don't really see the point of disabling
something that every other scsi device, multipathed or not, has enabled.
It's not like leaving it enabled makes it any harder to move the
implicit ALUA support from the device handler to the generic scsi code,
if that's the goal, since the Native Multipath code doesn't care who is
issuing those rtpgs and updating the state.

I guess this is more of a question for Hannes. Is the goal to turn off
automatic device handler attachment in general, and go back to making
dm-multipath attach device handlers to the scsi devices it is using? If
not, then I don't see any reason to have the Native Multipath code
disable it. If it allowed device handlers to get attached, these two
development efforts (native scsi multipath and refactoring the alua
support) could go on in parallel.

Or am I missing something here?
-Ben

> 
> > If that's
> > right, then it doesn't really matter to the multipath code whether this
> > is getting updated in scsi_dh_alua.c or scsi_alua.c.
> > So refactoring the scsi ALUA handling code seems orthogonal to adding
> > IMPLICIT ALUA support to the Native scsi multipathing code.
> 
> DH support is considered legacy. As I understand, DH was originally added
> for early explicit ALUA support and other DH-related standards, and explicit
> ALUA is considered flawed. So that is why Martin/Hannes doesn't want to see
> more users (for DH). This is my understanding.
> 
> Now I need to try to separate out the ALUA parts we need from
> scsi_dh_alua.c into SCSI core code. I'll talk to Martin about this approach
> again.
> 
> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 05/13] scsi: alua: Add scsi_alua_tur()
  2026-03-23 13:42     ` John Garry
@ 2026-03-24 10:49       ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-24 10:49 UTC (permalink / raw)
  To: Hannes Reinecke, martin.petersen, james.bottomley, hare, bmarzins
  Cc: jmeneghi, linux-scsi, michael.christie, snitzer, dm-devel,
	linux-kernel

On 23/03/2026 13:42, John Garry wrote:
> On 18/03/2026 07:54, Hannes Reinecke wrote:
>>>   /*
>>>    * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
>>>    * @sdev: sdev the command should be sent to
>>
>> ???
>> And this function is useful _why_?
>> We're just sending a normal 'TEST UNIT READY', it has nothing to
>> do with ALUA. Why do we have a special function here?
> 
> This is used in the STPG code, and I added the STPG code to scsi_alua.c

I meant to say that this is used in RTPG code from the following:


commit 9d2c30395213166e0b5614fe97576a789864e5de
Author: Hannes Reinecke <hare@suse.de>
Date:   Fri Feb 19 09:17:15 2016 +0100

     scsi_dh_alua: Send TEST UNIT READY to poll for transitioning

     Sending a 'REPORT TARGET PORT GROUP' command is a costly operation,
     as the array has to gather information about all ports.
     So instead of using RTPG to poll for a status update when a port
     is in transitioning we should be sending a TEST UNIT READY, and
     wait for the sense code to report success.


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-23 19:45         ` Benjamin Marzinski
@ 2026-03-24 10:57           ` John Garry
  2026-03-24 13:58             ` Benjamin Marzinski
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-24 10:57 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 23/03/2026 19:45, Benjamin Marzinski wrote:
>>> I don't
>>> think the Native SCSI multipath code would need to actively interface
>>> with the device handler code to support IMPLICIT ALUA. IIUC, looking at
>>> sdev->access_state should be enough to pick the correct path.
>> We also have the functionality from alua_check_sense() to consider.
> But the multipath code won't call that directly. Right now, the scsi
> device handler will, at least for every scsi device except ones using
> the Native Multipath code. My point is that this would just work, except
> that the Native Multipath code goes out of its way to break it, by
> disabling device handlers, and I don't really see the point of disabling
> something that every other scsi device, multipathed or not, has enabled.
> It's not like leaving it enabled makes it any harder to move the
> implicit ALUA support from the device handler to the generic scsi code,
> if that's the goal, since the Native Multipath code doesn't care who is
> issuing those rtpgs and updating the state.
> 
> I guess this is more of a question for Hannes. Is the goal to turn off
> automatic device handler attachment in general, and go back to making
> dm-multipath attach device handlers to the scsi devices it is using?

I'm not answering for Hannes, but I don't think that is the goal.

> If
> not, then I don't see any reason to have the Native Multipath code
> disable it. 

It was just disabled as we now had another method in the scsi core 
code to get ALUA info.

My plan would be - based on this series - to not attach DH just when 
using native SCSI multipath for a device.

> If it allowed device handlers to get attached, these two
> developement efforts (native scsi multipath and refactoring the alua
> support) could go on in parallel.
> 
> Or am I missing something here?

It just seems that, with this DH stuff, there is bad history 
there and no more users are wanted.

But now I am getting bogged down in this ALUA support because of that, 
which I feared would happen.

Thanks,
John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 10:57           ` John Garry
@ 2026-03-24 13:58             ` Benjamin Marzinski
  2026-03-24 15:12               ` John Garry
  2026-03-26 10:17               ` Hannes Reinecke
  0 siblings, 2 replies; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-24 13:58 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 24, 2026 at 10:57:20AM +0000, John Garry wrote:
> On 23/03/2026 19:45, Benjamin Marzinski wrote:
> > > > I don't
> > > > think the Native SCSI multipath code would need to actively interface
> > > > with the device handler code to support IMPLICIT ALUA. IIUC, looking at
> > > > sdev->access_state should be enough to pick the correct path.
> > > We also have the functionality from alua_check_sense() to consider.
> > But the multipath code won't call that directly. Right now, the scsi
> > device handler will, at least for every scsi device except ones using
> > the Native Multipath code. My point is that this would just work, except
> > that the Native Multipath code goes out of its way to break it, by
> > disabling device handlers, and I don't really see the point of disabling
> > something that every other scsi device, multipathed or not, has enabled.
> > It's not like leaving it enabled makes it any harder to move the
> > implicit ALUA support from the device handler to the generic scsi code,
> > if that's the goal, since the Native Multipath code doesn't care who is
> > issuing those rtpgs and updating the state.
> > 
> > I guess this is more of a question for Hannes. Is the goal to turn off
> > automatic device handler attachment in general, and go back to making
> > dm-multipath attach device handlers to the scsi devices it is using?
> 
> I'm not answering for Hannes, but I don't think that is the goal.
> 
> > If
> > not, then I don't see any reason to have the Native Multipath code
> > disable it.
> 
> It was just disabled it as we now had another method in the scsi core code
> to get ALUA info.
> 
> My plan would be - based on this series - to not attach DH just when using
> native SCSI multipath for a device.
> 
> > If it allowed device handlers to get attached, these two
> > developement efforts (native scsi multipath and refactoring the alua
> > support) could go on in parallel.
> > 
> > Or am I missing something here?
> 
> It just seems to be about this DH stuff is that there is bad history there
> and no more users are wanted.

Just to be clear, if the idea was that the Native Multipath code
shouldn't use include/scsi/scsi_dh.h, I completely agree with that. But
I don't see why it can't make use of the results of the existing
implicit ALUA support, since IIUC it doesn't need the scsi_dh interface
to do that. That shouldn't interfere with any refactoring that people
want to do of how the scsi layer actually handles ALUA support. Again,
this is more for Hannes than you, John.

-Ben

> 
> But now I am getting bogged down in this ALUA support because of that, which
> I feared would happen.
> 
> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 13:58             ` Benjamin Marzinski
@ 2026-03-24 15:12               ` John Garry
  2026-03-24 15:48                 ` Benjamin Marzinski
  2026-03-26 10:19                 ` Hannes Reinecke
  2026-03-26 10:17               ` Hannes Reinecke
  1 sibling, 2 replies; 63+ messages in thread
From: John Garry @ 2026-03-24 15:12 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 24/03/2026 13:58, Benjamin Marzinski wrote:
>>> If it allowed device handlers to get attached, these two
>>> developement efforts (native scsi multipath and refactoring the alua
>>> support) could go on in parallel.
>>>
>>> Or am I missing something here?
>> It just seems to be about this DH stuff is that there is bad history there
>> and no more users are wanted.
> Just to be clear, if the idea was that the Native Multipath code
> shouldn't use include/scsi/scsi_dh.h, I completely agree with that. But
> I don't see why it can't make use of the results of the existing
> implicit ALUA support, since IIUC it doesn't need the scsi_dh interface
> to do that.

We would need something like the following to ensure that DH ALUA is 
present to update sdev access_state:

@@ -80,6 +80,7 @@ config SCSI_MULTIPATH
         bool "SCSI multipath support"
         depends on SCSI_MOD
         select LIBMULTIPATH
+       select SCSI_DH_ALUA
         help
           This option enables support for native SCSI multipath support for
           SCSI host.

And that is not even enough, as Kconfigs should only specify build requirements.

We really should be also calling something like scsi_dh_attach() for 
scsi multipath to ensure that DH is attached (and running to update 
sdev->access_state).

And I am not sure how the dh alua module is even autoloaded. I think 
that on my ubuntu machine the multipath-tools.service does it - 
something like this would not be nice for native SCSI multipath support.

> That shouldn't interfere with any refactoring that people
> want to do of how the scsi layer actually handles ALUA support. Again,
> this is more for Hannes than you, John.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 15:12               ` John Garry
@ 2026-03-24 15:48                 ` Benjamin Marzinski
  2026-03-24 16:25                   ` John Garry
  2026-03-26 10:19                 ` Hannes Reinecke
  1 sibling, 1 reply; 63+ messages in thread
From: Benjamin Marzinski @ 2026-03-24 15:48 UTC (permalink / raw)
  To: John Garry
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On Tue, Mar 24, 2026 at 03:12:38PM +0000, John Garry wrote:
> On 24/03/2026 13:58, Benjamin Marzinski wrote:
> > > > If it allowed device handlers to get attached, these two
> > > > developement efforts (native scsi multipath and refactoring the alua
> > > > support) could go on in parallel.
> > > > 
> > > > Or am I missing something here?
> > > It just seems to be about this DH stuff is that there is bad history there
> > > and no more users are wanted.
> > Just to be clear, if the idea was that the Native Multipath code
> > shouldn't use include/scsi/scsi_dh.h, I completely agree with that. But
> > I don't see why it can't make use of the results of the existing
> > implicit ALUA support, since IIUC it doesn't need the scsi_dh interface
> > to do that.
> 
> We would need something like the following to ensure that DH ALUA is present
> to update sdev access_state:
> 
> @@ -80,6 +80,7 @@ config SCSI_MULTIPATH
>         bool "SCSI multipath support"
>         depends on SCSI_MOD
>         select LIBMULTIPATH
> +       select SCSI_DH_ALUA
>         help
>           This option enables support for native SCSI multipath support for
>           SCSI host.

DM_MULTIPATH doesn't force the device handlers to be built. You just
don't have their support if they aren't there. Granted, it does make
sure that if they are built, you can't build dm-multipath directly into
the kernel, if the device handlers are built as modules.

> 
> And that is even enough, as Kconfigs should only specify build requirements.
> 
> We really should be also calling something like scsi_dh_attach() for scsi
> multipath to ensure that DH is attached (and running to update
> sdev->access_state).

That isn't necessary. If there is an alua device handler, kernel will
auto-attach it to any device that supports alua (see scsi_dh_add_device
and scsi_dh_find_driver). DM-multipath's calling of scsi_dh_attach() is
mostly a historical relic.

> And I am not sure how the dh alua module is even autoloaded. I think that on
> my ubuntu machine the multipath-tools.service does it - something like this
> would not be nice for native SCSI multipath support.

Fair point. Depending on how the kernel is built, there could be system
configuration work that needs to happen if implicit alua support
wasn't in the generic scsi code. But as far as the kernel code goes, I
still see them as parallel efforts. 

-Ben

> > That shouldn't interfere with any refactoring that people
> > want to do of how the scsi layer actually handles ALUA support. Again,
> > this is more for Hannes than you, John.
> 
> Thanks,
> John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 15:48                 ` Benjamin Marzinski
@ 2026-03-24 16:25                   ` John Garry
  0 siblings, 0 replies; 63+ messages in thread
From: John Garry @ 2026-03-24 16:25 UTC (permalink / raw)
  To: Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, hare, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 24/03/2026 15:48, Benjamin Marzinski wrote:
>> We would need something like the following to ensure that DH ALUA is present
>> to update sdev access_state:
>>
>> @@ -80,6 +80,7 @@ config SCSI_MULTIPATH
>>          bool "SCSI multipath support"
>>          depends on SCSI_MOD
>>          select LIBMULTIPATH
>> +       select SCSI_DH_ALUA
>>          help
>>            This option enables support for native SCSI multipath support for
>>            SCSI host.
> DM_MULTIPATH doesn't force the device handlers to be built. You just
> don't have their support if they aren't there. Granted, it does make
> sure that if they are built, you can't build dm-multipath directly into
> the kernel, if the device handlers are built as modules.

Note that dm-mpath does not even work without DH ALUA module:

device-mapper: table: 252:1: multipath: error attaching hardware
handler (-EINVAL)

> 
>> And that is even enough, as Kconfigs should only specify build requirements.
>>
>> We really should be also calling something like scsi_dh_attach() for scsi
>> multipath to ensure that DH is attached (and running to update
>> sdev->access_state).
> That isn't necessary. If there is an alua device handler, kernel will
> auto-attach it to any device that supports alua (see scsi_dh_add_device
> and scsi_dh_find_driver). DM-multipath's calling of scsi_dh_attach() is
> mostly a historical relic.
> 

We still need to know that DH is attached to know that whatever is in 
sdev->access_state is valid for scsi multipath.

>> And I am not sure how the dh alua module is even autoloaded. I think that on
>> my ubuntu machine the multipath-tools.service does it - something like this
>> would not be nice for native SCSI multipath support.
> Fair point. Depending on how the kernel is built, there could be system
> configuration work that needs to happen if implicit alua support
> wasn't in the generic scsi code. But as far as the kernel code goes, I
> still see them as parallel efforts.

I see what you are saying, I'll defer to Martin/Hannes on this.

Thanks,
John


^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 13:58             ` Benjamin Marzinski
  2026-03-24 15:12               ` John Garry
@ 2026-03-26 10:17               ` Hannes Reinecke
  1 sibling, 0 replies; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-26 10:17 UTC (permalink / raw)
  To: Benjamin Marzinski, John Garry
  Cc: martin.petersen, james.bottomley, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 3/24/26 14:58, Benjamin Marzinski wrote:
> On Tue, Mar 24, 2026 at 10:57:20AM +0000, John Garry wrote:
>> On 23/03/2026 19:45, Benjamin Marzinski wrote:
>>>>> I don't
>>>>> think the Native SCSI multipath code would need to actively interface
>>>>> with the device handler code to support IMPLICIT ALUA. IIUC, looking at
>>>>> sdev->access_state should be enough to pick the correct path.
>>>> We also have the functionality from alua_check_sense() to consider.
>>> But the multipath code won't call that directly. Right now, the scsi
>>> device handler will, at least for every scsi device except ones using
>>> the Native Multipath code. My point is that this would just work, except
>>> that the Native Multipath code goes out of its way to break it, by
>>> disabling device handlers, and I don't really see the point of disabling
>>> something that every other scsi device, multipathed or not, has enabled.
>>> It's not like leaving it enabled makes it any harder to move the
>>> implicit ALUA support from the device handler to the generic scsi code,
>>> if that's the goal, since the Native Multipath code doesn't care who is
>>> issuing those rtpgs and updating the state.
>>>
>>> I guess this is more of a question for Hannes. Is the goal to turn off
>>> automatic device handler attachment in general, and go back to making
>>> dm-multipath attach device handlers to the scsi devices it is using?
>>
>> I'm not answering for Hannes, but I don't think that is the goal.
>>
>>> If
>>> not, then I don't see any reason to have the Native Multipath code
>>> disable it.
>>
>> It was just disabled it as we now had another method in the scsi core code
>> to get ALUA info.
>>
>> My plan would be - based on this series - to not attach DH just when using
>> native SCSI multipath for a device.
>>
>>> If it allowed device handlers to get attached, these two
>>> developement efforts (native scsi multipath and refactoring the alua
>>> support) could go on in parallel.
>>>
>>> Or am I missing something here?
>>
>> It just seems to be about this DH stuff is that there is bad history there
>> and no more users are wanted.
> 
> Just to be clear, if the idea was that the Native Multipath code
> shouldn't use include/scsi/scsi_dh.h, I completely agree with that. But
> I don't see why it can't make use of the results of the existing
> implicit ALUA support, since IIUC it doesn't need the scsi_dh interface
> to do that. That shouldn't interfere with any refactoring that people
> want to do of how the scsi layer actually handles ALUA support. Again,
> this is more for Hannes than you, John.
> 
Oh, it's not that it technically cannot use it.
It's just a design thingie: my idea for the native SCSI multipathing
is that it should be _simple_. There really is no point (and, in fact,
this was one of the main motivators of this idea) in re-implementing every
nook and cranny from dm-multipathing.
SCSI multipathing should only handle implicit ALUA, and leave every
other functionality to dm-multipathing.

And on the other side, I always found it completely irritating that
one had to enable multipathing in order to get ALUA support (ie
being able to figure out the ALUA state). The ALUA state is a property
of the LUN, and the system has to react to that one. It really doesn't
matter whether the system has multipathing enabled; if the LUN is in
ALUA Standby state we cannot send I/O, full stop.

And that's what we're trying to achieve here; move implicit alua support
from SCSI DH into the SCSI core, and leave SCSI DH to handle explicit
ALUA support.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-24 15:12               ` John Garry
  2026-03-24 15:48                 ` Benjamin Marzinski
@ 2026-03-26 10:19                 ` Hannes Reinecke
  2026-03-26 12:16                   ` John Garry
  1 sibling, 1 reply; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-26 10:19 UTC (permalink / raw)
  To: John Garry, Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 3/24/26 16:12, John Garry wrote:
> On 24/03/2026 13:58, Benjamin Marzinski wrote:
>>>> If it allowed device handlers to get attached, these two
>>>> developement efforts (native scsi multipath and refactoring the alua
>>>> support) could go on in parallel.
>>>>
>>>> Or am I missing something here?
>>> It just seems to be about this DH stuff is that there is bad history 
>>> there
>>> and no more users are wanted.
>> Just to be clear, if the idea was that the Native Multipath code
>> shouldn't use include/scsi/scsi_dh.h, I completely agree with that. But
>> I don't see why it can't make use of the results of the existing
>> implicit ALUA support, since IIUC it doesn't need the scsi_dh interface
>> to do that.
> 
> We would need something like the following to ensure that DH ALUA is 
> present to update sdev access_state:
> 
> @@ -80,6 +80,7 @@ config SCSI_MULTIPATH
>          bool "SCSI multipath support"
>          depends on SCSI_MOD
>          select LIBMULTIPATH
> +       select SCSI_DH_ALUA
>          help
>            This option enables support for native SCSI multipath support 
> for
>            SCSI host.
> 
> And that is even enough, as Kconfigs should only specify build 
> requirements.
> 
> We really should be also calling something like scsi_dh_attach() for 
> scsi multipath to ensure that DH is attached (and running to update 
> sdev->access_state).
> 
> And I am not sure how the dh alua module is even autoloaded. I think 
> that on my ubuntu machine the multipath-tools.service does it - 
> something like this would not be nice for native SCSI multipath support.
> 
Gnaa. But then we don't need this patchset at all.
Main point was that we _do not_ need to hook into scsi dh for implicit
ALUA.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-26 10:19                 ` Hannes Reinecke
@ 2026-03-26 12:16                   ` John Garry
  2026-03-27  7:02                     ` Hannes Reinecke
  0 siblings, 1 reply; 63+ messages in thread
From: John Garry @ 2026-03-26 12:16 UTC (permalink / raw)
  To: Hannes Reinecke, Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 26/03/2026 10:19, Hannes Reinecke wrote:
>> @@ -80,6 +80,7 @@ config SCSI_MULTIPATH
>>          bool "SCSI multipath support"
>>          depends on SCSI_MOD
>>          select LIBMULTIPATH
>> +       select SCSI_DH_ALUA
>>          help
>>            This option enables support for native SCSI multipath 
>> support for
>>            SCSI host.
>>
>> And that is not even enough, as Kconfigs should only specify build 
>> requirements.
>>
>> We really should be also calling something like scsi_dh_attach() for 
>> scsi multipath to ensure that DH is attached (and running to update 
>> sdev->access_state).
>>
>> And I am not sure how the dh alua module is even autoloaded. I think 
>> that on my ubuntu machine the multipath-tools.service does it - 
>> something like this would not be nice for native SCSI multipath support.
>>
> Gnaa. But then we don't need this patchset at all.
> Main point was that we _do not_ need to hook into scsi dh for implicit
> ALUA.

But again I don't think that this is good enough. Native SCSI 
multipathing will read sdev->access_state to know ALUA state. We can't 
just rely on dh alua module running and doing what we need to know that 
this value is valid. AFAICS, dm mpath relies on dh alua module to even 
work at all:

device-mapper: table: 252:1: multipath: error attaching hardware
handler (-EINVAL)

At this point I am more inclined to just have a small SCSI core ALUA 
support for implicit ALUA, and allow scsi_dh_alua.c reuse functions from 
that but not use sdev->alua structure, like in this series - trying that 
is turning into a mess, I am finding.

Thanks,
John

^ permalink raw reply	[flat|nested] 63+ messages in thread

* Re: [PATCH 00/13] scsi: Core ALUA driver
  2026-03-26 12:16                   ` John Garry
@ 2026-03-27  7:02                     ` Hannes Reinecke
  0 siblings, 0 replies; 63+ messages in thread
From: Hannes Reinecke @ 2026-03-27  7:02 UTC (permalink / raw)
  To: John Garry, Benjamin Marzinski
  Cc: martin.petersen, james.bottomley, jmeneghi, linux-scsi,
	michael.christie, snitzer, dm-devel, linux-kernel

On 3/26/26 13:16, John Garry wrote:
> On 26/03/2026 10:19, Hannes Reinecke wrote:
>>> @@ -80,6 +80,7 @@ config SCSI_MULTIPATH
>>>          bool "SCSI multipath support"
>>>          depends on SCSI_MOD
>>>          select LIBMULTIPATH
>>> +       select SCSI_DH_ALUA
>>>          help
>>>            This option enables support for native SCSI multipath 
>>> support for
>>>            SCSI host.
>>>
>>> And that is not even enough, as Kconfigs should only specify build 
>>> requirements.
>>>
>>> We really should be also calling something like scsi_dh_attach() for 
>>> scsi multipath to ensure that DH is attached (and running to update 
>>> sdev->access_state).
>>>
>>> And I am not sure how the dh alua module is even autoloaded. I think 
>>> that on my ubuntu machine the multipath-tools.service does it - 
>>> something like this would not be nice for native SCSI multipath support.
>>>
>> Gnaa. But then we don't need this patchset at all.
>> Main point was that we _do not_ need to hook into scsi dh for implicit
>> ALUA.
> 
> But again I don't think that this is good enough. Native SCSI 
> multipathing will read sdev->access_state to know ALUA state. We can't 
> just rely on dh alua module running and doing what we need to know that 
> this value is valid. AFAICS, dm mpath relies on dh alua module to even 
> work at all:
> 
> device-mapper: table: 252:1: multipath: error attaching hardware
> handler (-EINVAL)
> 
> At this point I am more inclined to just have a small SCSI core ALUA 
> support for implicit ALUA, and allow scsi_dh_alua.c reuse functions from 
> that but not use sdev->alua structure, like in this series - trying that 
> is turning into a mess, I am finding.
> 
... where one finds himself inclined to think: 'told you so' :-)

And really, the SCSI alua handling doesn't need to be precise, in the
sense that the state is always up-to-date. It should be sufficient to
read the state once during startup, and then resend RTPG whenever we
get a sense code indicating that the ALUA state doesn't match.
We will be missing any changes between non-optimized and optimized,
but they are meaningless for the SCSI core anyway.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 63+ messages in thread

end of thread, other threads:[~2026-03-27  7:02 UTC | newest]

Thread overview: 63+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-17 12:06 [PATCH 00/13] scsi: Core ALUA driver John Garry
2026-03-17 12:06 ` [PATCH 01/13] scsi: scsi_dh_alua: Delete alua_port_group John Garry
2026-03-18  7:44   ` Hannes Reinecke
2026-03-18  8:53     ` John Garry
2026-03-23  0:08   ` Benjamin Marzinski
2026-03-23 10:33     ` John Garry
2026-03-23 16:15       ` Benjamin Marzinski
2026-03-23 18:07         ` John Garry
2026-03-17 12:06 ` [PATCH 02/13] scsi: alua: Create a core ALUA driver John Garry
2026-03-18  7:47   ` Hannes Reinecke
2026-03-23 12:56     ` John Garry
2026-03-18 17:17   ` kernel test robot
2026-03-18 22:54   ` kernel test robot
2026-03-17 12:06 ` [PATCH 03/13] scsi: alua: Add scsi_alua_rtpg() John Garry
2026-03-18  7:50   ` Hannes Reinecke
2026-03-23 12:58     ` John Garry
2026-03-17 12:06 ` [PATCH 04/13] scsi: alua: Add scsi_alua_stpg() John Garry
2026-03-18  7:53   ` Hannes Reinecke
2026-03-17 12:06 ` [PATCH 05/13] scsi: alua: Add scsi_alua_tur() John Garry
2026-03-18  7:54   ` Hannes Reinecke
2026-03-23 13:42     ` John Garry
2026-03-24 10:49       ` John Garry
2026-03-17 12:06 ` [PATCH 06/13] scsi: alua: Add scsi_alua_rtpg_run() John Garry
2026-03-17 12:06 ` [PATCH 07/13] scsi: alua: Add scsi_alua_stpg_run() John Garry
2026-03-18  7:57   ` Hannes Reinecke
2026-03-18  8:59     ` John Garry
2026-03-18  9:24       ` Hannes Reinecke
2026-03-23 13:58         ` John Garry
2026-03-17 12:06 ` [PATCH 08/13] scsi: alua: Add scsi_alua_check_tpgs() John Garry
2026-03-18  7:57   ` Hannes Reinecke
2026-03-17 12:06 ` [PATCH 09/13] scsi: alua: Add scsi_alua_handle_state_transition() John Garry
2026-03-18  7:58   ` Hannes Reinecke
2026-03-23 13:43     ` John Garry
2026-03-17 12:07 ` [PATCH 10/13] scsi: alua: Add scsi_alua_prep_fn() John Garry
2026-03-18  8:01   ` Hannes Reinecke
2026-03-23 13:49     ` John Garry
2026-03-17 12:07 ` [PATCH 11/13] scsi: alua: Add scsi_device_alua_implicit() John Garry
2026-03-18  8:02   ` Hannes Reinecke
2026-03-23 13:50     ` John Garry
2026-03-17 12:07 ` [PATCH 12/13] scsi: scsi_dh_alua: Switch to use core support John Garry
2026-03-23  1:47   ` Benjamin Marzinski
2026-03-23 11:59     ` John Garry
2026-03-17 12:07 ` [PATCH 13/13] scsi: core: Add implicit ALUA support John Garry
2026-03-18  8:08   ` Hannes Reinecke
2026-03-18 23:08   ` kernel test robot
2026-03-23  1:58   ` Benjamin Marzinski
2026-03-23 12:52     ` John Garry
2026-03-23 17:29       ` Benjamin Marzinski
2026-03-23 18:13         ` John Garry
2026-03-22 17:37 ` [PATCH 00/13] scsi: Core ALUA driver Benjamin Marzinski
2026-03-23  9:57   ` John Garry
2026-03-23 16:25     ` Benjamin Marzinski
2026-03-23 18:04       ` John Garry
2026-03-23 19:45         ` Benjamin Marzinski
2026-03-24 10:57           ` John Garry
2026-03-24 13:58             ` Benjamin Marzinski
2026-03-24 15:12               ` John Garry
2026-03-24 15:48                 ` Benjamin Marzinski
2026-03-24 16:25                   ` John Garry
2026-03-26 10:19                 ` Hannes Reinecke
2026-03-26 12:16                   ` John Garry
2026-03-27  7:02                     ` Hannes Reinecke
2026-03-26 10:17               ` Hannes Reinecke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox