Linux SCSI subsystem development
 help / color / mirror / Atom feed
* [PATCH] scsi: libsas: Fix dev_list race conditions with proper locking
@ 2026-01-29  9:38 Chaohai Chen
  2026-01-30  3:02 ` Damien Le Moal
  2026-01-30  8:27 ` Jason Yan
  0 siblings, 2 replies; 10+ messages in thread
From: Chaohai Chen @ 2026-01-29  9:38 UTC (permalink / raw)
  To: john.g.garry, yanaijie, James.Bottomley, martin.petersen, dlemoal,
	wdhh6, johannes.thumshirn, mingo, cassel, tglx
  Cc: linux-scsi, linux-kernel

Multiple functions in libsas were accessing port->dev_list without
proper locking, leading to potential race conditions that could cause:
- Use-after-free when devices are removed during list traversal
- List corruption from concurrent modifications
- System crashes from accessing freed memory

This patch adds proper dev_list_lock protection to the following functions:

1. sas_ex_level_discovery(): Added locking around list traversal with
   safe iteration and reference counting for devices. The lock is
   released before calling functions that may sleep
   (sas_ex_discover_devices).

2. sas_dev_present_in_domain(): Added locking for read-only list access
   to prevent reading inconsistent list state.

3. sas_suspend_devices(): Added locking around list traversal to prevent
   concurrent modifications during device suspension.

4. sas_unregister_domain_devices(): Added proper locking with reference
   counting. The lock is released before calling sas_unregister_dev()
   which may sleep, but device references are held to prevent premature
   removal.

5. sas_port_event_worker(): Added locking around list traversal with
   reference counting for devices accessed outside the lock.

All modifications follow the pattern of:
- Hold dev_list_lock during list traversal
- Use list_for_each_entry_safe where list may be modified
- Take device reference (kref_get) before releasing lock
- Release lock before calling functions that may sleep
- Release device reference (sas_put_device) after use

Signed-off-by: Chaohai Chen <wdhh6@aliyun.com>
---
 drivers/scsi/libsas/sas_discover.c | 14 +++++++++++++
 drivers/scsi/libsas/sas_expander.c | 32 +++++++++++++++++++++++-------
 drivers/scsi/libsas/sas_port.c     | 10 ++++++++++
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index b07062db50b2..3c18fdfde8c2 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -245,8 +245,10 @@ static void sas_suspend_devices(struct work_struct *work)
 	 * suspension, we force the issue here to keep the reference
 	 * counts aligned
 	 */
+	spin_lock_irq(&port->dev_list_lock);
 	list_for_each_entry(dev, &port->dev_list, dev_list_node)
 		sas_notify_lldd_dev_gone(dev);
+	spin_unlock_irq(&port->dev_list_lock);
 
 	/* we are suspending, so we know events are disabled and
 	 * phy_list is not being mutated
@@ -410,11 +412,23 @@ void sas_unregister_domain_devices(struct asd_sas_port *port, bool gone)
 {
 	struct domain_device *dev, *n;
 
+	/* Lock while iterating to prevent concurrent modifications.
+	 * We need to unlock before calling sas_unregister_dev() as it
+	 * may sleep, but we hold a reference to prevent device removal.
+	 */
+	spin_lock_irq(&port->dev_list_lock);
 	list_for_each_entry_safe_reverse(dev, n, &port->dev_list, dev_list_node) {
 		if (gone)
 			set_bit(SAS_DEV_GONE, &dev->state);
+		kref_get(&dev->kref);
+		spin_unlock_irq(&port->dev_list_lock);
+
 		sas_unregister_dev(port, dev);
+		sas_put_device(dev);
+
+		spin_lock_irq(&port->dev_list_lock);
 	}
+	spin_unlock_irq(&port->dev_list_lock);
 
 	list_for_each_entry_safe(dev, n, &port->disco_list, disco_list_node)
 		sas_unregister_dev(port, dev);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index d953225f6cc2..c82c9b3d5103 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -643,14 +643,21 @@ static int sas_dev_present_in_domain(struct asd_sas_port *port,
 					    u8 *sas_addr)
 {
 	struct domain_device *dev;
+	int found = 0;
 
 	if (SAS_ADDR(port->sas_addr) == SAS_ADDR(sas_addr))
 		return 1;
+
+	spin_lock_irq(&port->dev_list_lock);
 	list_for_each_entry(dev, &port->dev_list, dev_list_node) {
-		if (SAS_ADDR(dev->sas_addr) == SAS_ADDR(sas_addr))
-			return 1;
+		if (SAS_ADDR(dev->sas_addr) == SAS_ADDR(sas_addr)) {
+			found = 1;
+			break;
+		}
 	}
-	return 0;
+	spin_unlock_irq(&port->dev_list_lock);
+
+	return found;
 }
 
 #define RPEL_REQ_SIZE	16
@@ -1579,20 +1586,31 @@ static int sas_discover_expander(struct domain_device *dev)
 static int sas_ex_level_discovery(struct asd_sas_port *port, const int level)
 {
 	int res = 0;
-	struct domain_device *dev;
+	struct domain_device *dev, *n;
 
-	list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+	spin_lock_irq(&port->dev_list_lock);
+	list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) {
 		if (dev_is_expander(dev->dev_type)) {
 			struct sas_expander_device *ex =
 				rphy_to_expander_device(dev->rphy);
 
-			if (level == ex->level)
+			if (level == ex->level) {
+				kref_get(&dev->kref);
+				spin_unlock_irq(&port->dev_list_lock);
 				res = sas_ex_discover_devices(dev, -1);
-			else if (level > 0)
+				sas_put_device(dev);
+				spin_lock_irq(&port->dev_list_lock);
+			} else if (level > 0) {
+				kref_get(&port->port_dev->kref);
+				spin_unlock_irq(&port->dev_list_lock);
 				res = sas_ex_discover_devices(port->port_dev, -1);
+				sas_put_device(port->port_dev);
+				spin_lock_irq(&port->dev_list_lock);
+			}
 
 		}
 	}
+	spin_unlock_irq(&port->dev_list_lock);
 
 	return res;
 }
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index de7556070048..491c9f7104c6 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -44,13 +44,19 @@ static void sas_resume_port(struct asd_sas_phy *phy)
 	 * 1/ presume every device came back
 	 * 2/ force the next revalidation to check all expander phys
 	 */
+	spin_lock_irq(&port->dev_list_lock);
 	list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) {
 		int i, rc;
 
+		kref_get(&dev->kref);
+		spin_unlock_irq(&port->dev_list_lock);
+
 		rc = sas_notify_lldd_dev_found(dev);
 		if (rc) {
 			sas_unregister_dev(port, dev);
 			sas_destruct_devices(port);
+			sas_put_device(dev);
+			spin_lock_irq(&port->dev_list_lock);
 			continue;
 		}
 
@@ -62,7 +68,11 @@ static void sas_resume_port(struct asd_sas_phy *phy)
 				phy->phy_change_count = -1;
 			}
 		}
+
+		sas_put_device(dev);
+		spin_lock_irq(&port->dev_list_lock);
 	}
+	spin_unlock_irq(&port->dev_list_lock);
 
 	sas_discover_event(port, DISCE_RESUME);
 }
-- 
2.43.7


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-02-02  9:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-29  9:38 [PATCH] scsi: libsas: Fix dev_list race conditions with proper locking Chaohai Chen
2026-01-30  3:02 ` Damien Le Moal
2026-01-31 11:19   ` Chaohai Chen
2026-02-02  1:21     ` Damien Le Moal
2026-02-02  7:36       ` Chaohai Chen
2026-02-02  7:45         ` Damien Le Moal
2026-02-02  9:03         ` Jason Yan
2026-01-30  8:27 ` Jason Yan
2026-01-31 10:47   ` Chaohai Chen
2026-02-02  1:46     ` Damien Le Moal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox