public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
@ 2011-02-17  3:11 Dan Williams
  2011-02-17 23:36 ` David Milburn
  2011-03-26 22:27 ` Dan Williams
  0 siblings, 2 replies; 7+ messages in thread
From: Dan Williams @ 2011-02-17  3:11 UTC (permalink / raw)
  To: james.bottomley
  Cc: Haipao Fan, dave.jiang, linux-scsi, jacek.danecki, Maciej Trela,
	jeffrey.d.skirvin, David Milburn, edmund.nadolski,
	Darrick J. Wong

Commit 56dd2c06 "libsas: Don't issue commands to devices that have been
hot-removed" edited Darrick's original patch to remove setting 'gone' in
the sas_deform_port() path because that prevented scsi sync cache
commands from being issued when the driver was unloaded.  However, this
allows true device gone notifications (as signaled port phy events) to
trigger sync cache commands to devices that are known to be unreachable.

Teach libsas which sas_deform_port() invocations are likely device gone
events.

This patch also introduces sas_device_gone() which hopefully allows
subtle/tricky locking to be dropped from lldd drivers, like the
following in mvsas which is broken if the ata path is ever converted to
call lldd_execute_task() with irqs enabled:

	flags_libsas = 0;
	[...]
	spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags_libsas);
	spin_unlock_irqrestore(&mvi->lock, flags);
	t->task_done(t);
	spin_lock_irqsave(&mvi->lock, flags);
	spin_lock_irqsave(dev->sata_dev.ap->lock, flags_libsas);

Cc: Darrick J. Wong <djwong@us.ibm.com>
Cc: Haipao Fan <haipao.fan@intel.com>
Cc: Maciej Trela <maciej.trela@intel.com>
Cc: David Milburn <dmilburn@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

The sas_device_gone() change could be split into its own patch for bisection
purposes, let me know if you want me to resubmit.  This is being driven by a
lockdep error in isci as it calls task->done() from lldd_execute_task()
when it finds lldd_dev == NULL for the ata domain_device.

--
Dan

 drivers/scsi/libsas/sas_expander.c |   20 +++++++++++++++++---
 drivers/scsi/libsas/sas_internal.h |    3 ++-
 drivers/scsi/libsas/sas_phy.c      |    4 ++--
 drivers/scsi/libsas/sas_port.c     |   21 ++++++++++++---------
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 505ffe3..7a785df 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1721,13 +1721,27 @@ out:
 	return res;
 }
 
+/* FIXME: delete this routine when/if sas_ata stops submitting tasks
+ * with host_lock held
+ */
+void sas_device_gone(struct domain_device *dev)
+{
+	struct sas_rphy *rphy = dev->rphy;
+	struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent);
+
+	/* take the lock to synchronize against incoming sata i/o */
+	spin_lock_irq(shost->host_lock);
+	dev->gone = 1;
+	spin_unlock_irq(shost->host_lock);
+}
+
 static void sas_unregister_ex_tree(struct domain_device *dev)
 {
 	struct expander_device *ex = &dev->ex_dev;
 	struct domain_device *child, *n;
 
 	list_for_each_entry_safe(child, n, &ex->children, siblings) {
-		child->gone = 1;
+		sas_device_gone(child);
 		if (child->dev_type == EDGE_DEV ||
 		    child->dev_type == FANOUT_DEV)
 			sas_unregister_ex_tree(child);
@@ -1748,7 +1762,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 			&ex_dev->children, siblings) {
 			if (SAS_ADDR(child->sas_addr) ==
 			    SAS_ADDR(phy->attached_sas_addr)) {
-				child->gone = 1;
+				sas_device_gone(child);
 				if (child->dev_type == EDGE_DEV ||
 				    child->dev_type == FANOUT_DEV)
 					sas_unregister_ex_tree(child);
@@ -1757,7 +1771,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 				break;
 			}
 		}
-		parent->gone = 1;
+		sas_device_gone(parent);
 		sas_disable_routing(parent, phy->attached_sas_addr);
 	}
 	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 2d5c460..4db2752 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -61,7 +61,7 @@ int  sas_init_queue(struct sas_ha_struct *sas_ha);
 int  sas_init_events(struct sas_ha_struct *sas_ha);
 void sas_shutdown_queue(struct sas_ha_struct *sas_ha);
 
-void sas_deform_port(struct asd_sas_phy *phy);
+void sas_deform_port(struct asd_sas_phy *phy, int gone);
 
 void sas_porte_bytes_dmaed(struct work_struct *work);
 void sas_porte_broadcast_rcvd(struct work_struct *work);
@@ -71,6 +71,7 @@ void sas_porte_hard_reset(struct work_struct *work);
 
 int sas_notify_lldd_dev_found(struct domain_device *);
 void sas_notify_lldd_dev_gone(struct domain_device *);
+void sas_device_gone(struct domain_device *dev);
 
 int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 			enum phy_func phy_func, struct sas_phy_linkrates *);
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index b459c4b..e0f5018 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -39,7 +39,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
 	sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock,
 			&phy->phy_events_pending);
 	phy->error = 0;
-	sas_deform_port(phy);
+	sas_deform_port(phy, 1);
 }
 
 static void sas_phye_oob_done(struct work_struct *work)
@@ -66,7 +66,7 @@ static void sas_phye_oob_error(struct work_struct *work)
 	sas_begin_event(PHYE_OOB_ERROR, &phy->ha->event_lock,
 			&phy->phy_events_pending);
 
-	sas_deform_port(phy);
+	sas_deform_port(phy, 1);
 
 	if (!port && phy->enabled && i->dft->lldd_control_phy) {
 		phy->error++;
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 5257fdf..be490e4 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -57,7 +57,7 @@ static void sas_form_port(struct asd_sas_phy *phy)
 
 	if (port) {
 		if (!phy_is_wideport_member(port, phy))
-			sas_deform_port(phy);
+			sas_deform_port(phy, 0);
 		else {
 			SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n",
 				    __func__, phy->id, phy->port->id,
@@ -153,28 +153,31 @@ static void sas_form_port(struct asd_sas_phy *phy)
  * This is called when the physical link to the other phy has been
  * lost (on this phy), in Event thread context. We cannot delay here.
  */
-void sas_deform_port(struct asd_sas_phy *phy)
+void sas_deform_port(struct asd_sas_phy *phy, int gone)
 {
 	struct sas_ha_struct *sas_ha = phy->ha;
 	struct asd_sas_port *port = phy->port;
 	struct sas_internal *si =
 		to_sas_internal(sas_ha->core.shost->transportt);
+	struct domain_device *dev;
 	unsigned long flags;
 
 	if (!port)
 		return;		  /* done by a phy event */
 
-	if (port->port_dev)
-		port->port_dev->pathways--;
+	dev = port->port_dev;
+	if (dev)
+		dev->pathways--;
 
 	if (port->num_phys == 1) {
+		if (dev && gone)
+			sas_device_gone(dev);
 		sas_unregister_domain_devices(port);
 		sas_port_delete(port->port);
 		port->port = NULL;
 	} else
 		sas_port_delete_phy(port->port, phy->phy);
 
-
 	if (si->dft->lldd_port_deformed)
 		si->dft->lldd_port_deformed(phy);
 
@@ -244,7 +247,7 @@ void sas_porte_link_reset_err(struct work_struct *work)
 	sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock,
 			&phy->port_events_pending);
 
-	sas_deform_port(phy);
+	sas_deform_port(phy, 1);
 }
 
 void sas_porte_timer_event(struct work_struct *work)
@@ -256,7 +259,7 @@ void sas_porte_timer_event(struct work_struct *work)
 	sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock,
 			&phy->port_events_pending);
 
-	sas_deform_port(phy);
+	sas_deform_port(phy, 1);
 }
 
 void sas_porte_hard_reset(struct work_struct *work)
@@ -268,7 +271,7 @@ void sas_porte_hard_reset(struct work_struct *work)
 	sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock,
 			&phy->port_events_pending);
 
-	sas_deform_port(phy);
+	sas_deform_port(phy, 1);
 }
 
 /* ---------- SAS port registration ---------- */
@@ -306,6 +309,6 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha)
 
 	for (i = 0; i < sas_ha->num_phys; i++)
 		if (sas_ha->sas_phy[i]->port)
-			sas_deform_port(sas_ha->sas_phy[i]);
+			sas_deform_port(sas_ha->sas_phy[i], 0);
 
 }


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-02-17  3:11 [PATCH] libsas: fix/amend device gone notification in sas_deform_port() Dan Williams
@ 2011-02-17 23:36 ` David Milburn
  2011-02-18  0:19   ` Dan Williams
  2011-03-26 22:27 ` Dan Williams
  1 sibling, 1 reply; 7+ messages in thread
From: David Milburn @ 2011-02-17 23:36 UTC (permalink / raw)
  To: Dan Williams
  Cc: james.bottomley, Haipao Fan, dave.jiang, linux-scsi,
	jacek.danecki, Maciej Trela, jeffrey.d.skirvin, edmund.nadolski,
	Darrick J. Wong

Dan Williams wrote:
> Commit 56dd2c06 "libsas: Don't issue commands to devices that have been
> hot-removed" edited Darrick's original patch to remove setting 'gone' in
> the sas_deform_port() path because that prevented scsi sync cache
> commands from being issued when the driver was unloaded.  However, this
> allows true device gone notifications (as signaled port phy events) to
> trigger sync cache commands to devices that are known to be unreachable.
> 
> Teach libsas which sas_deform_port() invocations are likely device gone
> events.
> 
> This patch also introduces sas_device_gone() which hopefully allows
> subtle/tricky locking to be dropped from lldd drivers, like the
> following in mvsas which is broken if the ata path is ever converted to
> call lldd_execute_task() with irqs enabled:
> 
> 	flags_libsas = 0;
> 	[...]
> 	spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags_libsas);
> 	spin_unlock_irqrestore(&mvi->lock, flags);
> 	t->task_done(t);
> 	spin_lock_irqsave(&mvi->lock, flags);
> 	spin_lock_irqsave(dev->sata_dev.ap->lock, flags_libsas);
> 
> Cc: Darrick J. Wong <djwong@us.ibm.com>
> Cc: Haipao Fan <haipao.fan@intel.com>
> Cc: Maciej Trela <maciej.trela@intel.com>
> Cc: David Milburn <dmilburn@redhat.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
> 
> The sas_device_gone() change could be split into its own patch for bisection
> purposes, let me know if you want me to resubmit.  This is being driven by a
> lockdep error in isci as it calls task->done() from lldd_execute_task()
> when it finds lldd_dev == NULL for the ata domain_device.

Dan,

So, you are seeing sas_ata_task_done() trying to get the ata_port lock,
but it has already been taken earlier in the code path?

ata_scsi_queuecmd (spin_lock_irqsave(ap->lock, irq_flags)
   __ata_scsi_queuecmd
     ata_scsi_translate
       ata_qc_issue
         sas_ata_qc_issue
           isci_task_execute_task
             isci_task_complete_for_upper_layer
               sas_ata_task_done 
(spin_lock_irqsave(dev->sata_dev.ap->lock, flags)

Thanks,
David


> 
> --
> Dan
> 
>  drivers/scsi/libsas/sas_expander.c |   20 +++++++++++++++++---
>  drivers/scsi/libsas/sas_internal.h |    3 ++-
>  drivers/scsi/libsas/sas_phy.c      |    4 ++--
>  drivers/scsi/libsas/sas_port.c     |   21 ++++++++++++---------
>  4 files changed, 33 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 505ffe3..7a785df 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -1721,13 +1721,27 @@ out:
>  	return res;
>  }
>  
> +/* FIXME: delete this routine when/if sas_ata stops submitting tasks
> + * with host_lock held
> + */
> +void sas_device_gone(struct domain_device *dev)
> +{
> +	struct sas_rphy *rphy = dev->rphy;
> +	struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent);
> +
> +	/* take the lock to synchronize against incoming sata i/o */
> +	spin_lock_irq(shost->host_lock);
> +	dev->gone = 1;
> +	spin_unlock_irq(shost->host_lock);
> +}
> +
>  static void sas_unregister_ex_tree(struct domain_device *dev)
>  {
>  	struct expander_device *ex = &dev->ex_dev;
>  	struct domain_device *child, *n;
>  
>  	list_for_each_entry_safe(child, n, &ex->children, siblings) {
> -		child->gone = 1;
> +		sas_device_gone(child);
>  		if (child->dev_type == EDGE_DEV ||
>  		    child->dev_type == FANOUT_DEV)
>  			sas_unregister_ex_tree(child);
> @@ -1748,7 +1762,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
>  			&ex_dev->children, siblings) {
>  			if (SAS_ADDR(child->sas_addr) ==
>  			    SAS_ADDR(phy->attached_sas_addr)) {
> -				child->gone = 1;
> +				sas_device_gone(child);
>  				if (child->dev_type == EDGE_DEV ||
>  				    child->dev_type == FANOUT_DEV)
>  					sas_unregister_ex_tree(child);
> @@ -1757,7 +1771,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
>  				break;
>  			}
>  		}
> -		parent->gone = 1;
> +		sas_device_gone(parent);
>  		sas_disable_routing(parent, phy->attached_sas_addr);
>  	}
>  	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
> index 2d5c460..4db2752 100644
> --- a/drivers/scsi/libsas/sas_internal.h
> +++ b/drivers/scsi/libsas/sas_internal.h
> @@ -61,7 +61,7 @@ int  sas_init_queue(struct sas_ha_struct *sas_ha);
>  int  sas_init_events(struct sas_ha_struct *sas_ha);
>  void sas_shutdown_queue(struct sas_ha_struct *sas_ha);
>  
> -void sas_deform_port(struct asd_sas_phy *phy);
> +void sas_deform_port(struct asd_sas_phy *phy, int gone);
>  
>  void sas_porte_bytes_dmaed(struct work_struct *work);
>  void sas_porte_broadcast_rcvd(struct work_struct *work);
> @@ -71,6 +71,7 @@ void sas_porte_hard_reset(struct work_struct *work);
>  
>  int sas_notify_lldd_dev_found(struct domain_device *);
>  void sas_notify_lldd_dev_gone(struct domain_device *);
> +void sas_device_gone(struct domain_device *dev);
>  
>  int sas_smp_phy_control(struct domain_device *dev, int phy_id,
>  			enum phy_func phy_func, struct sas_phy_linkrates *);
> diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
> index b459c4b..e0f5018 100644
> --- a/drivers/scsi/libsas/sas_phy.c
> +++ b/drivers/scsi/libsas/sas_phy.c
> @@ -39,7 +39,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
>  	sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock,
>  			&phy->phy_events_pending);
>  	phy->error = 0;
> -	sas_deform_port(phy);
> +	sas_deform_port(phy, 1);
>  }
>  
>  static void sas_phye_oob_done(struct work_struct *work)
> @@ -66,7 +66,7 @@ static void sas_phye_oob_error(struct work_struct *work)
>  	sas_begin_event(PHYE_OOB_ERROR, &phy->ha->event_lock,
>  			&phy->phy_events_pending);
>  
> -	sas_deform_port(phy);
> +	sas_deform_port(phy, 1);
>  
>  	if (!port && phy->enabled && i->dft->lldd_control_phy) {
>  		phy->error++;
> diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
> index 5257fdf..be490e4 100644
> --- a/drivers/scsi/libsas/sas_port.c
> +++ b/drivers/scsi/libsas/sas_port.c
> @@ -57,7 +57,7 @@ static void sas_form_port(struct asd_sas_phy *phy)
>  
>  	if (port) {
>  		if (!phy_is_wideport_member(port, phy))
> -			sas_deform_port(phy);
> +			sas_deform_port(phy, 0);
>  		else {
>  			SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n",
>  				    __func__, phy->id, phy->port->id,
> @@ -153,28 +153,31 @@ static void sas_form_port(struct asd_sas_phy *phy)
>   * This is called when the physical link to the other phy has been
>   * lost (on this phy), in Event thread context. We cannot delay here.
>   */
> -void sas_deform_port(struct asd_sas_phy *phy)
> +void sas_deform_port(struct asd_sas_phy *phy, int gone)
>  {
>  	struct sas_ha_struct *sas_ha = phy->ha;
>  	struct asd_sas_port *port = phy->port;
>  	struct sas_internal *si =
>  		to_sas_internal(sas_ha->core.shost->transportt);
> +	struct domain_device *dev;
>  	unsigned long flags;
>  
>  	if (!port)
>  		return;		  /* done by a phy event */
>  
> -	if (port->port_dev)
> -		port->port_dev->pathways--;
> +	dev = port->port_dev;
> +	if (dev)
> +		dev->pathways--;
>  
>  	if (port->num_phys == 1) {
> +		if (dev && gone)
> +			sas_device_gone(dev);
>  		sas_unregister_domain_devices(port);
>  		sas_port_delete(port->port);
>  		port->port = NULL;
>  	} else
>  		sas_port_delete_phy(port->port, phy->phy);
>  
> -
>  	if (si->dft->lldd_port_deformed)
>  		si->dft->lldd_port_deformed(phy);
>  
> @@ -244,7 +247,7 @@ void sas_porte_link_reset_err(struct work_struct *work)
>  	sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock,
>  			&phy->port_events_pending);
>  
> -	sas_deform_port(phy);
> +	sas_deform_port(phy, 1);
>  }
>  
>  void sas_porte_timer_event(struct work_struct *work)
> @@ -256,7 +259,7 @@ void sas_porte_timer_event(struct work_struct *work)
>  	sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock,
>  			&phy->port_events_pending);
>  
> -	sas_deform_port(phy);
> +	sas_deform_port(phy, 1);
>  }
>  
>  void sas_porte_hard_reset(struct work_struct *work)
> @@ -268,7 +271,7 @@ void sas_porte_hard_reset(struct work_struct *work)
>  	sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock,
>  			&phy->port_events_pending);
>  
> -	sas_deform_port(phy);
> +	sas_deform_port(phy, 1);
>  }
>  
>  /* ---------- SAS port registration ---------- */
> @@ -306,6 +309,6 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha)
>  
>  	for (i = 0; i < sas_ha->num_phys; i++)
>  		if (sas_ha->sas_phy[i]->port)
> -			sas_deform_port(sas_ha->sas_phy[i]);
> +			sas_deform_port(sas_ha->sas_phy[i], 0);
>  
>  }
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-02-17 23:36 ` David Milburn
@ 2011-02-18  0:19   ` Dan Williams
  2011-03-04 22:44     ` David Milburn
  0 siblings, 1 reply; 7+ messages in thread
From: Dan Williams @ 2011-02-18  0:19 UTC (permalink / raw)
  To: David Milburn
  Cc: james.bottomley@suse.de, Fan, Haipao, Jiang, Dave,
	linux-scsi@vger.kernel.org, Danecki, Jacek, Trela, Maciej,
	Skirvin, Jeffrey D, Nadolski, Edmund, Darrick J. Wong

On 2/17/2011 3:36 PM, David Milburn wrote:
> Dan Williams wrote:
>> Commit 56dd2c06 "libsas: Don't issue commands to devices that have been
>> hot-removed" edited Darrick's original patch to remove setting 'gone' in
>> the sas_deform_port() path because that prevented scsi sync cache
>> commands from being issued when the driver was unloaded.  However, this
>> allows true device gone notifications (as signaled port phy events) to
>> trigger sync cache commands to devices that are known to be unreachable.
>>
>> Teach libsas which sas_deform_port() invocations are likely device gone
>> events.
>>
>> This patch also introduces sas_device_gone() which hopefully allows
>> subtle/tricky locking to be dropped from lldd drivers, like the
>> following in mvsas which is broken if the ata path is ever converted to
>> call lldd_execute_task() with irqs enabled:
>>
>> 	flags_libsas = 0;
>> 	[...]
>> 	spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags_libsas);
>> 	spin_unlock_irqrestore(&mvi->lock, flags);
>> 	t->task_done(t);
>> 	spin_lock_irqsave(&mvi->lock, flags);
>> 	spin_lock_irqsave(dev->sata_dev.ap->lock, flags_libsas);
>>
>> Cc: Darrick J. Wong<djwong@us.ibm.com>
>> Cc: Haipao Fan<haipao.fan@intel.com>
>> Cc: Maciej Trela<maciej.trela@intel.com>
>> Cc: David Milburn<dmilburn@redhat.com>
>> Signed-off-by: Dan Williams<dan.j.williams@intel.com>
>> ---
>>
>> The sas_device_gone() change could be split into its own patch for bisection
>> purposes, let me know if you want me to resubmit.  This is being driven by a
>> lockdep error in isci as it calls task->done() from lldd_execute_task()
>> when it finds lldd_dev == NULL for the ata domain_device.
>
> Dan,
>
> So, you are seeing sas_ata_task_done() trying to get the ata_port lock,
> but it has already been taken earlier in the code path?
>
> ata_scsi_queuecmd (spin_lock_irqsave(ap->lock, irq_flags)
>     __ata_scsi_queuecmd
>       ata_scsi_translate
>         ata_qc_issue
>           sas_ata_qc_issue
>             isci_task_execute_task
>               isci_task_complete_for_upper_layer
>                 sas_ata_task_done
> (spin_lock_irqsave(dev->sata_dev.ap->lock, flags)

Exactly.

sas_device_gone() should prevent an lldd from ever attempting an i/o to 
a missing sata device (one that has been notified via lldd_dev_gone). 
Although, I have only had time to verify the simple unplug case and 
sync-cache commands at driver unload.  We'll still need to handle 
missing ssp devices, but there are no lldd-external locking concerns in 
that path.

--
Dan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-02-18  0:19   ` Dan Williams
@ 2011-03-04 22:44     ` David Milburn
  2011-03-04 22:53       ` Dan Williams
  0 siblings, 1 reply; 7+ messages in thread
From: David Milburn @ 2011-03-04 22:44 UTC (permalink / raw)
  To: Dan Williams
  Cc: james.bottomley@suse.de, Fan, Haipao, Jiang, Dave,
	linux-scsi@vger.kernel.org, Danecki, Jacek, Trela, Maciej,
	Skirvin, Jeffrey D, Nadolski, Edmund, Darrick J. Wong

Dan Williams wrote:
> On 2/17/2011 3:36 PM, David Milburn wrote:
>> Dan Williams wrote:
>>> Commit 56dd2c06 "libsas: Don't issue commands to devices that have been
>>> hot-removed" edited Darrick's original patch to remove setting 'gone' in
>>> the sas_deform_port() path because that prevented scsi sync cache
>>> commands from being issued when the driver was unloaded.  However, this
>>> allows true device gone notifications (as signaled port phy events) to
>>> trigger sync cache commands to devices that are known to be unreachable.
>>>
>>> Teach libsas which sas_deform_port() invocations are likely device gone
>>> events.
>>>
>>> This patch also introduces sas_device_gone() which hopefully allows
>>> subtle/tricky locking to be dropped from lldd drivers, like the
>>> following in mvsas which is broken if the ata path is ever converted to
>>> call lldd_execute_task() with irqs enabled:
>>>
>>>     flags_libsas = 0;
>>>     [...]
>>>     spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags_libsas);
>>>     spin_unlock_irqrestore(&mvi->lock, flags);
>>>     t->task_done(t);
>>>     spin_lock_irqsave(&mvi->lock, flags);
>>>     spin_lock_irqsave(dev->sata_dev.ap->lock, flags_libsas);
>>>
>>> Cc: Darrick J. Wong<djwong@us.ibm.com>
>>> Cc: Haipao Fan<haipao.fan@intel.com>
>>> Cc: Maciej Trela<maciej.trela@intel.com>
>>> Cc: David Milburn<dmilburn@redhat.com>
>>> Signed-off-by: Dan Williams<dan.j.williams@intel.com>
>>> ---
>>>
>>> The sas_device_gone() change could be split into its own patch for 
>>> bisection
>>> purposes, let me know if you want me to resubmit.  This is being 
>>> driven by a
>>> lockdep error in isci as it calls task->done() from lldd_execute_task()
>>> when it finds lldd_dev == NULL for the ata domain_device.
>>
>> Dan,
>>
>> So, you are seeing sas_ata_task_done() trying to get the ata_port lock,
>> but it has already been taken earlier in the code path?
>>
>> ata_scsi_queuecmd (spin_lock_irqsave(ap->lock, irq_flags)
>>     __ata_scsi_queuecmd
>>       ata_scsi_translate
>>         ata_qc_issue
>>           sas_ata_qc_issue
>>             isci_task_execute_task
>>               isci_task_complete_for_upper_layer
>>                 sas_ata_task_done
>> (spin_lock_irqsave(dev->sata_dev.ap->lock, flags)
> 
> Exactly.
> 
> sas_device_gone() should prevent an lldd from ever attempting an i/o to 
> a missing sata device (one that has been notified via lldd_dev_gone). 
> Although, I have only had time to verify the simple unplug case and 
> sync-cache commands at driver unload.  We'll still need to handle 
> missing ssp devices, but there are no lldd-external locking concerns in 
> that path.
> 

Dan,

One other question, should sas_device_gone() be sync'ing with sata io
thru ata port lock instead of the Scsi_Host lock?

Looking at sas_queuecommand, the host_lock is released and the ata
port lock is taken before calling ata_sas_queuecmd, and ata port lock is 
held
down the __ata_scsi_queuecmd path.

Should sas_device_gone get the ap->lock from the domain_device->
sata_dev to better sync against sata io?

Thanks,
David



> -- 
> Dan


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-03-04 22:44     ` David Milburn
@ 2011-03-04 22:53       ` Dan Williams
  2011-03-04 23:08         ` David Milburn
  0 siblings, 1 reply; 7+ messages in thread
From: Dan Williams @ 2011-03-04 22:53 UTC (permalink / raw)
  To: David Milburn
  Cc: james.bottomley@suse.de, Fan, Haipao, Jiang, Dave,
	linux-scsi@vger.kernel.org, Danecki, Jacek, Trela, Maciej,
	Skirvin, Jeffrey D, Nadolski, Edmund, Darrick J. Wong

On Fri, 2011-03-04 at 14:44 -0800, David Milburn wrote:
> Dan Williams wrote:
> >> Dan,
> >>
> >> So, you are seeing sas_ata_task_done() trying to get the ata_port lock,
> >> but it has already been taken earlier in the code path?
> >>
> >> ata_scsi_queuecmd (spin_lock_irqsave(ap->lock, irq_flags)
> >>     __ata_scsi_queuecmd
> >>       ata_scsi_translate
> >>         ata_qc_issue
> >>           sas_ata_qc_issue
> >>             isci_task_execute_task
> >>               isci_task_complete_for_upper_layer
> >>                 sas_ata_task_done
> >> (spin_lock_irqsave(dev->sata_dev.ap->lock, flags)
> > 
> > Exactly.
> > 
> > sas_device_gone() should prevent an lldd from ever attempting an i/o to 
> > a missing sata device (one that has been notified via lldd_dev_gone). 
> > Although, I have only had time to verify the simple unplug case and 
> > sync-cache commands at driver unload.  We'll still need to handle 
> > missing ssp devices, but there are no lldd-external locking concerns in 
> > that path.
> > 
> 
> Dan,
> 
> One other question, should sas_device_gone() be sync'ing with sata io
> thru ata port lock instead of the Scsi_Host lock?
> 
> Looking at sas_queuecommand, the host_lock is released and the ata
> port lock is taken before calling ata_sas_queuecmd, and ata port lock is 
> held
> down the __ata_scsi_queuecmd path.
> 
> Should sas_device_gone get the ap->lock from the domain_device->
> sata_dev to better sync against sata io?

Unless I am mistaken they are one in the same, see ata_sas_port_alloc:

struct ata_port *ata_sas_port_alloc(struct ata_host *host,
                                    struct ata_port_info *port_info,
                                    struct Scsi_Host *shost)
{
        struct ata_port *ap;

        ap = ata_port_alloc(host);
        if (!ap)
                return NULL;

        ap->port_no = 0;
        ap->lock = shost->host_lock;
        ap->pio_mask = port_info->pio_mask;
        ap->mwdma_mask = port_info->mwdma_mask;
        ap->udma_mask = port_info->udma_mask;
        ap->flags |= port_info->flags;
        ap->ops = port_info->port_ops;
        ap->cbl = ATA_CBL_SATA;

        return ap;
}

--
Dan



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-03-04 22:53       ` Dan Williams
@ 2011-03-04 23:08         ` David Milburn
  0 siblings, 0 replies; 7+ messages in thread
From: David Milburn @ 2011-03-04 23:08 UTC (permalink / raw)
  To: Dan Williams
  Cc: james.bottomley@suse.de, Fan, Haipao, Jiang, Dave,
	linux-scsi@vger.kernel.org, Danecki, Jacek, Trela, Maciej,
	Skirvin, Jeffrey D, Nadolski, Edmund, Darrick J. Wong

Dan Williams wrote:
> On Fri, 2011-03-04 at 14:44 -0800, David Milburn wrote:
>> Dan Williams wrote:
>>>> Dan,
>>>>
>>>> So, you are seeing sas_ata_task_done() trying to get the ata_port lock,
>>>> but it has already been taken earlier in the code path?
>>>>
>>>> ata_scsi_queuecmd (spin_lock_irqsave(ap->lock, irq_flags)
>>>>     __ata_scsi_queuecmd
>>>>       ata_scsi_translate
>>>>         ata_qc_issue
>>>>           sas_ata_qc_issue
>>>>             isci_task_execute_task
>>>>               isci_task_complete_for_upper_layer
>>>>                 sas_ata_task_done
>>>> (spin_lock_irqsave(dev->sata_dev.ap->lock, flags)
>>> Exactly.
>>>
>>> sas_device_gone() should prevent an lldd from ever attempting an i/o to 
>>> a missing sata device (one that has been notified via lldd_dev_gone). 
>>> Although, I have only had time to verify the simple unplug case and 
>>> sync-cache commands at driver unload.  We'll still need to handle 
>>> missing ssp devices, but there are no lldd-external locking concerns in 
>>> that path.
>>>
>> Dan,
>>
>> One other question, should sas_device_gone() be sync'ing with sata io
>> thru ata port lock instead of the Scsi_Host lock?
>>
>> Looking at sas_queuecommand, the host_lock is released and the ata
>> port lock is taken before calling ata_sas_queuecmd, and ata port lock is 
>> held
>> down the __ata_scsi_queuecmd path.
>>
>> Should sas_device_gone get the ap->lock from the domain_device->
>> sata_dev to better sync against sata io?
> 
> Unless I am mistaken they are one in the same, see ata_sas_port_alloc:
> 
> struct ata_port *ata_sas_port_alloc(struct ata_host *host,
>                                     struct ata_port_info *port_info,
>                                     struct Scsi_Host *shost)
> {
>         struct ata_port *ap;
> 
>         ap = ata_port_alloc(host);
>         if (!ap)
>                 return NULL;
> 
>         ap->port_no = 0;
>         ap->lock = shost->host_lock;
>         ap->pio_mask = port_info->pio_mask;
>         ap->mwdma_mask = port_info->mwdma_mask;
>         ap->udma_mask = port_info->udma_mask;
>         ap->flags |= port_info->flags;
>         ap->ops = port_info->port_ops;
>         ap->cbl = ATA_CBL_SATA;
> 
>         return ap;
> }

Your right, thanks.

David

> 
> --
> Dan
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libsas: fix/amend device gone notification in sas_deform_port()
  2011-02-17  3:11 [PATCH] libsas: fix/amend device gone notification in sas_deform_port() Dan Williams
  2011-02-17 23:36 ` David Milburn
@ 2011-03-26 22:27 ` Dan Williams
  1 sibling, 0 replies; 7+ messages in thread
From: Dan Williams @ 2011-03-26 22:27 UTC (permalink / raw)
  To: james.bottomley
  Cc: Haipao Fan, dave.jiang, linux-scsi, jacek.danecki, Maciej Trela,
	jeffrey.d.skirvin, David Milburn, edmund.nadolski,
	Darrick J. Wong, Jack Wang

On Wed, Feb 16, 2011 at 7:11 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> Commit 56dd2c06 "libsas: Don't issue commands to devices that have been
> hot-removed" edited Darrick's original patch to remove setting 'gone' in
> the sas_deform_port() path because that prevented scsi sync cache
> commands from being issued when the driver was unloaded.  However, this
> allows true device gone notifications (as signaled port phy events) to
> trigger sync cache commands to devices that are known to be unreachable.
>
> Teach libsas which sas_deform_port() invocations are likely device gone
> events.
>
> This patch also introduces sas_device_gone() which hopefully allows
> subtle/tricky locking to be dropped from lldd drivers, like the
> following in mvsas which is broken if the ata path is ever converted to
> call lldd_execute_task() with irqs enabled:
>
>        flags_libsas = 0;
>        [...]
>        spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags_libsas);
>        spin_unlock_irqrestore(&mvi->lock, flags);
>        t->task_done(t);
>        spin_lock_irqsave(&mvi->lock, flags);
>        spin_lock_irqsave(dev->sata_dev.ap->lock, flags_libsas);
>
> Cc: Darrick J. Wong <djwong@us.ibm.com>
> Cc: Haipao Fan <haipao.fan@intel.com>
> Cc: Maciej Trela <maciej.trela@intel.com>
> Cc: David Milburn <dmilburn@redhat.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

James,

This was acked-by Jack Wang, and has been under test in the isci tree
for quite a while with no ill effects.  Please consider it for a post
39-rc1 fix or otherwise disposition it.

Thanks,
Dan
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-03-26 22:27 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-17  3:11 [PATCH] libsas: fix/amend device gone notification in sas_deform_port() Dan Williams
2011-02-17 23:36 ` David Milburn
2011-02-18  0:19   ` Dan Williams
2011-03-04 22:44     ` David Milburn
2011-03-04 22:53       ` Dan Williams
2011-03-04 23:08         ` David Milburn
2011-03-26 22:27 ` Dan Williams

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox