Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss

linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
@ 2009-04-22 18:01 James Smart
  2009-04-22 19:18 ` Mike Christie
  2009-05-11 12:25 ` Ralph Wuerthner
  0 siblings, 2 replies; 11+ messages in thread
From: James Smart @ 2009-04-22 18:01 UTC (permalink / raw)
  To: linux-scsi, ralphw@linux.vnet.ibm.com, michaelc@cs.wisc.edu

Mike, Ralph,

See if this helps. It compiles, but I haven't tested it.

This modifies the final deletion steps, after dev_loss_tmo has fired,
such that we keep a flag indicating we're deleting, and we only clear
the flag once the additional steps that we had to perform w/o lock,
are complete.  Then, in fc_remote_port_add(), when we fall into the
case of using the rports for the bindings, which corresponds to the
"open" unlocked code area, we stall and wait for the delete to
finish before completing the rest of the add.

I'm a little nervous about the delay in fc_remote_port_add, but it
should be quick, and our callees should be in a context that it's
allowed.

-- james s


 Signed-off-by: James Smart <james.smart@emulex.com>

 ---

 drivers/scsi/scsi_transport_fc.c |   66
+++++++++++++++++++++++++++++++++++++--
 include/scsi/scsi_transport_fc.h |    5 ++
 2 files changed, 69 insertions(+), 2 deletions(-)


diff -upNr a/drivers/scsi/scsi_transport_fc.c
b/drivers/scsi/scsi_transport_fc.c
--- a/drivers/scsi/scsi_transport_fc.c	2009-04-06 12:13:13.000000000
-0400
+++ b/drivers/scsi/scsi_transport_fc.c	2009-04-22 13:44:22.000000000
-0400
@@ -144,6 +144,7 @@ static struct {
 	{ FC_PORTSTATE_ERROR,		"Error" },
 	{ FC_PORTSTATE_LOOPBACK,	"Loopback" },
 	{ FC_PORTSTATE_DELETED,		"Deleted" },
+	{ FC_PORTSTATE_DELETING,	"Deleting" },
 };
 fc_enum_name_search(port_state, fc_port_state, fc_port_state_names)
 #define FC_PORTSTATE_MAX_NAMELEN	20
@@ -2347,9 +2348,28 @@ fc_starget_delete(struct work_struct *wo
 {
 	struct fc_rport *rport =
 		container_of(work, struct fc_rport, stgt_delete_work);
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	unsigned long flags;
 
 	fc_terminate_rport_io(rport);
 	scsi_remove_target(&rport->dev);
+
+	spin_lock_irqsave(shost->host_lock, flags);
+
+	rport->flags &= ~FC_RPORT_STGT_DEL_INPROG;
+
+	/*
+	 * if the final transition to NOTPRESENT was waiting on our
+	 * scsi teardown calls to finish, make the transition now.
+	 */
+	if (rport->flags & FC_RPORT_NOTPRESENT_NEEDED) {
+		BUG_ON(rport->port_state != FC_PORTSTATE_DELETING);
+		rport->flags &= ~FC_RPORT_NOTPRESENT_NEEDED;
+		rport->port_state = FC_PORTSTATE_NOTPRESENT;
+		complete(&rport->deleting);
+	}
+
+	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
 
@@ -2694,6 +2714,19 @@ fc_remote_port_add(struct Scsi_Host *sho
 			}
 
 			if (match) {
+				if (rport->port_state == FC_PORTSTATE_DELETING) {
+					/*
+					 * We need to stall until the delete
+					 * steps are complete
+					 */
+					spin_unlock_irqrestore(shost->host_lock,
+								flags);
+					wait_for_completion(&rport->deleting);
+					spin_lock_irqsave(shost->host_lock,
+								flags);
+					BUG_ON(rport->port_state !=
+						FC_PORTSTATE_NOTPRESENT);
+				}
 				list_move_tail(&rport->peers, &fc_host->rports);
 				break;
 			}
@@ -3007,7 +3040,8 @@ fc_timeout_deleted_rport(struct work_str
 	rport->maxframe_size = -1;
 	rport->supported_classes = FC_COS_UNSPECIFIED;
 	rport->roles = FC_PORT_ROLE_UNKNOWN;
-	rport->port_state = FC_PORTSTATE_NOTPRESENT;
+	rport->port_state = FC_PORTSTATE_DELETING;
+	init_completion(&rport->deleting);
 	rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
 	rport->flags |= FC_RPORT_DEVLOSS_CALLBK_DONE;
 
@@ -3019,7 +3053,8 @@ fc_timeout_deleted_rport(struct work_str
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	fc_terminate_rport_io(rport);
 
-	BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
+	spin_lock_irqsave(shost->host_lock, flags);
+	BUG_ON(rport->port_state != FC_PORTSTATE_DELETING);
 
 	/* remove the identifiers that aren't used in the consisting binding
*/
 	switch (fc_host->tgtid_bind_type) {
@@ -3040,12 +3075,20 @@ fc_timeout_deleted_rport(struct work_str
 	}
 
 	/*
+	 * Indicate we have an outstanding workq element to delete
+	 * the starget
+	 */
+	rport->flags |= FC_RPORT_STGT_DEL_INPROG;
+
+	/*
 	 * As this only occurs if the remote port (scsi target)
 	 * went away and didn't come back - we'll remove
 	 * all attached scsi devices.
 	 */
 	fc_queue_work(shost, &rport->stgt_delete_work);
 
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
 	/*
 	 * Notify the driver that the rport is now dead. The LLDD will
 	 * also guarantee that any communication to the rport is terminated
@@ -3054,6 +3097,25 @@ fc_timeout_deleted_rport(struct work_str
 	 */
 	if (i->f->dev_loss_tmo_callbk)
 		i->f->dev_loss_tmo_callbk(rport);
+
+	spin_lock_irqsave(shost->host_lock, flags);
+
+	/*
+	 * if the starget teardown is complete, we can do the
+	 * DELETING->NOTPRESENT transition.
+	 */
+	if ( !(rport->flags & FC_RPORT_STGT_DEL_INPROG)) {
+		rport->port_state = FC_PORTSTATE_NOTPRESENT;
+		complete(&rport->deleting);
+
+	} else
+		/*
+		 * Otherwise, mark the rport so that starget teardown
+		 * does the DELETING->NOTPRESENT transition.
+		 */
+		rport->flags |= FC_RPORT_NOTPRESENT_NEEDED;
+
+	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
 
diff -upNr a/include/scsi/scsi_transport_fc.h
b/include/scsi/scsi_transport_fc.h
--- a/include/scsi/scsi_transport_fc.h	2009-01-27 09:44:40.000000000
-0500
+++ b/include/scsi/scsi_transport_fc.h	2009-04-22 13:22:04.000000000
-0400
@@ -82,6 +82,7 @@ enum fc_port_state {
 	FC_PORTSTATE_ERROR,
 	FC_PORTSTATE_LOOPBACK,
 	FC_PORTSTATE_DELETED,
+	FC_PORTSTATE_DELETING,
 };
 
 
@@ -352,6 +353,7 @@ struct fc_rport {	/* aka fc_starget_attr
  	struct delayed_work fail_io_work;
  	struct work_struct stgt_delete_work;
 	struct work_struct rport_delete_work;
+	struct completion deleting;
 } __attribute__((aligned(sizeof(unsigned long))));
 
 /* bit field values for struct fc_rport "flags" field: */
@@ -359,6 +361,8 @@ struct fc_rport {	/* aka fc_starget_attr
 #define FC_RPORT_SCAN_PENDING		0x02
 #define FC_RPORT_FAST_FAIL_TIMEDOUT	0x04
 #define FC_RPORT_DEVLOSS_CALLBK_DONE	0x08
+#define FC_RPORT_STGT_DEL_INPROG	0x10
+#define FC_RPORT_NOTPRESENT_NEEDED	0x20
 
 #define	dev_to_rport(d)				\
 	container_of(d, struct fc_rport, dev)
@@ -685,6 +689,7 @@ fc_remote_port_chkready(struct fc_rport 
 			result = DID_NO_CONNECT << 16;
 		break;
 	case FC_PORTSTATE_BLOCKED:
+	case FC_PORTSTATE_DELETING:
 		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
 			result = DID_TRANSPORT_FAILFAST << 16;
 		else


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-22 18:01 [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo James Smart
@ 2009-04-22 19:18 ` Mike Christie
  2009-04-22 19:20   ` James Smart
  2009-04-22 20:49   ` Abhijeet Arvind Joglekar (abjoglek)
  2009-05-11 12:25 ` Ralph Wuerthner
  1 sibling, 2 replies; 11+ messages in thread
From: Mike Christie @ 2009-04-22 19:18 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, ralphw@linux.vnet.ibm.com, Abhijeet Joglekar

James Smart wrote:
> Mike, Ralph,
> 
> See if this helps. It compiles, but I haven't tested it.
> 
> This modifies the final deletion steps, after dev_loss_tmo has fired,
> such that we keep a flag indicating we're deleting, and we only clear
> the flag once the additional steps that we had to perform w/o lock,
> are complete.  Then, in fc_remote_port_add(), when we fall into the
> case of using the rports for the bindings, which corresponds to the
> "open" unlocked code area, we stall and wait for the delete to
> finish before completing the rest of the add.
> 
> I'm a little nervous about the delay in fc_remote_port_add, but it
> should be quick, and our callees should be in a context that it's
> allowed.
> 

I think we can just revert the patch if you want. I do not think it is 
needed for the fnic driver anymore. It was needed because fnic needed 
the rport port id to be set when terminate_rport_io was called right? I 
do not think it is needed now, because it looks like the driver will 
look that up from another code path now.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-22 19:18 ` Mike Christie
@ 2009-04-22 19:20   ` James Smart
  2009-04-22 20:49   ` Abhijeet Arvind Joglekar (abjoglek)
  1 sibling, 0 replies; 11+ messages in thread
From: James Smart @ 2009-04-22 19:20 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi@vger.kernel.org, ralphw@linux.vnet.ibm.com,
	Abhijeet Joglekar



Mike Christie wrote:
> James Smart wrote:
>   
>> Mike, Ralph,
>>
>> See if this helps. It compiles, but I haven't tested it.
>>
>> This modifies the final deletion steps, after dev_loss_tmo has fired,
>> such that we keep a flag indicating we're deleting, and we only clear
>> the flag once the additional steps that we had to perform w/o lock,
>> are complete.  Then, in fc_remote_port_add(), when we fall into the
>> case of using the rports for the bindings, which corresponds to the
>> "open" unlocked code area, we stall and wait for the delete to
>> finish before completing the rest of the add.
>> 
>> I'm a little nervous about the delay in fc_remote_port_add, but it
>> should be quick, and our callees should be in a context that it's
>> allowed.
>>
>>     
>
> I think we can just revert the patch if you want. I do not think it is 
> needed for the fnic driver anymore. It was needed because fnic needed 
> the rport port id to be set when terminate_rport_io was called right? 
Correct.
> I 
> do not think it is needed now, because it looks like the driver will 
> look that up from another code path now.
>   
If this is true - then yes, reverting the patch would be the best option.

-- james

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-22 19:18 ` Mike Christie
  2009-04-22 19:20   ` James Smart
@ 2009-04-22 20:49   ` Abhijeet Arvind Joglekar (abjoglek)
  1 sibling, 0 replies; 11+ messages in thread
From: Abhijeet Arvind Joglekar (abjoglek) @ 2009-04-22 20:49 UTC (permalink / raw)
  To: Mike Christie, James.Smart; +Cc: linux-scsi, ralphw, Abhijeet Joglekar

> -----Original Message-----
> From: Mike Christie [mailto:michaelc@cs.wisc.edu] 
> Sent: Wednesday, April 22, 2009 12:18 PM
> To: James.Smart@Emulex.Com
> Cc: linux-scsi@vger.kernel.org; ralphw@linux.vnet.ibm.com; 
> Abhijeet Joglekar
> Subject: Re: [PATCH] fc transport: pre-emptively terminate 
> i/o upon dev_loss_tmo
> 
> James Smart wrote:
> > Mike, Ralph,
> > 
> > See if this helps. It compiles, but I haven't tested it.
> > 
> > This modifies the final deletion steps, after dev_loss_tmo 
> has fired, 
> > such that we keep a flag indicating we're deleting, and we 
> only clear 
> > the flag once the additional steps that we had to perform w/o lock, 
> > are complete.  Then, in fc_remote_port_add(), when we fall into the 
> > case of using the rports for the bindings, which corresponds to the 
> > "open" unlocked code area, we stall and wait for the delete 
> to finish 
> > before completing the rest of the add.
> > 
> > I'm a little nervous about the delay in fc_remote_port_add, but it 
> > should be quick, and our callees should be in a context that it's 
> > allowed.
> > 
> 
> I think we can just revert the patch if you want. I do not 
> think it is needed for the fnic driver anymore. It was needed 
> because fnic needed the rport port id to be set when 
> terminate_rport_io was called right? I do not think it is 
> needed now, because it looks like the driver will look that 
> up from another code path now.

Yes, it is ok to revert the patch. fnic is not using the rport_id in its terminate_rport_io callback.

-- abhijeet

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-22 18:01 [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo James Smart
  2009-04-22 19:18 ` Mike Christie
@ 2009-05-11 12:25 ` Ralph Wuerthner
  1 sibling, 0 replies; 11+ messages in thread
From: Ralph Wuerthner @ 2009-05-11 12:25 UTC (permalink / raw)
  To: James.Smart; +Cc: linux-scsi, michaelc@cs.wisc.edu

On Wed, 22 Apr 2009 14:01:31 -0400
James Smart <James.Smart@Emulex.Com> wrote:

> Mike, Ralph,
> 
> See if this helps. It compiles, but I haven't tested it.
> 
> This modifies the final deletion steps, after dev_loss_tmo has fired,
> such that we keep a flag indicating we're deleting, and we only clear
> the flag once the additional steps that we had to perform w/o lock,
> are complete.  Then, in fc_remote_port_add(), when we fall into the
> case of using the rports for the bindings, which corresponds to the
> "open" unlocked code area, we stall and wait for the delete to
> finish before completing the rest of the add.
> 
> I'm a little nervous about the delay in fc_remote_port_add, but it
> should be quick, and our callees should be in a context that it's
> allowed.
> 
> -- james s
> 
> 
>  Signed-off-by: James Smart <james.smart@emulex.com>
> 
>  ---
> 
>  drivers/scsi/scsi_transport_fc.c |   66
> +++++++++++++++++++++++++++++++++++++--
>  include/scsi/scsi_transport_fc.h |    5 ++
>  2 files changed, 69 insertions(+), 2 deletions(-)
> 
> 
> diff -upNr a/drivers/scsi/scsi_transport_fc.c
> b/drivers/scsi/scsi_transport_fc.c
> --- a/drivers/scsi/scsi_transport_fc.c	2009-04-06
> 12:13:13.000000000 -0400
> +++ b/drivers/scsi/scsi_transport_fc.c	2009-04-22
> 13:44:22.000000000 -0400
> @@ -144,6 +144,7 @@ static struct {
>  	{ FC_PORTSTATE_ERROR,		"Error" },
>  	{ FC_PORTSTATE_LOOPBACK,	"Loopback" },
>  	{ FC_PORTSTATE_DELETED,		"Deleted" },
> +	{ FC_PORTSTATE_DELETING,	"Deleting" },
>  };
>  fc_enum_name_search(port_state, fc_port_state, fc_port_state_names)
>  #define FC_PORTSTATE_MAX_NAMELEN	20
> @@ -2347,9 +2348,28 @@ fc_starget_delete(struct work_struct *wo
>  {
>  	struct fc_rport *rport =
>  		container_of(work, struct fc_rport,
> stgt_delete_work);
> +	struct Scsi_Host *shost = rport_to_shost(rport);
> +	unsigned long flags;
> 
>  	fc_terminate_rport_io(rport);
>  	scsi_remove_target(&rport->dev);
> +
> +	spin_lock_irqsave(shost->host_lock, flags);
> +
> +	rport->flags &= ~FC_RPORT_STGT_DEL_INPROG;
> +
> +	/*
> +	 * if the final transition to NOTPRESENT was waiting on our
> +	 * scsi teardown calls to finish, make the transition now.
> +	 */
> +	if (rport->flags & FC_RPORT_NOTPRESENT_NEEDED) {
> +		BUG_ON(rport->port_state != FC_PORTSTATE_DELETING);
> +		rport->flags &= ~FC_RPORT_NOTPRESENT_NEEDED;
> +		rport->port_state = FC_PORTSTATE_NOTPRESENT;
> +		complete(&rport->deleting);
> +	}
> +
> +	spin_unlock_irqrestore(shost->host_lock, flags);
>  }
> 
> 
> @@ -2694,6 +2714,19 @@ fc_remote_port_add(struct Scsi_Host *sho
>  			}
> 
>  			if (match) {
> +				if (rport->port_state ==
> FC_PORTSTATE_DELETING) {
> +					/*
> +					 * We need to stall until
> the delete
> +					 * steps are complete
> +					 */
> +
> spin_unlock_irqrestore(shost->host_lock,
> +
> flags);
> +
> wait_for_completion(&rport->deleting);
> +
> spin_lock_irqsave(shost->host_lock,
> +
> flags);
> +					BUG_ON(rport->port_state !=
> +
> FC_PORTSTATE_NOTPRESENT);
> +				}
>  				list_move_tail(&rport->peers,
> &fc_host->rports); break;
>  			}
> @@ -3007,7 +3040,8 @@ fc_timeout_deleted_rport(struct work_str
>  	rport->maxframe_size = -1;
>  	rport->supported_classes = FC_COS_UNSPECIFIED;
>  	rport->roles = FC_PORT_ROLE_UNKNOWN;
> -	rport->port_state = FC_PORTSTATE_NOTPRESENT;
> +	rport->port_state = FC_PORTSTATE_DELETING;
> +	init_completion(&rport->deleting);
>  	rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
>  	rport->flags |= FC_RPORT_DEVLOSS_CALLBK_DONE;
> 
> @@ -3019,7 +3053,8 @@ fc_timeout_deleted_rport(struct work_str
>  	spin_unlock_irqrestore(shost->host_lock, flags);
>  	fc_terminate_rport_io(rport);
> 
> -	BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
> +	spin_lock_irqsave(shost->host_lock, flags);
> +	BUG_ON(rport->port_state != FC_PORTSTATE_DELETING);
> 
>  	/* remove the identifiers that aren't used in the consisting
> binding */
>  	switch (fc_host->tgtid_bind_type) {
> @@ -3040,12 +3075,20 @@ fc_timeout_deleted_rport(struct work_str
>  	}
> 
>  	/*
> +	 * Indicate we have an outstanding workq element to delete
> +	 * the starget
> +	 */
> +	rport->flags |= FC_RPORT_STGT_DEL_INPROG;
> +
> +	/*
>  	 * As this only occurs if the remote port (scsi target)
>  	 * went away and didn't come back - we'll remove
>  	 * all attached scsi devices.
>  	 */
>  	fc_queue_work(shost, &rport->stgt_delete_work);
> 
> +	spin_unlock_irqrestore(shost->host_lock, flags);
> +
>  	/*
>  	 * Notify the driver that the rport is now dead. The LLDD
> will
>  	 * also guarantee that any communication to the rport is
> terminated @@ -3054,6 +3097,25 @@ fc_timeout_deleted_rport(struct
> work_str */
>  	if (i->f->dev_loss_tmo_callbk)
>  		i->f->dev_loss_tmo_callbk(rport);
> +
> +	spin_lock_irqsave(shost->host_lock, flags);
> +
> +	/*
> +	 * if the starget teardown is complete, we can do the
> +	 * DELETING->NOTPRESENT transition.
> +	 */
> +	if ( !(rport->flags & FC_RPORT_STGT_DEL_INPROG)) {
> +		rport->port_state = FC_PORTSTATE_NOTPRESENT;
> +		complete(&rport->deleting);
> +
> +	} else
> +		/*
> +		 * Otherwise, mark the rport so that starget teardown
> +		 * does the DELETING->NOTPRESENT transition.
> +		 */
> +		rport->flags |= FC_RPORT_NOTPRESENT_NEEDED;
> +
> +	spin_unlock_irqrestore(shost->host_lock, flags);
>  }
> 
> 
> diff -upNr a/include/scsi/scsi_transport_fc.h
> b/include/scsi/scsi_transport_fc.h
> --- a/include/scsi/scsi_transport_fc.h	2009-01-27
> 09:44:40.000000000 -0500
> +++ b/include/scsi/scsi_transport_fc.h	2009-04-22
> 13:22:04.000000000 -0400
> @@ -82,6 +82,7 @@ enum fc_port_state {
>  	FC_PORTSTATE_ERROR,
>  	FC_PORTSTATE_LOOPBACK,
>  	FC_PORTSTATE_DELETED,
> +	FC_PORTSTATE_DELETING,
>  };
> 
> 
> @@ -352,6 +353,7 @@ struct fc_rport {	/* aka fc_starget_attr
>   	struct delayed_work fail_io_work;
>   	struct work_struct stgt_delete_work;
>  	struct work_struct rport_delete_work;
> +	struct completion deleting;
>  } __attribute__((aligned(sizeof(unsigned long))));
> 
>  /* bit field values for struct fc_rport "flags" field: */
> @@ -359,6 +361,8 @@ struct fc_rport {	/* aka fc_starget_attr
>  #define FC_RPORT_SCAN_PENDING		0x02
>  #define FC_RPORT_FAST_FAIL_TIMEDOUT	0x04
>  #define FC_RPORT_DEVLOSS_CALLBK_DONE	0x08
> +#define FC_RPORT_STGT_DEL_INPROG	0x10
> +#define FC_RPORT_NOTPRESENT_NEEDED	0x20
> 
>  #define	dev_to_rport(d)				\
>  	container_of(d, struct fc_rport, dev)
> @@ -685,6 +689,7 @@ fc_remote_port_chkready(struct fc_rport 
>  			result = DID_NO_CONNECT << 16;
>  		break;
>  	case FC_PORTSTATE_BLOCKED:
> +	case FC_PORTSTATE_DELETING:
>  		if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
>  			result = DID_TRANSPORT_FAILFAST << 16;
>  		else
> 
> 

We tested this patch on our systems and at least it didn't break
anything. But we don't know for sure if we really hit the original
problem again since it is very timing sensitive. I'm proposing to
include this patch upstream.

-- 
Ralph Wuerthner
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
@ 2009-04-09 11:04 Ralph Wuerthner
  2009-04-21 21:43 ` Mike Christie
  0 siblings, 1 reply; 11+ messages in thread
From: Ralph Wuerthner @ 2009-04-09 11:04 UTC (permalink / raw)
  To: linux-scsi, james.smart; +Cc: michaelc

Mike Christie wrote:
> James Smart wrote:
> > Pre-emptively terminate i/o on the rport if dev_loss_tmo has fired.
> > The desire is to terminate everything, so that the i/o is cleaned up
> > prior to the sdev's being unblocked, thus any outstanding timeouts/aborts
> > are avoided.
> > 
> > Also, we do this early enough such that the rport's port_id field is
> > still valid. FCOE libFC code needs this info to find the i/o's to
> > terminate.
> > 
> > -- james s
> > 
> >  Signed-off-by: James Smart <james.smart@emulex.com>
> > 
> >  ---
> > 
> >  scsi_transport_fc.c |   11 ++++++++++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> > 
> > 
> > diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
> > --- a/drivers/scsi/scsi_transport_fc.c	2008-10-18 10:32:52.000000000 -0400
> > +++ b/drivers/scsi/scsi_transport_fc.c	2008-12-05 12:13:54.000000000 -0500
> > @@ -3012,6 +3012,16 @@ fc_timeout_deleted_rport(struct work_str
> >  	rport->port_state = FC_PORTSTATE_NOTPRESENT;
> >  	rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
> >  
> > +	/*
> > +	 * Pre-emptively kill I/O rather than waiting for the work queue
> > +	 * item to teardown the starget. (FCOE libFC folks prefer this
> > +	 * and to have the rport_port_id still set when it's done).
> > +	 */
> > +	spin_unlock_irqrestore(shost->host_lock, flags);
> > +	fc_terminate_rport_io(rport);
> > +
> > +	BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
> > +
> 
> I think we could this this bug_on or we might hit a problem below where 
> this thread is changing the port_id but some other thread is calling 
> fc_remote_port_add and changging the port id.
> 
> I think the problem is that fc_remote_port_add only calls fc_flush_work 
> which flushes the fc_host work_q:
> 
> 
> struct fc_rport *
> () (struct Scsi_Host *shost, int channel,
>          struct fc_rport_identifiers  *ids)
> {
>          struct fc_internal *fci = to_fc_internal(shost->transportt);
>          struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
>          struct fc_rport *rport;
>          unsigned long flags;
>          int match = 0;
> 
>          /* ensure any stgt delete functions are done */
>          fc_flush_work(shost);
> 
> 
> But fc_timeout_deleted_rport dev_loss_work is running on the devloss_work_q.
> 
> So if fc_timeout_deleted_rport grabs the host lock first, it will set 
> the port state to FC_PORTSTATE_NOTPRESENT, then drop the lock.
> 
> Once fc_timeout_delete_rport drops the lock, fc_remote_port_add could 
> have already passed the fc_flush_work() call because there was nothing 
> on it. fc_remote_port_add would then drop down to the list search on the 
> bindings array and see the remote port. It would then start setting the 
> port state, id and port_name and node_name, but at the same time, 
> because the host lock no longer guards it, fc_timedout_deleted_rport 
> could be fiddling with the same fields and we could end up a mix of 
> values or it could be running over the BUG_ON.
> 
> Is that right?
> 
> If so do we just want to flush the devloss_work_queue in fc_remote_port_add?
> 
> 
> 
> >  	/* remove the identifiers that aren't used in the consisting binding */
> >  	switch (fc_host->tgtid_bind_type) {
> >  	case FC_TGTID_BIND_BY_WWPN:
> > @@ -3035,7 +3045,6 @@ fc_timeout_deleted_rport(struct work_str
> >  	 * went away and didn't come back - we'll remove
> >  	 * all attached scsi devices.
> >  	 */
> > -	spin_unlock_irqrestore(shost->host_lock, flags);
> >  
> >  	scsi_target_unblock(&rport->dev);
> >  	fc_queue_work(shost, &rport->stgt_delete_work);
> > 

One of our tester hit the problem Michael describes above: right after
releasing the shost->host_lock lock in fc_timeout_deleted_rport() the
remote port was rediscovered by the LLDD (in our case zfcp),
fc_remote_port_add() was called and BUG_ON() triggered.

But flushing the devloss_work_queue in fc_remote_port_add() won't help
either because it still would be possible that right after the
flush_workqueue() call a timer could trigger and fc_timeout_deleted_rport()
can be called on a different cpu before fc_remote_port_add() grabs
the lock and we end up again with inconsistent data.

To me the purpose of the BUG_ON() is not clear: re-adding a remote port
should be possible at any time even after the devloss timeout triggered.
And as Michael already mentioned, modifying the identifiers without
holding the lock can lead to inconsistent remote port data. What about
the following solution:

---
 drivers/scsi/scsi_transport_fc.c |   36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

Index: linux-2.5/drivers/scsi/scsi_transport_fc.c
===================================================================
--- linux-2.5.orig/drivers/scsi/scsi_transport_fc.c
+++ linux-2.5/drivers/scsi/scsi_transport_fc.c
@@ -3027,25 +3027,27 @@ fc_timeout_deleted_rport(struct work_str
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	fc_terminate_rport_io(rport);
 
-	BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
-
 	/* remove the identifiers that aren't used in the consisting binding */
-	switch (fc_host->tgtid_bind_type) {
-	case FC_TGTID_BIND_BY_WWPN:
-		rport->node_name = -1;
-		rport->port_id = -1;
-		break;
-	case FC_TGTID_BIND_BY_WWNN:
-		rport->port_name = -1;
-		rport->port_id = -1;
-		break;
-	case FC_TGTID_BIND_BY_ID:
-		rport->node_name = -1;
-		rport->port_name = -1;
-		break;
-	case FC_TGTID_BIND_NONE:	/* to keep compiler happy */
-		break;
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (rport->port_state == FC_PORTSTATE_NOTPRESENT) {
+		switch (fc_host->tgtid_bind_type) {
+		case FC_TGTID_BIND_BY_WWPN:
+			rport->node_name = -1;
+			rport->port_id = -1;
+			break;
+		case FC_TGTID_BIND_BY_WWNN:
+			rport->port_name = -1;
+			rport->port_id = -1;
+			break;
+		case FC_TGTID_BIND_BY_ID:
+			rport->node_name = -1;
+			rport->port_name = -1;
+			break;
+		case FC_TGTID_BIND_NONE:	/* to keep compiler happy */
+			break;
+		}
 	}
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	/*
 	 * As this only occurs if the remote port (scsi target)

--
Ralph Wuerthner

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-09 11:04 Ralph Wuerthner
@ 2009-04-21 21:43 ` Mike Christie
  2009-04-21 21:52   ` Mike Christie
  0 siblings, 1 reply; 11+ messages in thread
From: Mike Christie @ 2009-04-21 21:43 UTC (permalink / raw)
  To: Ralph Wuerthner; +Cc: linux-scsi, james.smart

Ralph Wuerthner wrote:
> Mike Christie wrote:
>> James Smart wrote:
>>> Pre-emptively terminate i/o on the rport if dev_loss_tmo has fired.
>>> The desire is to terminate everything, so that the i/o is cleaned up
>>> prior to the sdev's being unblocked, thus any outstanding timeouts/aborts
>>> are avoided.
>>>
>>> Also, we do this early enough such that the rport's port_id field is
>>> still valid. FCOE libFC code needs this info to find the i/o's to
>>> terminate.
>>>
>>> -- james s
>>>
>>>  Signed-off-by: James Smart <james.smart@emulex.com>
>>>
>>>  ---
>>>
>>>  scsi_transport_fc.c |   11 ++++++++++-
>>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>>
>>>
>>> diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
>>> --- a/drivers/scsi/scsi_transport_fc.c	2008-10-18 10:32:52.000000000 -0400
>>> +++ b/drivers/scsi/scsi_transport_fc.c	2008-12-05 12:13:54.000000000 -0500
>>> @@ -3012,6 +3012,16 @@ fc_timeout_deleted_rport(struct work_str
>>>  	rport->port_state = FC_PORTSTATE_NOTPRESENT;
>>>  	rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
>>>  
>>> +	/*
>>> +	 * Pre-emptively kill I/O rather than waiting for the work queue
>>> +	 * item to teardown the starget. (FCOE libFC folks prefer this
>>> +	 * and to have the rport_port_id still set when it's done).
>>> +	 */
>>> +	spin_unlock_irqrestore(shost->host_lock, flags);
>>> +	fc_terminate_rport_io(rport);
>>> +
>>> +	BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
>>> +
>> I think we could this this bug_on or we might hit a problem below where 
>> this thread is changing the port_id but some other thread is calling 
>> fc_remote_port_add and changging the port id.
>>
>> I think the problem is that fc_remote_port_add only calls fc_flush_work 
>> which flushes the fc_host work_q:
>>
>>
>> struct fc_rport *
>> () (struct Scsi_Host *shost, int channel,
>>          struct fc_rport_identifiers  *ids)
>> {
>>          struct fc_internal *fci = to_fc_internal(shost->transportt);
>>          struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
>>          struct fc_rport *rport;
>>          unsigned long flags;
>>          int match = 0;
>>
>>          /* ensure any stgt delete functions are done */
>>          fc_flush_work(shost);
>>
>>
>> But fc_timeout_deleted_rport dev_loss_work is running on the devloss_work_q.
>>
>> So if fc_timeout_deleted_rport grabs the host lock first, it will set 
>> the port state to FC_PORTSTATE_NOTPRESENT, then drop the lock.
>>
>> Once fc_timeout_delete_rport drops the lock, fc_remote_port_add could 
>> have already passed the fc_flush_work() call because there was nothing 
>> on it. fc_remote_port_add would then drop down to the list search on the 
>> bindings array and see the remote port. It would then start setting the 
>> port state, id and port_name and node_name, but at the same time, 
>> because the host lock no longer guards it, fc_timedout_deleted_rport 
>> could be fiddling with the same fields and we could end up a mix of 
>> values or it could be running over the BUG_ON.
>>
>> Is that right?
>>
>> If so do we just want to flush the devloss_work_queue in fc_remote_port_add?
>>
>>
>>
>>>  	/* remove the identifiers that aren't used in the consisting binding */
>>>  	switch (fc_host->tgtid_bind_type) {
>>>  	case FC_TGTID_BIND_BY_WWPN:
>>> @@ -3035,7 +3045,6 @@ fc_timeout_deleted_rport(struct work_str
>>>  	 * went away and didn't come back - we'll remove
>>>  	 * all attached scsi devices.
>>>  	 */
>>> -	spin_unlock_irqrestore(shost->host_lock, flags);
>>>  
>>>  	scsi_target_unblock(&rport->dev);
>>>  	fc_queue_work(shost, &rport->stgt_delete_work);
>>>
> 
> One of our tester hit the problem Michael describes above: right after
> releasing the shost->host_lock lock in fc_timeout_deleted_rport() the
> remote port was rediscovered by the LLDD (in our case zfcp),
> fc_remote_port_add() was called and BUG_ON() triggered.
> 
> But flushing the devloss_work_queue in fc_remote_port_add() won't help
> either because it still would be possible that right after the
> flush_workqueue() call a timer could trigger and fc_timeout_deleted_rport()


What timer could that be? Only the dev_loss_work delayed work calls 
fc_timeout_deleted_rport.

Are you thinking about fc_rport_final_delete?

If you do the
                 if (!cancel_delayed_work(&rport->dev_loss_work))
                         fc_flush_devloss(shost);

sequence we should be ok I think, because after that point it should 
have either been canceled or the work has completed.

If not then I think we might have problems in other places.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-21 21:43 ` Mike Christie
@ 2009-04-21 21:52   ` Mike Christie
  2009-04-21 22:12     ` Mike Christie
  0 siblings, 1 reply; 11+ messages in thread
From: Mike Christie @ 2009-04-21 21:52 UTC (permalink / raw)
  To: Ralph Wuerthner; +Cc: linux-scsi, james.smart

[-- Attachment #1: Type: text/plain, Size: 4990 bytes --]

Mike Christie wrote:
> Ralph Wuerthner wrote:
>> Mike Christie wrote:
>>> James Smart wrote:
>>>> Pre-emptively terminate i/o on the rport if dev_loss_tmo has fired.
>>>> The desire is to terminate everything, so that the i/o is cleaned up
>>>> prior to the sdev's being unblocked, thus any outstanding 
>>>> timeouts/aborts
>>>> are avoided.
>>>>
>>>> Also, we do this early enough such that the rport's port_id field is
>>>> still valid. FCOE libFC code needs this info to find the i/o's to
>>>> terminate.
>>>>
>>>> -- james s
>>>>
>>>>  Signed-off-by: James Smart <james.smart@emulex.com>
>>>>
>>>>  ---
>>>>
>>>>  scsi_transport_fc.c |   11 ++++++++++-
>>>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>>
>>>> diff -upNr a/drivers/scsi/scsi_transport_fc.c 
>>>> b/drivers/scsi/scsi_transport_fc.c
>>>> --- a/drivers/scsi/scsi_transport_fc.c    2008-10-18 
>>>> 10:32:52.000000000 -0400
>>>> +++ b/drivers/scsi/scsi_transport_fc.c    2008-12-05 
>>>> 12:13:54.000000000 -0500
>>>> @@ -3012,6 +3012,16 @@ fc_timeout_deleted_rport(struct work_str
>>>>      rport->port_state = FC_PORTSTATE_NOTPRESENT;
>>>>      rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
>>>>  
>>>> +    /*
>>>> +     * Pre-emptively kill I/O rather than waiting for the work queue
>>>> +     * item to teardown the starget. (FCOE libFC folks prefer this
>>>> +     * and to have the rport_port_id still set when it's done).
>>>> +     */
>>>> +    spin_unlock_irqrestore(shost->host_lock, flags);
>>>> +    fc_terminate_rport_io(rport);
>>>> +
>>>> +    BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
>>>> +
>>> I think we could this this bug_on or we might hit a problem below 
>>> where this thread is changing the port_id but some other thread is 
>>> calling fc_remote_port_add and changging the port id.
>>>
>>> I think the problem is that fc_remote_port_add only calls 
>>> fc_flush_work which flushes the fc_host work_q:
>>>
>>>
>>> struct fc_rport *
>>> () (struct Scsi_Host *shost, int channel,
>>>          struct fc_rport_identifiers  *ids)
>>> {
>>>          struct fc_internal *fci = to_fc_internal(shost->transportt);
>>>          struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
>>>          struct fc_rport *rport;
>>>          unsigned long flags;
>>>          int match = 0;
>>>
>>>          /* ensure any stgt delete functions are done */
>>>          fc_flush_work(shost);
>>>
>>>
>>> But fc_timeout_deleted_rport dev_loss_work is running on the 
>>> devloss_work_q.
>>>
>>> So if fc_timeout_deleted_rport grabs the host lock first, it will set 
>>> the port state to FC_PORTSTATE_NOTPRESENT, then drop the lock.
>>>
>>> Once fc_timeout_delete_rport drops the lock, fc_remote_port_add could 
>>> have already passed the fc_flush_work() call because there was 
>>> nothing on it. fc_remote_port_add would then drop down to the list 
>>> search on the bindings array and see the remote port. It would then 
>>> start setting the port state, id and port_name and node_name, but at 
>>> the same time, because the host lock no longer guards it, 
>>> fc_timedout_deleted_rport could be fiddling with the same fields and 
>>> we could end up a mix of values or it could be running over the BUG_ON.
>>>
>>> Is that right?
>>>
>>> If so do we just want to flush the devloss_work_queue in 
>>> fc_remote_port_add?
>>>
>>>
>>>
>>>>      /* remove the identifiers that aren't used in the consisting 
>>>> binding */
>>>>      switch (fc_host->tgtid_bind_type) {
>>>>      case FC_TGTID_BIND_BY_WWPN:
>>>> @@ -3035,7 +3045,6 @@ fc_timeout_deleted_rport(struct work_str
>>>>       * went away and didn't come back - we'll remove
>>>>       * all attached scsi devices.
>>>>       */
>>>> -    spin_unlock_irqrestore(shost->host_lock, flags);
>>>>  
>>>>      scsi_target_unblock(&rport->dev);
>>>>      fc_queue_work(shost, &rport->stgt_delete_work);
>>>>
>>
>> One of our tester hit the problem Michael describes above: right after
>> releasing the shost->host_lock lock in fc_timeout_deleted_rport() the
>> remote port was rediscovered by the LLDD (in our case zfcp),
>> fc_remote_port_add() was called and BUG_ON() triggered.
>>
>> But flushing the devloss_work_queue in fc_remote_port_add() won't help
>> either because it still would be possible that right after the
>> flush_workqueue() call a timer could trigger and 
>> fc_timeout_deleted_rport()
> 
> 
> What timer could that be? Only the dev_loss_work delayed work calls 
> fc_timeout_deleted_rport.
> 
> Are you thinking about fc_rport_final_delete?
> 
> If you do the
>                 if (!cancel_delayed_work(&rport->dev_loss_work))
>                         fc_flush_devloss(shost);
> 
> sequence we should be ok I think, because after that point it should 
> have either been canceled or the work has completed.
> 

I did this in the attached patch. It also will prevent drivers from 
having to worry about about if the terminate rport IO or dev loss 
callback is running at the same time fc_remote_port_add is run.

[-- Attachment #2: flush-everything.patch --]
[-- Type: text/plain, Size: 614 bytes --]

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index a152f89..a45c5ed 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2564,6 +2564,10 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 	/* ensure any stgt delete functions are done */
 	fc_flush_work(shost);
 
+	/* ensure dev loss or fast io fail are done */
+	if (!cancel_delayed_work(&rport->dev_loss_work))
+		fc_flush_devloss(shost);
+
 	/*
 	 * Search the list of "active" rports, for an rport that has been
 	 * deleted, but we've held off the real delete while the target

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-21 21:52   ` Mike Christie
@ 2009-04-21 22:12     ` Mike Christie
  2009-04-22 12:13       ` Ralph Wuerthner
  0 siblings, 1 reply; 11+ messages in thread
From: Mike Christie @ 2009-04-21 22:12 UTC (permalink / raw)
  To: Ralph Wuerthner; +Cc: linux-scsi, james.smart

[-- Attachment #1: Type: text/plain, Size: 5251 bytes --]

Mike Christie wrote:
> Mike Christie wrote:
>> Ralph Wuerthner wrote:
>>> Mike Christie wrote:
>>>> James Smart wrote:
>>>>> Pre-emptively terminate i/o on the rport if dev_loss_tmo has fired.
>>>>> The desire is to terminate everything, so that the i/o is cleaned up
>>>>> prior to the sdev's being unblocked, thus any outstanding 
>>>>> timeouts/aborts
>>>>> are avoided.
>>>>>
>>>>> Also, we do this early enough such that the rport's port_id field is
>>>>> still valid. FCOE libFC code needs this info to find the i/o's to
>>>>> terminate.
>>>>>
>>>>> -- james s
>>>>>
>>>>>  Signed-off-by: James Smart <james.smart@emulex.com>
>>>>>
>>>>>  ---
>>>>>
>>>>>  scsi_transport_fc.c |   11 ++++++++++-
>>>>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>>
>>>>> diff -upNr a/drivers/scsi/scsi_transport_fc.c 
>>>>> b/drivers/scsi/scsi_transport_fc.c
>>>>> --- a/drivers/scsi/scsi_transport_fc.c    2008-10-18 
>>>>> 10:32:52.000000000 -0400
>>>>> +++ b/drivers/scsi/scsi_transport_fc.c    2008-12-05 
>>>>> 12:13:54.000000000 -0500
>>>>> @@ -3012,6 +3012,16 @@ fc_timeout_deleted_rport(struct work_str
>>>>>      rport->port_state = FC_PORTSTATE_NOTPRESENT;
>>>>>      rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
>>>>>  
>>>>> +    /*
>>>>> +     * Pre-emptively kill I/O rather than waiting for the work queue
>>>>> +     * item to teardown the starget. (FCOE libFC folks prefer this
>>>>> +     * and to have the rport_port_id still set when it's done).
>>>>> +     */
>>>>> +    spin_unlock_irqrestore(shost->host_lock, flags);
>>>>> +    fc_terminate_rport_io(rport);
>>>>> +
>>>>> +    BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
>>>>> +
>>>> I think we could this this bug_on or we might hit a problem below 
>>>> where this thread is changing the port_id but some other thread is 
>>>> calling fc_remote_port_add and changging the port id.
>>>>
>>>> I think the problem is that fc_remote_port_add only calls 
>>>> fc_flush_work which flushes the fc_host work_q:
>>>>
>>>>
>>>> struct fc_rport *
>>>> () (struct Scsi_Host *shost, int channel,
>>>>          struct fc_rport_identifiers  *ids)
>>>> {
>>>>          struct fc_internal *fci = to_fc_internal(shost->transportt);
>>>>          struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
>>>>          struct fc_rport *rport;
>>>>          unsigned long flags;
>>>>          int match = 0;
>>>>
>>>>          /* ensure any stgt delete functions are done */
>>>>          fc_flush_work(shost);
>>>>
>>>>
>>>> But fc_timeout_deleted_rport dev_loss_work is running on the 
>>>> devloss_work_q.
>>>>
>>>> So if fc_timeout_deleted_rport grabs the host lock first, it will 
>>>> set the port state to FC_PORTSTATE_NOTPRESENT, then drop the lock.
>>>>
>>>> Once fc_timeout_delete_rport drops the lock, fc_remote_port_add 
>>>> could have already passed the fc_flush_work() call because there was 
>>>> nothing on it. fc_remote_port_add would then drop down to the list 
>>>> search on the bindings array and see the remote port. It would then 
>>>> start setting the port state, id and port_name and node_name, but at 
>>>> the same time, because the host lock no longer guards it, 
>>>> fc_timedout_deleted_rport could be fiddling with the same fields and 
>>>> we could end up a mix of values or it could be running over the BUG_ON.
>>>>
>>>> Is that right?
>>>>
>>>> If so do we just want to flush the devloss_work_queue in 
>>>> fc_remote_port_add?
>>>>
>>>>
>>>>
>>>>>      /* remove the identifiers that aren't used in the consisting 
>>>>> binding */
>>>>>      switch (fc_host->tgtid_bind_type) {
>>>>>      case FC_TGTID_BIND_BY_WWPN:
>>>>> @@ -3035,7 +3045,6 @@ fc_timeout_deleted_rport(struct work_str
>>>>>       * went away and didn't come back - we'll remove
>>>>>       * all attached scsi devices.
>>>>>       */
>>>>> -    spin_unlock_irqrestore(shost->host_lock, flags);
>>>>>  
>>>>>      scsi_target_unblock(&rport->dev);
>>>>>      fc_queue_work(shost, &rport->stgt_delete_work);
>>>>>
>>>
>>> One of our tester hit the problem Michael describes above: right after
>>> releasing the shost->host_lock lock in fc_timeout_deleted_rport() the
>>> remote port was rediscovered by the LLDD (in our case zfcp),
>>> fc_remote_port_add() was called and BUG_ON() triggered.
>>>
>>> But flushing the devloss_work_queue in fc_remote_port_add() won't help
>>> either because it still would be possible that right after the
>>> flush_workqueue() call a timer could trigger and 
>>> fc_timeout_deleted_rport()
>>
>>
>> What timer could that be? Only the dev_loss_work delayed work calls 
>> fc_timeout_deleted_rport.
>>
>> Are you thinking about fc_rport_final_delete?
>>
>> If you do the
>>                 if (!cancel_delayed_work(&rport->dev_loss_work))
>>                         fc_flush_devloss(shost);
>>
>> sequence we should be ok I think, because after that point it should 
>> have either been canceled or the work has completed.
>>
> 
> I did this in the attached patch. It also will prevent drivers from 
> having to worry about about if the terminate rport IO or dev loss 
> callback is running at the same time fc_remote_port_add is run.
> 

I forgot to cancel the fail io work too. I guess we can also remove the 
same calls later in that function.

[-- Attachment #2: flush-everything2.patch --]
[-- Type: text/plain, Size: 1292 bytes --]

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index a152f89..e7bbc65 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2564,6 +2564,14 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 	/* ensure any stgt delete functions are done */
 	fc_flush_work(shost);
 
+	/* ensure fail fast is done */
+	if (!cancel_delayed_work(&rport->fail_io_work))
+		fc_flush_devloss(shost);
+
+	/* ensure dev loss is done */
+	if (!cancel_delayed_work(&rport->dev_loss_work))
+		fc_flush_devloss(shost);
+
 	/*
 	 * Search the list of "active" rports, for an rport that has been
 	 * deleted, but we've held off the real delete while the target
@@ -2630,16 +2638,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 				    (!(ids->roles & FC_PORT_ROLE_FCP_TARGET)))
 					return rport;
 
-				/*
-				 * Stop the fail io and dev_loss timers.
-				 * If they flush, the port_state will
-				 * be checked and will NOOP the function.
-				 */
-				if (!cancel_delayed_work(&rport->fail_io_work))
-					fc_flush_devloss(shost);
-				if (!cancel_delayed_work(&rport->dev_loss_work))
-					fc_flush_devloss(shost);
-
 				spin_lock_irqsave(shost->host_lock, flags);
 
 				rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT |

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-21 22:12     ` Mike Christie
@ 2009-04-22 12:13       ` Ralph Wuerthner
  2009-04-22 16:19         ` James Smart
  0 siblings, 1 reply; 11+ messages in thread
From: Ralph Wuerthner @ 2009-04-22 12:13 UTC (permalink / raw)
  To: Mike Christie; +Cc: linux-scsi, james.smart

On Tue, 21 Apr 2009 17:12:18 -0500
Mike Christie <michaelc@cs.wisc.edu> wrote:

> Mike Christie wrote:
> > Mike Christie wrote:
> >> Ralph Wuerthner wrote:
> >>> Mike Christie wrote:
> >>>> James Smart wrote:
> >>>>> Pre-emptively terminate i/o on the rport if dev_loss_tmo has
> >>>>> fired. The desire is to terminate everything, so that the i/o
> >>>>> is cleaned up prior to the sdev's being unblocked, thus any
> >>>>> outstanding timeouts/aborts
> >>>>> are avoided.
> >>>>>
> >>>>> Also, we do this early enough such that the rport's port_id
> >>>>> field is still valid. FCOE libFC code needs this info to find
> >>>>> the i/o's to terminate.
> >>>>>
> >>>>> -- james s
> >>>>>
> >>>>>  Signed-off-by: James Smart <james.smart@emulex.com>
> >>>>>
> >>>>>  ---
> >>>>>
> >>>>>  scsi_transport_fc.c |   11 ++++++++++-
> >>>>>  1 file changed, 10 insertions(+), 1 deletion(-)
> >>>>>
> >>>>>
> >>>>> diff -upNr a/drivers/scsi/scsi_transport_fc.c 
> >>>>> b/drivers/scsi/scsi_transport_fc.c
> >>>>> --- a/drivers/scsi/scsi_transport_fc.c    2008-10-18 
> >>>>> 10:32:52.000000000 -0400
> >>>>> +++ b/drivers/scsi/scsi_transport_fc.c    2008-12-05 
> >>>>> 12:13:54.000000000 -0500
> >>>>> @@ -3012,6 +3012,16 @@ fc_timeout_deleted_rport(struct work_str
> >>>>>      rport->port_state = FC_PORTSTATE_NOTPRESENT;
> >>>>>      rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
> >>>>>  
> >>>>> +    /*
> >>>>> +     * Pre-emptively kill I/O rather than waiting for the work
> >>>>> queue
> >>>>> +     * item to teardown the starget. (FCOE libFC folks prefer
> >>>>> this
> >>>>> +     * and to have the rport_port_id still set when it's done).
> >>>>> +     */
> >>>>> +    spin_unlock_irqrestore(shost->host_lock, flags);
> >>>>> +    fc_terminate_rport_io(rport);
> >>>>> +
> >>>>> +    BUG_ON(rport->port_state != FC_PORTSTATE_NOTPRESENT);
> >>>>> +
> >>>> I think we could this this bug_on or we might hit a problem
> >>>> below where this thread is changing the port_id but some other
> >>>> thread is calling fc_remote_port_add and changging the port id.
> >>>>
> >>>> I think the problem is that fc_remote_port_add only calls 
> >>>> fc_flush_work which flushes the fc_host work_q:
> >>>>
> >>>>
> >>>> struct fc_rport *
> >>>> () (struct Scsi_Host *shost, int channel,
> >>>>          struct fc_rport_identifiers  *ids)
> >>>> {
> >>>>          struct fc_internal *fci =
> >>>> to_fc_internal(shost->transportt); struct fc_host_attrs *fc_host
> >>>> = shost_to_fc_host(shost); struct fc_rport *rport;
> >>>>          unsigned long flags;
> >>>>          int match = 0;
> >>>>
> >>>>          /* ensure any stgt delete functions are done */
> >>>>          fc_flush_work(shost);
> >>>>
> >>>>
> >>>> But fc_timeout_deleted_rport dev_loss_work is running on the 
> >>>> devloss_work_q.
> >>>>
> >>>> So if fc_timeout_deleted_rport grabs the host lock first, it
> >>>> will set the port state to FC_PORTSTATE_NOTPRESENT, then drop
> >>>> the lock.
> >>>>
> >>>> Once fc_timeout_delete_rport drops the lock, fc_remote_port_add 
> >>>> could have already passed the fc_flush_work() call because there
> >>>> was nothing on it. fc_remote_port_add would then drop down to
> >>>> the list search on the bindings array and see the remote port.
> >>>> It would then start setting the port state, id and port_name and
> >>>> node_name, but at the same time, because the host lock no longer
> >>>> guards it, fc_timedout_deleted_rport could be fiddling with the
> >>>> same fields and we could end up a mix of values or it could be
> >>>> running over the BUG_ON.
> >>>>
> >>>> Is that right?
> >>>>
> >>>> If so do we just want to flush the devloss_work_queue in 
> >>>> fc_remote_port_add?
> >>>>
> >>>>
> >>>>
> >>>>>      /* remove the identifiers that aren't used in the
> >>>>> consisting binding */
> >>>>>      switch (fc_host->tgtid_bind_type) {
> >>>>>      case FC_TGTID_BIND_BY_WWPN:
> >>>>> @@ -3035,7 +3045,6 @@ fc_timeout_deleted_rport(struct work_str
> >>>>>       * went away and didn't come back - we'll remove
> >>>>>       * all attached scsi devices.
> >>>>>       */
> >>>>> -    spin_unlock_irqrestore(shost->host_lock, flags);
> >>>>>  
> >>>>>      scsi_target_unblock(&rport->dev);
> >>>>>      fc_queue_work(shost, &rport->stgt_delete_work);
> >>>>>
> >>>
> >>> One of our tester hit the problem Michael describes above: right
> >>> after releasing the shost->host_lock lock in
> >>> fc_timeout_deleted_rport() the remote port was rediscovered by
> >>> the LLDD (in our case zfcp), fc_remote_port_add() was called and
> >>> BUG_ON() triggered.
> >>>
> >>> But flushing the devloss_work_queue in fc_remote_port_add() won't
> >>> help either because it still would be possible that right after
> >>> the flush_workqueue() call a timer could trigger and 
> >>> fc_timeout_deleted_rport()
> >>
> >>
> >> What timer could that be? Only the dev_loss_work delayed work
> >> calls fc_timeout_deleted_rport.
> >>
> >> Are you thinking about fc_rport_final_delete?
> >>
> >> If you do the
> >>                 if (!cancel_delayed_work(&rport->dev_loss_work))
> >>                         fc_flush_devloss(shost);
> >>
> >> sequence we should be ok I think, because after that point it
> >> should have either been canceled or the work has completed.
> >>
> > 
> > I did this in the attached patch. It also will prevent drivers from 
> > having to worry about about if the terminate rport IO or dev loss 
> > callback is running at the same time fc_remote_port_add is run.
> > 
> 
> I forgot to cancel the fail io work too. I guess we can also remove
> the same calls later in that function.>
>
> diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
> index a152f89..e7bbc65 100644
> --- a/drivers/scsi/scsi_transport_fc.c
> +++ b/drivers/scsi/scsi_transport_fc.c
> @@ -2564,6 +2564,14 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
>  	/* ensure any stgt delete functions are done */
>  	fc_flush_work(shost);
> 
> +	/* ensure fail fast is done */
> +	if (!cancel_delayed_work(&rport->fail_io_work))
> +		fc_flush_devloss(shost);
> +
> +	/* ensure dev loss is done */
> +	if (!cancel_delayed_work(&rport->dev_loss_work))
> +		fc_flush_devloss(shost);
> +
>  	/*
>  	 * Search the list of "active" rports, for an rport that has been
>  	 * deleted, but we've held off the real delete while the target
> @@ -2630,16 +2638,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
>  				    (!(ids->roles & FC_PORT_ROLE_FCP_TARGET)))
>  					return rport;
> 
> -				/*
> -				 * Stop the fail io and dev_loss timers.
> -				 * If they flush, the port_state will
> -				 * be checked and will NOOP the function.
> -				 */
> -				if (!cancel_delayed_work(&rport->fail_io_work))
> -					fc_flush_devloss(shost);
> -				if (!cancel_delayed_work(&rport->dev_loss_work))
> -					fc_flush_devloss(shost);
> -
>  				spin_lock_irqsave(shost->host_lock, flags);
> 
>  				rport->flags &=
> ~(FC_RPORT_FAST_FAIL_TIMEDOUT |

Your patch does not work: you are canceling the fast_fail work and
dev_loss work without initializing rport first. 

We have to make sure that fc_remote_port_add(),
fc_remote_port_delete(), and fc_timeout_deleted_rport() do not
interfere with each other. Unfortunately I don't know yet how. I have to
think more about this.

-- 
Ralph Wuerthner

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo
  2009-04-22 12:13       ` Ralph Wuerthner
@ 2009-04-22 16:19         ` James Smart
  0 siblings, 0 replies; 11+ messages in thread
From: James Smart @ 2009-04-22 16:19 UTC (permalink / raw)
  To: Ralph Wuerthner; +Cc: Mike Christie, linux-scsi@vger.kernel.org

All the flushes and workq stuff is very tricky. We (Mike Reed and I) had 
a devil of a time with it on large SMP systems. So adding these quickly 
is not advised.

I believe what we need to do is add a synchronization between the delete 
and the add sequences, which right now, expect the workq and flushing to 
provide it, but left other code paths, where it had to release the locks 
post the state change, at risk. Let me put something together.

-- james s

Ralph Wuerthner wrote:
> Your patch does not work: you are canceling the fast_fail work and
> dev_loss work without initializing rport first. 
>
> We have to make sure that fc_remote_port_add(),
> fc_remote_port_delete(), and fc_timeout_deleted_rport() do not
> interfere with each other. Unfortunately I don't know yet how. I have to
> think more about this.
>
>   

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2009-05-11 12:25 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-22 18:01 [PATCH] fc transport: pre-emptively terminate i/o upon dev_loss_tmo James Smart
2009-04-22 19:18 ` Mike Christie
2009-04-22 19:20   ` James Smart
2009-04-22 20:49   ` Abhijeet Arvind Joglekar (abjoglek)
2009-05-11 12:25 ` Ralph Wuerthner
  -- strict thread matches above, loose matches on Subject: below --
2009-04-09 11:04 Ralph Wuerthner
2009-04-21 21:43 ` Mike Christie
2009-04-21 21:52   ` Mike Christie
2009-04-21 22:12     ` Mike Christie
2009-04-22 12:13       ` Ralph Wuerthner
2009-04-22 16:19         ` James Smart

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).