linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] ibmvscsi: requeue while CRQ closed
@ 2007-11-09 15:16 Robert Jennings
  2007-11-09 17:35 ` Brian King
  2007-11-12 15:00 ` [PATCH 1/1] [v2] " Robert Jennings
  0 siblings, 2 replies; 3+ messages in thread
From: Robert Jennings @ 2007-11-09 15:16 UTC (permalink / raw)
  To: James.Bottomley, santil, brking, linux-scsi

CRQ send errors that return with H_CLOSED should return with
SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ
transport event.  The transport event will either reinitialize and
requeue the requests, or fail and return IO with DID_ERROR.

To avoid failing the eh_* functions while re-attaching to the server
adapter, this will retry for a period of time while ibmvscsi_send_srp_event
returns SCSI_MLQUEUE_HOST_BUSY.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>

---
 drivers/scsi/ibmvscsi/ibmvscsi.c |   59 ++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 11 deletions(-)

Index: linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c
===================================================================
--- linux-2.6.orig/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-09 08:53:02.000000000 -0600
+++ linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-09 08:53:36.000000000 -0600
@@ -629,6 +629,16 @@
 		list_del(&evt_struct->list);
 		del_timer(&evt_struct->timer);
 
+		/* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
+		 * Firmware will send a CRQ with a transport event (0xFF) to
+		 * tell this client what has happened to the transport.  This
+		 * will be handled in ibmvscsi_handle_crq()
+		 */
+		if (rc == H_CLOSED) {
+			dev_warn(hostdata->dev, "send warning. "
+			         "Receive queue closed, will retry.\n");
+			goto send_busy;
+		}
 		dev_err(hostdata->dev, "send error %d\n", rc);
 		atomic_inc(&hostdata->request_limit);
 		goto send_error;
@@ -976,6 +986,7 @@
 	int rsp_rc;
 	unsigned long flags;
 	u16 lun = lun_from_dev(cmd->device);
+	unsigned long wait_switch = 0;
 
 	/* First, find this command in our sent list so we can figure
 	 * out the correct tag
@@ -1019,15 +1030,30 @@
 		    tsk_mgmt->lun, tsk_mgmt->task_tag);
 
 	evt->sync_srp = &srp_rsp;
-	init_completion(&evt->comp);
-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
+	wait_switch = jiffies + (init_timeout * HZ);
+	do {
+		init_completion(&evt->comp);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+
+		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+			break;
+
+		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+		msleep(10);
+		spin_lock_irqsave(hostdata->host->host_lock, flags);
+	} while (time_before(jiffies, wait_switch));
+
 	if (rsp_rc != 0) {
+		free_event_struct(&found_evt->hostdata->pool, found_evt);
+		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 		sdev_printk(KERN_ERR, cmd->device,
 			    "failed to send abort() event. rc=%d\n", rsp_rc);
 		return FAILED;
 	}
 
+	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
 	wait_for_completion(&evt->comp);
 
 	/* make sure we got a good response */
@@ -1099,6 +1125,7 @@
 	int rsp_rc;
 	unsigned long flags;
 	u16 lun = lun_from_dev(cmd->device);
+	unsigned long wait_switch = 0;
 
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
 	evt = get_event_struct(&hostdata->pool);
@@ -1125,9 +1152,20 @@
 		    tsk_mgmt->lun);
 
 	evt->sync_srp = &srp_rsp;
-	init_completion(&evt->comp);
-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
+	wait_switch = jiffies + (init_timeout * HZ);
+	do {
+		init_completion(&evt->comp);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
+		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+			break;
+
+		msleep(10);
+		spin_lock_irqsave(hostdata->host->host_lock, flags);
+	} while (time_before(jiffies, wait_switch));
+
 	if (rsp_rc != 0) {
 		sdev_printk(KERN_ERR, cmd->device,
 			    "failed to send reset event. rc=%d\n", rsp_rc);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/1] ibmvscsi: requeue while CRQ closed
  2007-11-09 15:16 [PATCH 1/1] ibmvscsi: requeue while CRQ closed Robert Jennings
@ 2007-11-09 17:35 ` Brian King
  2007-11-12 15:00 ` [PATCH 1/1] [v2] " Robert Jennings
  1 sibling, 0 replies; 3+ messages in thread
From: Brian King @ 2007-11-09 17:35 UTC (permalink / raw)
  To: Robert Jennings; +Cc: James.Bottomley, santil, linux-scsi

James,

Hold off on pulling this in. The patch is currently being reworked to
fix a problem.

Thanks,

-Brian

Robert Jennings wrote:
> CRQ send errors that return with H_CLOSED should return with
> SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ
> transport event.  The transport event will either reinitialize and
> requeue the requests, or fail and return IO with DID_ERROR.
> 
> To avoid failing the eh_* functions while re-attaching to the server
> adapter this will retry for a period of time while ibmvscsi_send_srp_event
> returns SCSI_MLQUEUE_HOST_BUSY.
> 
> Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
> Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
> 
> ---
>  drivers/scsi/ibmvscsi/ibmvscsi.c |   59 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 48 insertions(+), 11 deletions(-)
> 
> Index: linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c
> ===================================================================
> --- linux-2.6.orig/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-09 08:53:02.000000000 -0600
> +++ linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-09 08:53:36.000000000 -0600
> @@ -629,6 +629,16 @@
>  		list_del(&evt_struct->list);
>  		del_timer(&evt_struct->timer);
> 
> +		/* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
> +		 * Firmware will send a CRQ with a transport event (0xFF) to
> +		 * tell this client what has happened to the transport.  This
> +		 * will be handled in ibmvscsi_handle_crq()
> +		 */
> +		if (rc == H_CLOSED) {
> +			dev_warn(hostdata->dev, "send warning. "
> +			         "Receive queue closed, will retry.\n");
> +			goto send_busy;
> +		}
>  		dev_err(hostdata->dev, "send error %d\n", rc);
>  		atomic_inc(&hostdata->request_limit);
>  		goto send_error;
> @@ -976,6 +986,7 @@
>  	int rsp_rc;
>  	unsigned long flags;
>  	u16 lun = lun_from_dev(cmd->device);
> +	unsigned long wait_switch = 0;
> 
>  	/* First, find this command in our sent list so we can figure
>  	 * out the correct tag
> @@ -1019,15 +1030,30 @@
>  		    tsk_mgmt->lun, tsk_mgmt->task_tag);
> 
>  	evt->sync_srp = &srp_rsp;
> -	init_completion(&evt->comp);
> -	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
> -	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> +
> +	wait_switch = jiffies + (init_timeout * HZ);
> +	do {
> +		init_completion(&evt->comp);
> +		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
> +
> +		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
> +			break;
> +
> +		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> +		msleep(10);
> +		spin_lock_irqsave(hostdata->host->host_lock, flags);
> +	} while (time_before(jiffies, wait_switch));
> +
>  	if (rsp_rc != 0) {
> +		free_event_struct(&found_evt->hostdata->pool, found_evt);
> +		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
>  		sdev_printk(KERN_ERR, cmd->device,
>  			    "failed to send abort() event. rc=%d\n", rsp_rc);
>  		return FAILED;
>  	}
> 
> +	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> +
>  	wait_for_completion(&evt->comp);
> 
>  	/* make sure we got a good response */
> @@ -1099,6 +1125,7 @@
>  	int rsp_rc;
>  	unsigned long flags;
>  	u16 lun = lun_from_dev(cmd->device);
> +	unsigned long wait_switch = 0;
> 
>  	spin_lock_irqsave(hostdata->host->host_lock, flags);
>  	evt = get_event_struct(&hostdata->pool);
> @@ -1125,9 +1152,20 @@
>  		    tsk_mgmt->lun);
> 
>  	evt->sync_srp = &srp_rsp;
> -	init_completion(&evt->comp);
> -	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
> -	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> +
> +	wait_switch = jiffies + (init_timeout * HZ);
> +	do {
> +		init_completion(&evt->comp);
> +		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
> +		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
> +
> +		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
> +			break;
> +
> +		msleep(10);
> +		spin_lock_irqsave(hostdata->host->host_lock, flags);
> +	} while (time_before(jiffies, wait_switch));
> +
>  	if (rsp_rc != 0) {
>  		sdev_printk(KERN_ERR, cmd->device,
>  			    "failed to send reset event. rc=%d\n", rsp_rc);
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Brian King
Linux on Power Virtualization
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/1] [v2] ibmvscsi: requeue while CRQ closed
  2007-11-09 15:16 [PATCH 1/1] ibmvscsi: requeue while CRQ closed Robert Jennings
  2007-11-09 17:35 ` Brian King
@ 2007-11-12 15:00 ` Robert Jennings
  1 sibling, 0 replies; 3+ messages in thread
From: Robert Jennings @ 2007-11-12 15:00 UTC (permalink / raw)
  To: James.Bottomley, santil, brking, linux-scsi

CRQ send errors that return with H_CLOSED should return with
SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ
transport event.  The transport event will either reinitialize and
requeue the requests, or fail and return IO with DID_ERROR.

To avoid failing the eh_* functions while re-attaching to the server
adapter, this will retry for a period of time while ibmvscsi_send_srp_event
returns SCSI_MLQUEUE_HOST_BUSY.

In ibmvscsi_eh_abort_handler() the loop includes the search of the
event list.  The lock on the hostdata is dropped while waiting to try
again after failing ibmvscsi_send_srp_event.  The event could have been
purged if a login was in progress when the function was called.

In ibmvscsi_eh_device_reset_handler() the loop includes the call to
get_event_struct() because a failing call to ibmvscsi_send_srp_event()
will have freed the event struct.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>

---
 drivers/scsi/ibmvscsi/ibmvscsi.c |   59 ++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 11 deletions(-)

Index: linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c
===================================================================
--- linux-2.6.orig/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-12 08:52:59.000000000 -0600
+++ linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-11-12 08:54:17.000000000 -0600
@@ -629,6 +629,16 @@
 		list_del(&evt_struct->list);
 		del_timer(&evt_struct->timer);
 
+		/* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
+		 * Firmware will send a CRQ with a transport event (0xFF) to
+		 * tell this client what has happened to the transport.  This
+		 * will be handled in ibmvscsi_handle_crq()
+		 */
+		if (rc == H_CLOSED) {
+			dev_warn(hostdata->dev, "send warning. "
+			         "Receive queue closed, will retry.\n");
+			goto send_busy;
+		}
 		dev_err(hostdata->dev, "send error %d\n", rc);
 		atomic_inc(&hostdata->request_limit);
 		goto send_error;
@@ -976,58 +986,74 @@
 	int rsp_rc;
 	unsigned long flags;
 	u16 lun = lun_from_dev(cmd->device);
+	unsigned long wait_switch = 0;
 
 	/* First, find this command in our sent list so we can figure
 	 * out the correct tag
 	 */
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	found_evt = NULL;
-	list_for_each_entry(tmp_evt, &hostdata->sent, list) {
-		if (tmp_evt->cmnd == cmd) {
-			found_evt = tmp_evt;
-			break;
+	wait_switch = jiffies + (init_timeout * HZ);
+	do {
+		found_evt = NULL;
+		list_for_each_entry(tmp_evt, &hostdata->sent, list) {
+			if (tmp_evt->cmnd == cmd) {
+				found_evt = tmp_evt;
+				break;
+			}
 		}
-	}
 
-	if (!found_evt) {
-		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-		return SUCCESS;
-	}
+		if (!found_evt) {
+			spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+			return SUCCESS;
+		}
 
-	evt = get_event_struct(&hostdata->pool);
-	if (evt == NULL) {
-		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-		sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n");
-		return FAILED;
-	}
+		evt = get_event_struct(&hostdata->pool);
+		if (evt == NULL) {
+			spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+			sdev_printk(KERN_ERR, cmd->device,
+				"failed to allocate abort event\n");
+			return FAILED;
+		}
 	
-	init_event_struct(evt,
-			  sync_completion,
-			  VIOSRP_SRP_FORMAT,
-			  init_timeout);
+		init_event_struct(evt,
+				  sync_completion,
+				  VIOSRP_SRP_FORMAT,
+				  init_timeout);
 
-	tsk_mgmt = &evt->iu.srp.tsk_mgmt;
+		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 	
-	/* Set up an abort SRP command */
-	memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
-	tsk_mgmt->opcode = SRP_TSK_MGMT;
-	tsk_mgmt->lun = ((u64) lun) << 48;
-	tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
-	tsk_mgmt->task_tag = (u64) found_evt;
-
-	sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n",
-		    tsk_mgmt->lun, tsk_mgmt->task_tag);
-
-	evt->sync_srp = &srp_rsp;
-	init_completion(&evt->comp);
-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+		/* Set up an abort SRP command */
+		memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
+		tsk_mgmt->opcode = SRP_TSK_MGMT;
+		tsk_mgmt->lun = ((u64) lun) << 48;
+		tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
+		tsk_mgmt->task_tag = (u64) found_evt;
+
+		evt->sync_srp = &srp_rsp;
+
+		init_completion(&evt->comp);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+
+		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+			break;
+
+		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+		msleep(10);
+		spin_lock_irqsave(hostdata->host->host_lock, flags);
+	} while (time_before(jiffies, wait_switch));
+
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
 	if (rsp_rc != 0) {
 		sdev_printk(KERN_ERR, cmd->device,
 			    "failed to send abort() event. rc=%d\n", rsp_rc);
 		return FAILED;
 	}
 
+	sdev_printk(KERN_INFO, cmd->device,
+                    "aborting command. lun 0x%lx, tag 0x%lx\n",
+		    (((u64) lun) << 48), (u64) found_evt);
+
 	wait_for_completion(&evt->comp);
 
 	/* make sure we got a good response */
@@ -1099,41 +1125,56 @@
 	int rsp_rc;
 	unsigned long flags;
 	u16 lun = lun_from_dev(cmd->device);
+	unsigned long wait_switch = 0;
 
 	spin_lock_irqsave(hostdata->host->host_lock, flags);
-	evt = get_event_struct(&hostdata->pool);
-	if (evt == NULL) {
-		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-		sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n");
-		return FAILED;
-	}
+	wait_switch = jiffies + (init_timeout * HZ);
+	do {
+		evt = get_event_struct(&hostdata->pool);
+		if (evt == NULL) {
+			spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+			sdev_printk(KERN_ERR, cmd->device,
+				"failed to allocate reset event\n");
+			return FAILED;
+		}
 	
-	init_event_struct(evt,
-			  sync_completion,
-			  VIOSRP_SRP_FORMAT,
-			  init_timeout);
+		init_event_struct(evt,
+				  sync_completion,
+				  VIOSRP_SRP_FORMAT,
+				  init_timeout);
 
-	tsk_mgmt = &evt->iu.srp.tsk_mgmt;
+		tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 
-	/* Set up a lun reset SRP command */
-	memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
-	tsk_mgmt->opcode = SRP_TSK_MGMT;
-	tsk_mgmt->lun = ((u64) lun) << 48;
-	tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
+		/* Set up a lun reset SRP command */
+		memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
+		tsk_mgmt->opcode = SRP_TSK_MGMT;
+		tsk_mgmt->lun = ((u64) lun) << 48;
+		tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
 
-	sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
-		    tsk_mgmt->lun);
+		evt->sync_srp = &srp_rsp;
+
+		init_completion(&evt->comp);
+		rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+
+		if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+			break;
+
+		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+		msleep(10);
+		spin_lock_irqsave(hostdata->host->host_lock, flags);
+	} while (time_before(jiffies, wait_switch));
 
-	evt->sync_srp = &srp_rsp;
-	init_completion(&evt->comp);
-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
 	if (rsp_rc != 0) {
 		sdev_printk(KERN_ERR, cmd->device,
 			    "failed to send reset event. rc=%d\n", rsp_rc);
 		return FAILED;
 	}
 
+	sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
+		    (((u64) lun) << 48));
+
 	wait_for_completion(&evt->comp);
 
 	/* make sure we got a good response */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2007-11-12 15:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-09 15:16 [PATCH 1/1] ibmvscsi: requeue while CRQ closed Robert Jennings
2007-11-09 17:35 ` Brian King
2007-11-12 15:00 ` [PATCH 1/1] [v2] " Robert Jennings

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).