public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: Christof Schmitt <christof.schmitt@de.ibm.com>
To: James.Bottomley@HansenPartnership.com
Cc: linux-scsi@vger.kernel.org, linux-s390@vger.kernel.org,
	Swen Schillig <swen@vnet.ibm.com>,
	Christof Schmitt <christof.schmitt@de.ibm.com>
Subject: [patch 01/11] zfcp: receiving an unsolicted status can lead to I/O stall
Date: Mon, 19 May 2008 12:17:37 +0200	[thread overview]
Message-ID: <20080519101826.226658000@de.ibm.com> (raw)
In-Reply-To: 20080519101736.590943000@de.ibm.com

[-- Attachment #1: zfcp_status_read_locking_race.diff --]
[-- Type: text/plain, Size: 6672 bytes --]

From: Swen Schillig <swen@vnet.ibm.com>

Processing of an unsolicted status request can lead to a locking race
of the request_queue's queue_lock during the recreation of the 
used up status read request while still in interrupt context 
of the response handler.

Detaching the 'refill' of the long running status read requests from
the handler to a scheduled work is solving this issue.

In addition, each refill-run is trying to re-establish the full amount 
of status read requests, which might have failed in earlier runs.

Signed-off-by: Swen Schillig <swen@vnet.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
---
 drivers/s390/scsi/zfcp_aux.c |   23 +++++++++++++++++++++++
 drivers/s390/scsi/zfcp_dbf.c |    2 +-
 drivers/s390/scsi/zfcp_def.h |    4 ++--
 drivers/s390/scsi/zfcp_erp.c |   19 ++-----------------
 drivers/s390/scsi/zfcp_ext.h |    1 +
 drivers/s390/scsi/zfcp_fsf.c |   18 +++---------------
 6 files changed, 32 insertions(+), 35 deletions(-)

--- a/drivers/s390/scsi/zfcp_aux.c	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_aux.c	2008-05-19 11:28:32.000000000 +0200
@@ -970,6 +970,27 @@ static void zfcp_dummy_release(struct de
 	return;
 }
 
+int zfcp_status_read_refill(struct zfcp_adapter *adapter)
+{
+	while (atomic_read(&adapter->stat_miss) > 0)
+		if (zfcp_fsf_status_read(adapter, ZFCP_WAIT_FOR_SBAL))
+			break;
+	else
+		atomic_dec(&adapter->stat_miss);
+
+	if (ZFCP_STATUS_READS_RECOM <= atomic_read(&adapter->stat_miss)) {
+		zfcp_erp_adapter_reopen(adapter, 0, 103, NULL);
+		return 1;
+	}
+	return 0;
+}
+
+static void _zfcp_status_read_scheduler(struct work_struct *work)
+{
+	zfcp_status_read_refill(container_of(work, struct zfcp_adapter,
+					     stat_work));
+}
+
 /*
  * Enqueues an adapter at the end of the adapter list in the driver data.
  * All adapter internal structures are set up.
@@ -1063,6 +1084,7 @@ zfcp_adapter_enqueue(struct ccw_device *
 
 	/* initialize lock of associated request queue */
 	rwlock_init(&adapter->request_queue.queue_lock);
+	INIT_WORK(&adapter->stat_work, _zfcp_status_read_scheduler);
 
 	/* mark adapter unusable as long as sysfs registration is not complete */
 	atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
@@ -1123,6 +1145,7 @@ zfcp_adapter_dequeue(struct zfcp_adapter
 	int retval = 0;
 	unsigned long flags;
 
+	cancel_work_sync(&adapter->stat_work);
 	zfcp_adapter_scsi_unregister(adapter);
 	device_unregister(&adapter->generic_services);
 	zfcp_sysfs_adapter_remove_files(&adapter->ccw_device->dev);
--- a/drivers/s390/scsi/zfcp_dbf.c	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_dbf.c	2008-05-19 11:28:32.000000000 +0200
@@ -268,7 +268,7 @@ void zfcp_hba_dbf_event_fsf_unsol(const 
 	strncpy(rec->tag, "stat", ZFCP_DBF_TAG_SIZE);
 	strncpy(rec->tag2, tag, ZFCP_DBF_TAG_SIZE);
 
-	rec->u.status.failed = adapter->status_read_failed;
+	rec->u.status.failed = atomic_read(&adapter->stat_miss);
 	if (status_buffer != NULL) {
 		rec->u.status.status_type = status_buffer->status_type;
 		rec->u.status.status_subtype = status_buffer->status_subtype;
--- a/drivers/s390/scsi/zfcp_def.h	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_def.h	2008-05-19 11:28:32.000000000 +0200
@@ -136,7 +136,6 @@ zfcp_address_to_sg(void *address, struct
 #define ZFCP_QTCB_VERSION	FSF_QTCB_CURRENT_VERSION
 /* ATTENTION: value must not be used by hardware */
 #define FSF_QTCB_UNSOLICITED_STATUS		0x6305
-#define ZFCP_STATUS_READ_FAILED_THRESHOLD	3
 #define ZFCP_STATUS_READS_RECOM		        FSF_STATUS_READS_RECOM
 
 /* Do 1st retry in 1 second, then double the timeout for each following retry */
@@ -759,7 +758,8 @@ struct zfcp_adapter {
 	rwlock_t		abort_lock;        /* Protects against SCSI
 						      stack abort/command
 						      completion races */
-	u16			status_read_failed; /* # failed status reads */
+	atomic_t		stat_miss;	   /* # missing status reads*/
+	struct work_struct	stat_work;
 	atomic_t		status;	           /* status of this adapter */
 	struct list_head	erp_ready_head;	   /* error recovery for this
 						      adapter/devices */
--- a/drivers/s390/scsi/zfcp_fsf.c	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_fsf.c	2008-05-19 11:28:32.000000000 +0200
@@ -1029,21 +1029,9 @@ zfcp_fsf_status_read_handler(struct zfcp
 	 * FIXME:
 	 * allocation failure possible? (Is this code needed?)
 	 */
-	retval = zfcp_fsf_status_read(adapter, 0);
-	if (retval < 0) {
-		ZFCP_LOG_INFO("Failed to create unsolicited status read "
-			      "request for the adapter %s.\n",
-			      zfcp_get_busid_by_adapter(adapter));
-		/* temporary fix to avoid status read buffer shortage */
-		adapter->status_read_failed++;
-		if ((ZFCP_STATUS_READS_RECOM - adapter->status_read_failed)
-		    < ZFCP_STATUS_READ_FAILED_THRESHOLD) {
-			ZFCP_LOG_INFO("restart adapter %s due to status read "
-				      "buffer shortage\n",
-				      zfcp_get_busid_by_adapter(adapter));
-			zfcp_erp_adapter_reopen(adapter, 0, 103, fsf_req);
-		}
-	}
+
+	atomic_inc(&adapter->stat_miss);
+	schedule_work(&adapter->stat_work);
  out:
 	return retval;
 }
--- a/drivers/s390/scsi/zfcp_erp.c	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_erp.c	2008-05-19 11:28:32.000000000 +0200
@@ -2139,25 +2139,10 @@ static int
 zfcp_erp_adapter_strategy_open_fsf_statusread(struct zfcp_erp_action
 					      *erp_action)
 {
-	int retval = ZFCP_ERP_SUCCEEDED;
-	int temp_ret;
 	struct zfcp_adapter *adapter = erp_action->adapter;
-	int i;
 
-	adapter->status_read_failed = 0;
-	for (i = 0; i < ZFCP_STATUS_READS_RECOM; i++) {
-		temp_ret = zfcp_fsf_status_read(adapter, ZFCP_WAIT_FOR_SBAL);
-		if (temp_ret < 0) {
-			ZFCP_LOG_INFO("error: set-up of unsolicited status "
-				      "notification failed on adapter %s\n",
-				      zfcp_get_busid_by_adapter(adapter));
-			retval = ZFCP_ERP_FAILED;
-			i--;
-			break;
-		}
-	}
-
-	return retval;
+	atomic_set(&adapter->stat_miss, 16);
+	return zfcp_status_read_refill(adapter);
 }
 
 /*
--- a/drivers/s390/scsi/zfcp_ext.h	2008-05-19 11:22:35.000000000 +0200
+++ b/drivers/s390/scsi/zfcp_ext.h	2008-05-19 11:28:32.000000000 +0200
@@ -92,6 +92,7 @@ extern void zfcp_fsf_start_timer(struct 
 extern void zfcp_erp_start_timer(struct zfcp_fsf_req *);
 extern void zfcp_fsf_req_dismiss_all(struct zfcp_adapter *);
 extern int  zfcp_fsf_status_read(struct zfcp_adapter *, int);
+extern int zfcp_status_read_refill(struct zfcp_adapter *adapter);
 extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *,
 			       unsigned long *, struct zfcp_fsf_req **);
 extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *,

-- 

  reply	other threads:[~2008-05-19 10:18 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-19 10:17 [patch 00/11] zfcp fixes and improvements Christof Schmitt
2008-05-19 10:17 ` Christof Schmitt [this message]
2008-05-19 10:17 ` [patch 02/11] zfcp: Fix mempool pointer for GID_PN request allocation Christof Schmitt
2008-05-19 10:17 ` [patch 03/11] zfcp: Fix fsf_status_read return code handling Christof Schmitt
2008-05-19 10:17 ` [patch 04/11] zfcp: Remove some sparse warnings Christof Schmitt
2008-05-19 10:17 ` [patch 05/11] zfcp: Remove field sbal_last from trace record Christof Schmitt
2008-05-19 10:17 ` [patch 06/11] zfcp: Rename sbal_last Christof Schmitt
2008-05-19 10:17 ` [patch 07/11] zfcp: Rename sbal_curr to sbal_last Christof Schmitt
2008-05-19 10:17 ` [patch 08/11] zfcp: Add information about interrupt to trace Christof Schmitt
2008-05-19 10:17 ` [patch 09/11] zfcp: Refine trace levels of some recovery related events Christof Schmitt
2008-05-19 10:17 ` [patch 10/11] zfcp: remove some __attribute__ ((packed)) Christof Schmitt
2008-05-19 10:17 ` [patch 11/11] zfcp: Fix sparse warning by providing new entry in dbf Christof Schmitt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080519101826.226658000@de.ibm.com \
    --to=christof.schmitt@de.ibm.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=swen@vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox