public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH / RFC] scsi_error handler update. (1/4)
@ 2003-02-11  8:13 Mike Anderson
  2003-02-11  8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
                   ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-11  8:13 UTC (permalink / raw)
  To: linux-scsi

This patch series is against scsi-misc-2.5.

These patches modify the scsi error handler to process cmds needing
recovery off a list. The error handler policy has been altered to do the
following:
	1.) Check for legacy behavior of requesting sense.
	2.) Abort commands marked needing to be canceled.
	3.) Ready devices through tur and eh handlers.
	4.) Disposition each command on the list to be retried or
	finished.

00_serror-cmd-list-1.diff:
	- Add per host eh_cmd_list list head.
	- Add per cmd eh_list list head.

01_serror-scmd-add-1.diff:
	- Add scsi_eh_scmd_add function.

02_serror-hndlr-1.diff:
	- Change to using eh_cmd_list.
	- Change scsi_unjam_host to get sense, abort cmds, ready
	  devices, and disposition cmds for retry or finish.
	- Moved retries outside of eh.

03_serror-dev-offline-1.diff:
	- Add scsi_set_device_offline interface.

I have tested both the timeout and offline cases with the scsi_debug
driver. I have run some other testing, but my machine's instability is
causing it to oops in other areas.

-andmike
--
Michael Anderson
andmike@us.ibm.com

 hosts.c |   16 +---------------
 hosts.h |    2 +-
 scsi.h  |    9 +++++----
 3 files changed, 7 insertions(+), 20 deletions(-)
------

diff -Nru a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
--- a/drivers/scsi/hosts.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/hosts.c	Mon Feb 10 22:25:47 2003
@@ -397,6 +397,7 @@
 	spin_lock_init(&shost->default_lock);
 	scsi_assign_lock(shost, &shost->default_lock);
 	INIT_LIST_HEAD(&shost->my_devices);
+	INIT_LIST_HEAD(&shost->eh_cmd_list);
 
 	init_waitqueue_head(&shost->host_wait);
 	shost->dma_channel = 0xff;
@@ -635,21 +636,6 @@
 	shost->host_busy--;
 	sdev->device_busy--;
 	if (shost->in_recovery && (shost->host_busy == shost->host_failed)) {
-		up(shost->eh_wait);
-		SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler"
-					  " thread\n"));
-	}
-	spin_unlock_irqrestore(shost->host_lock, flags);
-}
-
-void scsi_host_failed_inc_and_test(struct Scsi_Host *shost)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(shost->host_lock, flags);
-	shost->in_recovery = 1;
-	shost->host_failed++;
-	if (shost->host_busy == shost->host_failed) {
 		up(shost->eh_wait);
 		SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler"
 					  " thread\n"));
diff -Nru a/drivers/scsi/hosts.h b/drivers/scsi/hosts.h
--- a/drivers/scsi/hosts.h	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/hosts.h	Mon Feb 10 22:25:47 2003
@@ -385,6 +385,7 @@
     spinlock_t		  default_lock;
     spinlock_t		  *host_lock;
 
+    struct list_head	eh_cmd_list;
     struct task_struct    * ehandler;  /* Error recovery thread. */
     struct semaphore      * eh_wait;   /* The error recovery thread waits on
                                           this. */
@@ -595,7 +596,6 @@
  */
 extern void scsi_host_busy_inc(struct Scsi_Host *, Scsi_Device *);
 extern void scsi_host_busy_dec_and_test(struct Scsi_Host *, Scsi_Device *);
-extern void scsi_host_failed_inc_and_test(struct Scsi_Host *);
 
 /**
  * scsi_find_device - find a device given the host

diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi.h	Mon Feb 10 22:25:47 2003
@@ -727,6 +728,7 @@
 
 	struct list_head list;  /* scsi_cmnd participates in queue lists */
 
+	struct list_head eh_list; /* Used to place us on the host eh list */
 	int eh_state;		/* Used for state tracking in error handlr */
 	int eh_eflags;		/* Used by error handlr */
 	void (*done) (struct scsi_cmnd *);	/* Mid-level done function */
@@ -961,12 +963,12 @@
 /*
  * Scsi Error Handler Flags
  */
-#define SCSI_EH_CMD_ERR	0x0001	/* Orig cmd error'd */
-#define SCSI_EH_CMD_FAILED	0x0002	/* Orig cmd error type failed */
-#define SCSI_EH_CMD_TIMEOUT	0x0004	/* Orig cmd error type timeout */
-#define SCSI_EH_REC_TIMEOUT	0x0008	/* Recovery cmd timeout */
+#define SCSI_EH_CANCEL_CMD	0x0001	/* Cancel this cmd */
+#define SCSI_EH_REC_TIMEOUT	0x0002	/* EH retry timed out */
 
 #define SCSI_SENSE_VALID(scmd) ((scmd->sense_buffer[0] & 0x70) == 0x70)
+
+extern int scsi_eh_scmd_add(struct scsi_cmnd *, int);
 
 int scsi_set_medium_removal(Scsi_Device *dev, char state);


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (2/4)
  2003-02-11  8:13 [PATCH / RFC] scsi_error handler update. (1/4) Mike Anderson
@ 2003-02-11  8:15 ` Mike Anderson
  2003-02-11  8:17   ` [PATCH / RFC] scsi_error handler update. (3/4) Mike Anderson
  2003-02-11 16:49 ` [PATCH / RFC] scsi_error handler update. (1/4) Luben Tuikov
  2003-02-11 18:00 ` Patrick Mansfield
  2 siblings, 1 reply; 47+ messages in thread
From: Mike Anderson @ 2003-02-11  8:15 UTC (permalink / raw)
  To: linux-scsi

This patch series is against scsi-misc-2.5.
 
01_serror-scmd-add-1.diff:
 	- Add scsi_eh_scmd_add function.
 
-andmike
--
Michael Anderson
andmike@us.ibm.com

 scsi.c       |    8 ++------
 scsi_error.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 49 insertions(+), 18 deletions(-)
------

diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
--- a/drivers/scsi/scsi.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi.c	Mon Feb 10 22:25:47 2003
@@ -1017,13 +1017,9 @@
 				if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) {
 					SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
 				}
-				if (SCpnt->device->host->eh_wait != NULL) {
-					scsi_eh_eflags_set(SCpnt, SCSI_EH_CMD_FAILED | SCSI_EH_CMD_ERR);
-					SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
-					SCpnt->state = SCSI_STATE_FAILED;
 
-					scsi_host_failed_inc_and_test(SCpnt->device->host);
-				} else {
+				if (!scsi_eh_scmd_add(SCpnt, 0))
+				{
 					/*
 					 * We only get here if the error
 					 * recovery thread has died.
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
@@ -56,6 +56,49 @@
 #define HOST_RESET_SETTLE_TIME  10*HZ
 
 /**
+ * scsi_eh_scmd_add - add scsi cmd to error handling.
+ * @scmd:	scmd to run eh on.
+ * @eh_flag:	optional SCSI_EH flag.
+ *
+ * Return value:
+ *	0 on failure.
+ **/
+int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
+{
+	struct Scsi_Host *shost = scmd->device->host;
+	unsigned long flags;
+
+	if (shost->eh_wait == NULL)
+		return 0;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+
+	scsi_eh_eflags_set(scmd, eh_flag);
+	/*
+	 * FIXME: Can we stop setting owner and state.
+	 */
+	scmd->owner = SCSI_OWNER_ERROR_HANDLER;
+	scmd->state = SCSI_STATE_FAILED;
+	/*
+	 * Set the serial_number_at_timeout to the current
+	 * serial_number
+	 */
+	scmd->serial_number_at_timeout = scmd->serial_number;
+	list_add_tail(&scmd->eh_list, &shost->eh_cmd_list);
+	shost->in_recovery = 1;
+	shost->host_failed++;
+	if (shost->host_busy == shost->host_failed) {
+		up(shost->eh_wait);
+		SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler"
+					  " thread\n"));
+	}
+
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	return 1;
+}
+
+/**
  * scsi_add_timer - Start timeout timer for a single scsi command.
  * @scmd:	scsi command that is about to start running.
  * @timeout:	amount of time to allow this command to run.
@@ -131,22 +174,14 @@
  **/
 void scsi_times_out(Scsi_Cmnd *scmd)
 {
-	struct Scsi_Host *shost = scmd->device->host;
-
-	/* Set the serial_number_at_timeout to the current serial_number */
-	scmd->serial_number_at_timeout = scmd->serial_number;
-
-	scsi_eh_eflags_set(scmd, SCSI_EH_CMD_TIMEOUT | SCSI_EH_CMD_ERR);
-
-	if (unlikely(shost->eh_wait == NULL)) {
+	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
 		panic("Error handler thread not present at %p %p %s %d",
-		      scmd, shost, __FILE__, __LINE__);
+		      scmd, scmd->device->host, __FILE__, __LINE__);
 	}
 
-	scsi_host_failed_inc_and_test(shost);
-
 	SCSI_LOG_TIMEOUT(3, printk("Command timed out busy=%d failed=%d\n",
-				   shost->host_busy, shost->host_failed));
+				   scmd->device->host->host_busy,
+				   scmd->device->host->host_failed));
 }
 
 /**

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-11  8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
@ 2003-02-11  8:17   ` Mike Anderson
  2003-02-11  8:19     ` [PATCH / RFC] scsi_error handler update. (4/4) Mike Anderson
                       ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-11  8:17 UTC (permalink / raw)
  To: linux-scsi

This patch series is against scsi-misc-2.5.

02_serror-hndlr-1.diff:
	- Change to using eh_cmd_list.
	- Change scsi_unjam_host to get sense, abort cmds, ready
	  devices, and disposition cmds for retry or finish.
	- Moved retries outside of eh.

-andmike
--
Michael Anderson
andmike@us.ibm.com

 scsi_error.c |  477 +++++++++++++++++++++++++++++------------------------------
 1 files changed, 241 insertions(+), 236 deletions(-)
------

diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
@@ -211,36 +211,36 @@
  * @sc_list:	List for failed cmds.
  * @shost:	scsi host being recovered.
  **/
-static void scsi_eh_prt_fail_stats(Scsi_Cmnd *sc_list, struct Scsi_Host *shost)
+static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost)
 {
-	Scsi_Cmnd *scmd;
-	Scsi_Device *sdev;
+	struct scsi_cmnd *scmd;
+	struct scsi_device *sdev;
 	int total_failures = 0;
 	int cmd_failed = 0;
-	int cmd_timed_out = 0;
+	int cmd_cancel = 0;
 	int devices_failed = 0;
 
 
 	list_for_each_entry(sdev, &shost->my_devices, siblings) {
-		for (scmd = sc_list; scmd; scmd = scmd->bh_next) {
+		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
 			if (scmd->device == sdev) {
 				++total_failures;
 				if (scsi_eh_eflags_chk(scmd,
-						       SCSI_EH_CMD_TIMEOUT))
-					++cmd_timed_out;
-				else
+						       SCSI_EH_CANCEL_CMD))
+					++cmd_cancel;
+				else 
 					++cmd_failed;
 			}
 		}
 
-		if (cmd_timed_out || cmd_failed) {
+		if (cmd_cancel || cmd_failed) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				printk("%s: %d:%d:%d:%d cmds failed: %d,"
-				       " timedout: %d\n",
+				       " cancel: %d\n",
 				       __FUNCTION__, shost->host_no,
 				       sdev->channel, sdev->id, sdev->lun,
-				       cmd_failed, cmd_timed_out));
-			cmd_timed_out = 0;
+				       cmd_failed, cmd_cancel));
+			cmd_cancel = 0;
 			cmd_failed = 0;
 			++devices_failed;
 		}
@@ -253,68 +253,6 @@
 #endif
 
 /**
- * scsi_eh_get_failed - Gather failed cmds.
- * @sc_list:	A pointer to a list for failed cmds.
- * @shost:	Scsi host being recovered.
- *
- * XXX Add opaque interator for device / shost. Investigate direct
- * addition to per eh list on error allowing skipping of this step.
- **/
-static void scsi_eh_get_failed(Scsi_Cmnd **sc_list, struct Scsi_Host *shost)
-{
-	int found;
-	Scsi_Device *sdev;
-	Scsi_Cmnd *scmd;
-
-	found = 0;
-	list_for_each_entry(sdev, &shost->my_devices, siblings) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&sdev->list_lock, flags);
-		list_for_each_entry(scmd, &sdev->cmd_list, list) {
-			if (scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
-				scmd->bh_next = *sc_list;
-				*sc_list = scmd;
-				found++;
-			} else {
-				/*
-				 * FIXME Verify how this can happen and if
-				 * this is still needed??
-				 */
-			    if (scmd->state != SCSI_STATE_INITIALIZING
-			    && scmd->state != SCSI_STATE_UNUSED) {
-				/*
-				 * Rats.  Something is still floating
-				 * around out there This could be the
-				 * result of the fact that the upper level
-				 * drivers are still frobbing commands
-				 * that might have succeeded.  There are
-				 * two outcomes. One is that the command
-				 * block will eventually be freed, and the
-				 * other one is that the command will be
-				 * queued and will be finished along the
-				 * way.
-				 */
-				SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr"
-							  " prematurely woken"
-							  " cmds still active"
-							  " (%p %x %d)\n",
-					       scmd, scmd->state,
-					       scmd->device->id));
-				}
-			}
-		}
-		spin_unlock_irqrestore(&sdev->list_lock, flags);
-	}
-
-	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost));
-
-	if (shost->host_failed != found)
-		printk(KERN_ERR "%s: host_failed: %d != found: %d\n", 
-		       __FUNCTION__, shost->host_failed, found);
-}
-
-/**
  * scsi_check_sense - Examine scsi cmd sense
  * @scmd:	Cmd to have sense checked.
  *
@@ -570,7 +508,8 @@
 			spin_lock_irqsave(scmd->device->host->host_lock, flags);
 			if (scmd->device->host->hostt->eh_abort_handler)
 				scmd->device->host->hostt->eh_abort_handler(scmd);
-			spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
+			spin_unlock_irqrestore(scmd->device->host->host_lock,
+					       flags);
 			
 			scmd->request->rq_status = RQ_SCSI_DONE;
 			scmd->owner = SCSI_OWNER_ERROR_HANDLER;
@@ -712,6 +651,7 @@
  * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
  * @scmd:	Original SCSI cmd that eh has finished.
  * @shost:	SCSI host that cmd originally failed on.
+ * @done_list:	list_head for processed commands.
  *
  * Notes:
  *    We don't want to use the normal command completion while we are are
@@ -720,7 +660,8 @@
  *    keep a list of pending commands for final completion, and once we
  *    are ready to leave error handling we handle completion for real.
  **/
-static void scsi_eh_finish_cmd(Scsi_Cmnd *scmd, struct Scsi_Host *shost)
+static void scsi_eh_finish_cmd(Scsi_Cmnd *scmd, struct Scsi_Host *shost,
+			       struct list_head *done_list )
 {
 	shost->host_failed--;
 	scmd->state = SCSI_STATE_BHQUEUE;
@@ -731,12 +672,14 @@
 	 * things.
 	 */
 	scsi_setup_cmd_retry(scmd);
+
+	list_move_tail(&scmd->eh_list, done_list);
 }
 
 /**
  * scsi_eh_get_sense - Get device sense data.
- * @sc_todo:	list of cmds that have failed.
  * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
  *
  * Description:
  *    See if we need to request sense information.  if so, then get it
@@ -754,23 +697,23 @@
  *
  *    In 2.5 this capability will be going away.
  **/
-static int scsi_eh_get_sense(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_get_sense(struct Scsi_Host *shost,
+			     struct list_head *done_list)
 {
 	int rtn;
+	struct list_head *lh, *lh_sf;
 	Scsi_Cmnd *scmd;
 
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
-					  " to request sense\n",
-					  __FUNCTION__));
-
-	for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
-		if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_FAILED) ||
+	list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+		scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+		if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD) ||
 		    SCSI_SENSE_VALID(scmd))
 			continue;
 
 		SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
-						  " for tgt: %d\n",
-						  __FUNCTION__, scmd->device->id));
+						  " for id: %d\n",
+						  current->comm,
+						  scmd->device->id));
 		rtn = scsi_request_sense(scmd);
 		if (rtn != SUCCESS)
 			continue;
@@ -787,7 +730,7 @@
 		 * upper level.
 		 */
 		if (rtn == SUCCESS)
-			scsi_eh_finish_cmd(scmd, shost);
+			scsi_eh_finish_cmd(scmd, shost, done_list);
 		if (rtn != NEEDS_RETRY)
 			continue;
 
@@ -806,10 +749,10 @@
 		/*
 		 * we eventually hand this one back to the top level.
 		 */
-		scsi_eh_finish_cmd(scmd, shost);
+		scsi_eh_finish_cmd(scmd, shost, done_list);
 	}
 
-	return shost->host_failed;
+	return list_empty(&shost->eh_cmd_list);
 }
 
 /**
@@ -899,9 +842,9 @@
 }
 
 /**
- * scsi_eh_abort_cmd - abort a timed-out cmd.
- * @sc_todo:	A list of cmds that have failed.
+ * scsi_eh_abort_cmds - abort canceled commands.
  * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
  *
  * Decription:
  *    Try and see whether or not it makes sense to try and abort the
@@ -910,29 +853,36 @@
  *    no sense to try and abort the command, since as far as the shost
  *    adapter is concerned, it isn't running.
  **/
-static int scsi_eh_abort_cmd(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_abort_cmds(struct Scsi_Host *shost,
+			      struct list_head *done_list)
 {
-
 	int rtn;
-	Scsi_Cmnd *scmd;
+	struct list_head *lh, *lh_sf;
+	struct scsi_cmnd *scmd;
 
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: checking to see if we need"
-					  " to abort cmd\n", __FUNCTION__));
-
-	for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
-		if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_TIMEOUT))
+	list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+		scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+		if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
 			continue;
-
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
+						  "0x%p\n", current->comm,
+						  scmd));
 		rtn = scsi_try_to_abort_cmd(scmd);
 		if (rtn == SUCCESS) {
-			if (!scsi_eh_tur(scmd)) {
-				rtn = scsi_eh_retry_cmd(scmd);
-				if (rtn == SUCCESS)
-					scsi_eh_finish_cmd(scmd, shost);
+			scsi_eh_eflags_clr(scmd,  SCSI_EH_CANCEL_CMD);
+			if (!scmd->device->online || !scsi_eh_tur(scmd)) {
+				scsi_eh_finish_cmd(scmd, shost, done_list);
 			}
-		}
+				
+		} else
+			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
+							  " cmd failed:"
+							  "0x%p\n",
+							  current->comm,
+							  scmd));
 	}
-	return shost->host_failed;
+
+	return list_empty(&shost->eh_cmd_list);
 }
 
 /**
@@ -968,9 +918,9 @@
 }
 
 /**
- * scsi_eh_bus_device_reset - send bdr is needed
- * @sc_todo:	a list of cmds that have failed.
+ * scsi_eh_bus_device_reset - send bdr if needed
  * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
  *
  * Notes:
  *    Try a bus device reset.  still, look to see whether we have multiple
@@ -978,39 +928,52 @@
  *    makes no sense to try bus_device_reset - we really would need to try
  *    a bus_reset instead. 
  **/
-static int scsi_eh_bus_device_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
+				    struct list_head *done_list)
 {
 	int rtn;
-	Scsi_Cmnd *scmd;
-	Scsi_Device *sdev;
-
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Trying BDR\n", __FUNCTION__));
+	struct list_head *lh, *lh_sf;
+	struct scsi_cmnd *scmd, *bdr_scmd;
+	struct scsi_device *sdev;
 
 	list_for_each_entry(sdev, &shost->my_devices, siblings) {
-		for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
-			if ((scmd->device == sdev) &&
-			    scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
+		bdr_scmd = NULL;
+		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list)
+			if (scmd->device == sdev) {
+				bdr_scmd = scmd;
 				break;
+			}
 
-		if (!scmd)
+		if (!bdr_scmd)
 			continue;
 
-		/*
-		 * ok, we have a device that is having problems.  try and send
-		 * a bus device reset to it.
-		 */
-		rtn = scsi_try_bus_device_reset(scmd);
-		if ((rtn == SUCCESS) && (!scsi_eh_tur(scmd)))
-				for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
-					if ((scmd->device == sdev) &&
-					    scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
-						rtn = scsi_eh_retry_cmd(scmd);
-						if (rtn == SUCCESS)
-							scsi_eh_finish_cmd(scmd, shost);
-					}
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
+						  " 0x%p\n", current->comm,
+						  sdev));
+		rtn = scsi_try_bus_device_reset(bdr_scmd);
+		if (rtn == SUCCESS) {
+			if (!sdev->online || !scsi_eh_tur(bdr_scmd)) {
+				list_for_each_safe(lh, lh_sf,
+						   &shost->eh_cmd_list) {
+					scmd = list_entry(lh, struct
+							  scsi_cmnd,
+							  eh_list);
+					if (scmd->device == sdev)
+						scsi_eh_finish_cmd(scmd,
+								   shost,
+								   done_list);
+				}
+			}
+		} else {
+			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
+							  " failed sdev:"
+							  "0x%p\n",
+							  current->comm,
+							   sdev));
+		}
 	}
 
-	return shost->host_failed;
+	return list_empty(&shost->eh_cmd_list);
 }
 
 /**
@@ -1040,7 +1003,8 @@
 		/*
 		 * Mark all affected devices to expect a unit attention.
 		 */
-		list_for_each_entry(sdev, &scmd->device->host->my_devices, siblings)
+		list_for_each_entry(sdev, &scmd->device->host->my_devices,
+				    siblings)
 			if (scmd->device->channel == sdev->channel) {
 				sdev->was_reset = 1;
 				sdev->expecting_cc_ua = 1;
@@ -1076,7 +1040,8 @@
 		/*
 		 * Mark all affected devices to expect a unit attention.
 		 */
-		list_for_each_entry(sdev, &scmd->device->host->my_devices, siblings)
+		list_for_each_entry(sdev, &scmd->device->host->my_devices,
+				    siblings)
 			if (scmd->device->channel == sdev->channel) {
 				sdev->was_reset = 1;
 				sdev->expecting_cc_ua = 1;
@@ -1086,26 +1051,20 @@
 }
 
 /**
- * scsi_eh_bus_host_reset - send a bus reset and on failure try host reset
- * @sc_todo:	a list of cmds that have failed.
+ * scsi_eh_bus_reset - send a bus reset 
  * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
  **/
-static int scsi_eh_bus_host_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static int scsi_eh_bus_reset(struct Scsi_Host *shost,
+			     struct list_head *done_list)
 {
 	int rtn;
+	struct list_head *lh, *lh_sf;
 	Scsi_Cmnd *scmd;
 	Scsi_Cmnd *chan_scmd;
 	unsigned int channel;
 
 	/*
-	 * if we ended up here, we have serious problems.  the only thing left
-	 * to try is a full bus reset.  if someone has grabbed the bus and isn't
-	 * letting go, then perhaps this will help.
-	 */
-	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Try Bus/Host RST\n",
-					  __FUNCTION__));
-
-	/* 
 	 * we really want to loop over the various channels, and do this on
 	 * a channel by channel basis.  we should also check to see if any
 	 * of the failed commands are on soft_reset devices, and if so, skip
@@ -1113,9 +1072,8 @@
 	 */
 
 	for (channel = 0; channel <= shost->max_channel; channel++) {
-		for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
-			if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
-				continue;
+		chan_scmd = NULL;
+		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
 			if (channel == scmd->device->channel) {
 				chan_scmd = scmd;
 				break;
@@ -1126,63 +1084,97 @@
 			}
 		}
 
-		if (!scmd)
+		if (!chan_scmd)
 			continue;
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
+						  " %d\n", current->comm,
+						  channel));
+		rtn = scsi_try_bus_reset(chan_scmd);
+		if (rtn == SUCCESS) {
+			list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+				scmd = list_entry(lh, struct scsi_cmnd,
+						  eh_list);
+				if (channel == scmd->device->channel)
+					if (!scmd->device->online ||
+					    !scsi_eh_tur(scmd))
+						scsi_eh_finish_cmd(scmd,
+								   shost,
+								   done_list);
+			}
+		} else {
+			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
+							  " failed chan: %d\n",
+							  current->comm,
+							  channel));
+		}
+	}
+	return list_empty(&shost->eh_cmd_list);
+}
 
-		/*
-		 * we now know that we are able to perform a reset for the
-		 * channel that scmd points to.
-		 */
-		rtn = scsi_try_bus_reset(scmd);
-		if (rtn != SUCCESS)
-			rtn = scsi_try_host_reset(scmd);
+/**
+ * scsi_eh_host_reset - send a host reset 
+ * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
+ **/
+static int scsi_eh_host_reset(struct Scsi_Host *shost,
+			      struct list_head *done_list)
+{
+	int rtn;
+	struct list_head *lh, *lh_sf;
+	Scsi_Cmnd *scmd;
 
-		if (rtn == SUCCESS) {
-			for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
-				if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)
-				    || channel != scmd->device->channel)
-					continue;
-				if (!scsi_eh_tur(scmd)) {
-					rtn = scsi_eh_retry_cmd(scmd);
+	if (!list_empty(&shost->eh_cmd_list)) {
+		scmd = list_entry(shost->eh_cmd_list.next,
+				  struct scsi_cmnd, eh_list);
 
-					if (rtn == SUCCESS)
-						scsi_eh_finish_cmd(scmd, shost);
-				}
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
+						  , current->comm));
+
+		rtn = scsi_try_host_reset(scmd);
+		if (rtn == SUCCESS) {
+			list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+				scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+				if (!scmd->device->online || !scsi_eh_tur(scmd)) 
+					scsi_eh_finish_cmd(scmd, shost,
+							   done_list);
 			}
+		} else {
+			SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
+							  " failed\n",
+							  current->comm));
 		}
-
 	}
-	return shost->host_failed;
+	return list_empty(&shost->eh_cmd_list);
 }
 
 /**
  * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
- * @sc_todo:	a list of cmds that have failed.
  * @shost:	scsi host being recovered.
+ * @done_list:	list_head for processed commands.
  *
  **/
-static void scsi_eh_offline_sdevs(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
+static void scsi_eh_offline_sdevs(struct Scsi_Host *shost,
+				  struct list_head *done_list)
 {
+	struct list_head *lh, *lh_sf;
 	Scsi_Cmnd *scmd;
 
-	for (scmd = sc_todo; scmd; scmd = scmd->bh_next) {
-		if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
-			continue;
-
+	list_for_each_safe(lh, lh_sf, &shost->eh_cmd_list) {
+		scmd = list_entry(lh, struct scsi_cmnd, eh_list);
 		printk(KERN_INFO "scsi: Device offlined - not"
-				" ready or command retry failed"
-				" after error recovery: host"
+		       		" ready after error recovery: host"
 				" %d channel %d id %d lun %d\n",
 				shost->host_no,
 				scmd->device->channel,
 				scmd->device->id,
 				scmd->device->lun);
-
-		if (scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_TIMEOUT))
-			scmd->result |= (DRIVER_TIMEOUT << 24);
-
-		scmd->device->online = 0;
-		scsi_eh_finish_cmd(scmd, shost);
+		scmd->device->online = FALSE;
+		if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
+			/*
+			 * FIXME: Handle lost cmds.
+			 */
+		}
+		scsi_eh_finish_cmd(scmd, shost, done_list);
 	}
 	return;
 }
@@ -1477,6 +1469,8 @@
 
 	ASSERT_LOCK(shost->host_lock, 0);
 
+	shost->in_recovery = 0;
+
 	/*
 	 * If the door was locked, we need to insert a door lock request
 	 * onto the head of the SCSI request queue for the device.  There
@@ -1517,6 +1511,56 @@
 }
 
 /**
+ * scsi_eh_ready_devs - check device ready state and recover if not.
+ * @shost: 	host to be recovered.
+ * @done_list:	list_head for processed commands.
+ *
+ **/
+static void scsi_eh_ready_devs(struct Scsi_Host *shost,
+			       struct list_head *done_list)
+{
+	if (scsi_eh_bus_device_reset(shost, done_list))
+		if (scsi_eh_bus_reset(shost, done_list))
+			if (scsi_eh_host_reset(shost, done_list))
+				scsi_eh_offline_sdevs(shost, done_list);
+}
+
+/**
+ * scsi_eh_flush_done_list - finish processed commands or retry them.
+ * @shost: 	host to be recovered.
+ * @done_list:	list_head of processed commands.
+ *
+ **/
+static void scsi_eh_flush_done_list(struct Scsi_Host *shost,
+				    struct list_head *done_list)
+{
+	struct list_head *lh, *lh_sf;
+	Scsi_Cmnd *scmd;
+
+	list_for_each_safe(lh, lh_sf, done_list) {
+		scmd = list_entry(lh, struct scsi_cmnd, eh_list);
+		list_del_init(lh);
+		if (!scmd->device->online) {
+			 scmd->result |= (DRIVER_TIMEOUT << 24);
+		} else {
+			if (++scmd->retries < scmd->allowed) {
+				SCSI_LOG_ERROR_RECOVERY(3,
+					printk("%s: flush retry"
+					       " cmd: %p\n",
+						  current->comm,
+						  scmd));
+				scsi_retry_command(scmd);
+				continue;
+			}
+		}
+		SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
+				       " cmd: %p\n",
+					  current->comm, scmd));
+		scsi_finish_command(scmd);
+	}
+}
+
+/**
  * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
  * @shost:	Host to unjam.
  *
@@ -1541,60 +1585,15 @@
  **/
 static void scsi_unjam_host(struct Scsi_Host *shost)
 {
-	Scsi_Cmnd *sc_todo = NULL;
-	Scsi_Cmnd *scmd;
-
-	/*
-	 * Is this assert really ok anymore (andmike). Should we at least
-	 * be using spin_lock_unlocked.
-	 */
-	ASSERT_LOCK(shost->host_lock, 0);
-
-	scsi_eh_get_failed(&sc_todo, shost);
-
-	if (scsi_eh_get_sense(sc_todo, shost))
-		if (scsi_eh_abort_cmd(sc_todo, shost))
-			if (scsi_eh_bus_device_reset(sc_todo, shost))
-				if (scsi_eh_bus_host_reset(sc_todo, shost))
-					scsi_eh_offline_sdevs(sc_todo, shost);
+	LIST_HEAD(done_list);
 
-	BUG_ON(shost->host_failed);
+	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost));
 
+	if (!scsi_eh_get_sense(shost, &done_list))
+		if (!scsi_eh_abort_cmds(shost, &done_list))
+			scsi_eh_ready_devs(shost, &done_list);
 
-	/*
-	 * We are currently holding these things in a linked list - we
-	 * didn't put them in the bottom half queue because we wanted to
-	 * keep things quiet while we were working on recovery, and
-	 * passing them up to the top level could easily cause the top
-	 * level to try and queue something else again.
-	 *
-	 * start by marking that the host is no longer in error recovery.
-	 */
-	shost->in_recovery = 0;
-
-	/*
-	 * take the list of commands, and stick them in the bottom half queue.
-	 * the current implementation of scsi_done will do this for us - if need
-	 * be we can create a special version of this function to do the
-	 * same job for us.
-	 */
-	for (scmd = sc_todo; scmd; scmd = sc_todo) {
-		sc_todo = scmd->bh_next;
-		scmd->bh_next = NULL;
-		/*
-		 * Oh, this is a vile hack.  scsi_done() expects a timer
-		 * to be running on the command.  If there isn't, it assumes
-		 * that the command has actually timed out, and a timer
-		 * handler is running.  That may well be how we got into
-		 * this fix, but right now things are stable.  We add
-		 * a timer back again so that we can report completion.
-		 * scsi_done() will immediately remove said timer from
-		 * the command, and then process it.
-		 */
-		scsi_add_timer(scmd, 100, scsi_eh_times_out);
-		scsi_done(scmd);
-	}
-
+	scsi_eh_flush_done_list(shost, &done_list);
 }
 
 /**
@@ -1642,7 +1641,8 @@
 	/*
 	 * Wake up the thread that created us.
 	 */
-	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of scsi_eh_%d\n",shost->host_no));
+	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
+					  " scsi_eh_%d\n",shost->host_no));
 
 	complete(shost->eh_notify);
 
@@ -1652,7 +1652,9 @@
 		 * away and die.  This typically happens if the user is
 		 * trying to unload a module.
 		 */
-		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d sleeping\n",shost->host_no));
+		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
+						  " scsi_eh_%d"
+						  " sleeping\n",shost->host_no));
 
 		/*
 		 * Note - we always use down_interruptible with the semaphore
@@ -1667,7 +1669,9 @@
 		if (shost->eh_kill)
 			break;
 
-		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d waking up\n",shost->host_no));
+		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
+						  " scsi_eh_%d waking"
+						  " up\n",shost->host_no));
 
 		shost->eh_active = 1;
 
@@ -1695,7 +1699,8 @@
 
 	}
 
-	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d exiting\n",shost->host_no));
+	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
+					  " exiting\n",shost->host_no));
 
 	/*
 	 * Make sure that nobody tries to wake us up again.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (4/4)
  2003-02-11  8:17   ` [PATCH / RFC] scsi_error handler update. (3/4) Mike Anderson
@ 2003-02-11  8:19     ` Mike Anderson
  2003-02-11 22:38     ` [PATCH / RFC] scsi_error handler update. (3/4) James Bottomley
  2003-02-12 22:34     ` James Bottomley
  2 siblings, 0 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-11  8:19 UTC (permalink / raw)
  To: linux-scsi

This patch series is against scsi-misc-2.5.

03_serror-dev-offline-1.diff:
	- Add scsi_set_device_offline interface.

-andmike
--
Michael Anderson
andmike@us.ibm.com

 scsi.c      |   38 ++++++++++++++++++++++++++++++++++++++
 scsi.h      |    1 +
 scsi_syms.c |    1 +
 3 files changed, 40 insertions(+)
------

diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
--- a/drivers/scsi/scsi.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi.c	Mon Feb 10 22:25:47 2003
@@ -1519,6 +1519,44 @@
 {
 	sdev->access_count--;
 	module_put(sdev->host->hostt->module);
+}
+
+/**
+ * scsi_set_device_offline - set scsi_device offline
+ * @sdev:	pointer to struct scsi_device to offline. 
+ *
+ * Locks:	host_lock held on entry.
+ **/
+void scsi_set_device_offline(struct scsi_device *sdev)
+{
+	struct scsi_cmnd *scmd;
+	int	cmds_active = 0;
+	unsigned long flags;
+
+	sdev->online = FALSE;
+
+	spin_lock_irqsave(&sdev->list_lock, flags);
+	list_for_each_entry(scmd, &sdev->cmd_list, list) {
+		if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+			/*
+			 * If we are unable to remove the timer, it means
+			 * that the command has already timed out or
+			 * finished.
+			 */
+			if (!scsi_delete_timer(scmd)) {
+				continue;
+			}
+
+			++cmds_active;
+
+			scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD);
+		}
+	}
+	spin_unlock_irqrestore(&sdev->list_lock, flags);
+
+	if (!cmds_active) {
+		/* FIXME: Send online state change hotplug event */
+	}
 }
 
 /*
diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi.h	Mon Feb 10 22:25:47 2003
@@ -456,6 +456,7 @@
 extern void scsi_slave_detach(struct scsi_device *);
 extern int scsi_device_get(struct scsi_device *);
 extern void scsi_device_put(struct scsi_device *);
+extern void scsi_set_device_offline(struct scsi_device *);
 extern void scsi_done(Scsi_Cmnd * SCpnt);
 extern void scsi_finish_command(Scsi_Cmnd *);
 extern int scsi_retry_command(Scsi_Cmnd *);

diff -Nru a/drivers/scsi/scsi_syms.c b/drivers/scsi/scsi_syms.c
--- a/drivers/scsi/scsi_syms.c	Mon Feb 10 22:25:47 2003
+++ b/drivers/scsi/scsi_syms.c	Mon Feb 10 22:25:47 2003
@@ -80,6 +80,7 @@
 EXPORT_SYMBOL(scsi_slave_detach);
 EXPORT_SYMBOL(scsi_device_get);
 EXPORT_SYMBOL(scsi_device_put);
+EXPORT_SYMBOL(scsi_set_device_offline);
 
 /*
  * This symbol is for the highlevel drivers (e.g. sg) only.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11  8:13 [PATCH / RFC] scsi_error handler update. (1/4) Mike Anderson
  2003-02-11  8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
@ 2003-02-11 16:49 ` Luben Tuikov
  2003-02-11 17:22   ` Mike Anderson
  2003-02-11 18:00 ` Patrick Mansfield
  2 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-11 16:49 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

Mike Anderson wrote:
> This patch series is against scsi-misc-2.5.
> 
> These patches modify the scsi error handler to process cmds needing
> recovery off a list. The error handler policy has been altered to do the
> following:
> 	1.) Check for legacy behavior of requesting sense.
> 	2.) Abort commands marked needing to be canceled.
> 	3.) Ready devices through tur and eh handlers.
> 	4.) disposition each command on the list to be retried or
> 	finished.
> 
> 00_serror-cmd-list-1.diff:
> 	- Add per host eh_cmd_list list head.
> 	- Add per cmd eh_list list head.

Could you call this ``eh_cmd_entry'' or ``eh_entry''*,
or why don't you use the already provided ``list'' entry.

* The rest of the kernel calls ".*_?entry_?.*" list_heads
which will be used as _part_ of lists and lists themselves
as ".*_?list_?.*". (both regex)

My point here is that ``eh_list'' is symbolically quite similar
to ``eh_cmd_list'', all the while ``eh_list'' is not the ADT* List,
but an entry to/of a list.  Forget about the name of the type, i.e.
that it is struct list_head for both lists and entries of lists --
this is the whole beauty of the linux lists.

* ADT, Abstract Data Type.

The already provided ``list'' in cmd struct entry is so ambiguous
that there's no other way but to conclude that it is the entry to
a list.

In fact this is the whole point when I added it (``list'') -- so
that the cmd struct would be part of a list depending on its _state_,
as I've mentioned before. ((This seems to be forming slowly.))

Question: Could you explain when a command becomes a member of
the eh_cmd_list, and when it ceases to be a member of this list?
I.e. what is true and false when a command is part and not
part of this list?  I couldn't quite get it, just looking from the code,
and probably should've looked ``closer'', but am pressed for time.
... I suspect it's something we've discussed here.

Thanks,
-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 16:49 ` [PATCH / RFC] scsi_error handler update. (1/4) Luben Tuikov
@ 2003-02-11 17:22   ` Mike Anderson
  2003-02-11 19:05     ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Mike Anderson @ 2003-02-11 17:22 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: linux-scsi

Luben Tuikov [luben@splentec.com] wrote:
> Could you call this ``eh_cmd_entry'' or ``eh_entry''*,
> or why don't you use the already provided ``list'' entry.
> 

I could change the name.

I wanted a list just for the error handler I was concerned in the
timeout (cancel cases) that until ownership was reclaimed that I did not
want to risk modification of another scsi_cmnd list_head. We should be
protected by the scsi_done scsi_delete_timer so I could possibly use the
other list head.

> Question: Could you explain when a command becomes a member of
> the eh_cmd_list, and when it ceases to be a member of this list?
> I.e. what is true and false when a command is part and not
> part of this list?  I couldn't quite get it, just looking from the code,
> and probably should've looked ``closer'', but am pressed for time.
> ... I suspect it's something we've discussed here.

A command becomes a member of the eh_cmd_list:
	- If a scsi_cmnd times out.
	- A call to scsi_set_device_offline with scsi_cmnds active.
	- A command completes with DID_TIME_OUT and is not a TUR or
	  Inquiry.
	- A command completes with message byte of != COMMAND_COMPLETE.
	- A few other cases in scsi_decide_disposition. I will document
	  them.

A command is removed from this list when:
	- Legacy request sense is done.
	- Abort was successful.
	- A function in the sequence done by scsi_eh_ready_devs is
	  successful.
-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11  8:13 [PATCH / RFC] scsi_error handler update. (1/4) Mike Anderson
  2003-02-11  8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
  2003-02-11 16:49 ` [PATCH / RFC] scsi_error handler update. (1/4) Luben Tuikov
@ 2003-02-11 18:00 ` Patrick Mansfield
  2003-02-11 18:44   ` Mike Anderson
  2 siblings, 1 reply; 47+ messages in thread
From: Patrick Mansfield @ 2003-02-11 18:00 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

On Tue, Feb 11, 2003 at 12:13:51AM -0800, Mike Anderson wrote:
> This patch series is against scsi-misc-2.5.
> 

> +#define SCSI_EH_CANCEL_CMD	0x0001	/* Cancel this cmd */
> +#define SCSI_EH_REC_TIMEOUT	0x0002	/* EH retry timed out */

SCSI_EH_REC_TIMEOUT isn't used.

Can we add a SCSI_EH_CMD, set for any command sent down for use in error
handling, and for users (well, at least one user) of the obsolete
eh_state?

Two drivers (u14-34f.c, eata.c) use eh_state as a redundant bug check
during abort, its use there could be deleted.

One driver (drivers/fc4/fc.c, might not even work) uses eh_state to track
the state of an internal scsi_cmnd - it needs two states, or it could
be changed to track the scsi_cmnd state separately.

One driver (dpt_i2o.c) uses eh_state to figure out if it has a command
sent via error handler, if not, it resets the timeout to a fixed value of
300 seconds (TMOUT_SCSI). It could use a SCSI_EH_CMD state to match its
previous intended behaviour.

-- Patrick Mansfield

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 18:00 ` Patrick Mansfield
@ 2003-02-11 18:44   ` Mike Anderson
  0 siblings, 0 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-11 18:44 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: linux-scsi

Patrick Mansfield [patmans@us.ibm.com] wrote:
> On Tue, Feb 11, 2003 at 12:13:51AM -0800, Mike Anderson wrote:
> > This patch series is against scsi-misc-2.5.
> > 
> 
> > +#define SCSI_EH_CANCEL_CMD	0x0001	/* Cancel this cmd */
> > +#define SCSI_EH_REC_TIMEOUT	0x0002	/* EH retry timed out */
> 
> SCSI_EH_REC_TIMEOUT isn't used.
> 

SCSI_EH_REC_TIMEOUT is used in scsi_eh_times_out to indicate that a
scsi_eh_cmd timed-out. While we are not retrying failed commands in the
eh handler we are sending TURs which could timeout.

> Can we add a SCSI_EH_CMD, set for any command sent down for use in error
> handling, and for users (well, at least one user) of the obsolete
> eh_state?
> 

We need a solution, but I was trying to keep these flags for eh use
only. Since there are some broken drivers now, maybe the easiest solution
is to provide a flag.

> Two drivers (u14-34f.c, eata.c) use eh_state as a redundant bug check
> during abort, its use there could be deleted.
> 

Yes, maybe this check should be deleted.

Another option would be to use this check.

if (scmd->serial_number != scmd->serial_number_at_timeout) {
	return (FAILED);
}

> One driver (drivers/fc4/fc.c, might not even work) uses eh_state to track
> the state of an internal scsi_cmnd - it needs two states, or it could
> be changed to track the scsi_cmnd state separately.
> 

Short term might be easier to add the flag, but this driver could
allocate mem for host_scribble and use this.

> One driver (dpt_i2o.c) uses eh_state to figure out if it has a command
> sent via error handler, if not, it resets the timeout to a fixed value of
> 300 seconds (TMOUT_SCSI). It could use a SCSI_EH_CMD state to match its
> previous intended behaviour.
> 

I do not have a good answer here right now.


-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 17:22   ` Mike Anderson
@ 2003-02-11 19:05     ` Luben Tuikov
  2003-02-11 20:14       ` Luben Tuikov
                         ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-11 19:05 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

Mike Anderson wrote:
> 
> I could change the name.
> 
> I wanted a list just for the error handler I was concerned in the
> timeout (cancel cases) that until ownership was reclaimed that I did not
> want to risk modification of another scsi_cmnd list_head. We should be
> protected by the scsi_done scsi_delete_timer so I could possibly use the
> other list head.

Yes, I see the predicament.

I also wanted to make note as to the choice of names of variables.
This is very important, not so much judging from kernel use, but in
general.  I've been known to change my var names over 10 times until
they've gotten to represent *and* mean what they represent and mean.
I.e. their usage == their name as close as possible.

E.g. this is why we use Leibniz's calculus notation rather than
Newton's.  Sorry for the example, but this is my background.
I.e. symbols carry out meaning in and of themselves.

In fact cmd_list is actually a cmd_q (cmd queue) and eh_cmd_list
is actually eh_cmd_q.  Since both are ordered lists of commands by
arrival time.  And will soon become priority queues when SCSI Core
gets up to date with sending down HOQ (Head Of Queue) commands,
as per SAM-2/3.  It would be nice if someone renamed those...

(back to linux-scsi :-) )

> A command becomes a member of the eh_cmd_list:
> 	- If a scsi_cmnd times out.
> 	- A call to scsi_set_device_offline with scsi_cmnds active.
> 	- A command completes with DID_TIME_OUT and is not a TUR or
> 	  Inquiry.
> 	- A command completes with message byte of != COMMAND_COMPLETE.
> 	- A few other cases in scsi_decide_disposition. I will document
> 	  them.
> 
> A command is removed from this list when:
> 	- Legacy request sense is done.
> 	- Abort was successful.
> 	- A function in the sequence done by scsi_eh_ready_devs is
> 	  successful.

Aaah, very nice.  This should be documented in the code.

Here is some comments (this means that they are comments, just that,
they don't mean that this is necessarily how I want things to be or
that I think it would be a better design... it may be worse -- the
point is to expand the brainstorm pool).

1. Is it possible that when a command times out, or when the application
client sends ABORT_TASK TMF (future), the eh thread wakes up and calls the driver's
abort function, without actually shutting down queueing or any other workings
of the host/driver/device?  Then let the LLDD cancel this command, all the while
the rest of the things can go on.

2. If the device is set off line, then there would be no point in moving
the commands from the cmd_list to eh_cmd_list, since cmd_list _becomes_
eh_cmd_list by the nature of the state of the device.

1 and 2 would mean that we don't really need the eh_cmd_list. (we do
in a different context...)

3. Why are TUR and INQUIRY treated differently on timeout?

4. If a command ``returns'' from a LLDD, and has in the message
byte something of the sort ``task not complete'' this almost
certainly means Service Response of SERVICE DELIVERY OR TARGET FAILURE.

In this case SCSI Core now owns the command and it should bubble up
to the application client and/or cancel the *other* queued commands
and/or fire a hot plug event.

In fact I've wanted for a long time a service_response byte in cmd struct
and status byte as per SAM-2/3.  This would make things so much easier
for SCSI Core.  All LLDDs would need tweaking but it is not that bad.

What are your thoughts that queuecommand() become void queuecommand()?
I.e. the only way a command is given ownership to a LLDD is via
queuecommand() and the only way a command is given back to SCSI Core
is via a call to scsi_done()?

Side-side comments:

I saw your done_list -- nice name.  What is the story with this list?

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 19:05     ` Luben Tuikov
@ 2003-02-11 20:14       ` Luben Tuikov
  2003-02-11 21:14       ` Mike Anderson
       [not found]       ` <3E495862.3050709@splentec.com>
  2 siblings, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-11 20:14 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

Luben Tuikov wrote:
 >
 > 1. Is it possible that when a command times out, or when the application
 > client sends ABORT_TASK TMF (future), the eh thread wakes up and calls the driver's
 > abort function, without actually shutting down queueing or any other workings
 > of the host/driver/device?  Then let the LLDD cancel this command, all the while
 > the rest of the things can go on.

Yeah, the host_lock...

I'm not very fond of grabbing a spin lock calling a fn which may sleep
and then releasing the lock...

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 19:05     ` Luben Tuikov
  2003-02-11 20:14       ` Luben Tuikov
@ 2003-02-11 21:14       ` Mike Anderson
       [not found]       ` <3E495862.3050709@splentec.com>
  2 siblings, 0 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-11 21:14 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: linux-scsi

Luben Tuikov [luben@splentec.com] wrote:
> 1. Is it possible that when a command times out, or when the application
> client sends ABORT_TASK TMF (future), the eh thread wakes up and calls the 
> driver's
> abort function, without actually shutting down queueing or any other 
> workings
> of the host/driver/device?  Then let the LLDD cancel this command, all the 
> while
> the rest of the things can go on.
> 

I have looked at this and for the case where abort cannot fail it
looks like we could handle that, but when it does a recovery sync point
is needed to ensure we do not create a recovery storm with independent
aborts going to the next level of recovery.

	1.) Abort a cmd (effects scsi_cmd).
	2.) LUN reset (effects scsi_device).
	3.) Bus Device Reset. (effect same target siblings).
	4.) Bus reset (effect my_devices).
	5.) host reset (effect my_devices and ??).

> 2. If the device is set off line, then there would be no point in moving
> the commands from the cmd_list to eh_cmd_list, since cmd_list _becomes_
> eh_cmd_list by the nature of the state of the device.
> 

True, but since I kept the error handler behavior similar to what it is
today we may have more than one scsi_device worth of work to do so
having a queue of work per eh seems more efficient for the eh to process.

> 1 and 2 would mean that we don't really need the eh_cmd_list. (we do
> in a different context...)
> 
> 3. Why are TUR and INQUIRY treated differently on timeout?
> 

I believe this is a historical optimization for scanning. 

> 4. If a command ``returns'' from a LLDD, and has in the message
> byte something of the sort ``task not complete'' this almost
> certainly means Service Response of SERVICE DELIVERY OR TARGET FAILURE.
> 
> In this case SCSI Core now owns the command and it should bubble up
> to the application client and/or cancel the *other* queued commands
> and/or fire a hot plug event.
> 
> In fact I've wanted for a long time a service_response byte in cmd struct
> and status byte as per SAM-2/3.  This would make things so much easier
> for SCSI Core.  All LLDDs would need tweaking but it is not that bad.
> 

I was not thinking this broad of change this morning, but when I typed the
comments on when cmds would be added to the eh list based on the result
of the message byte it looked like we could use finer grain decoding /
action.

> What are your thoughts that queuecommand() become void queuecommand()?
> I.e. the only way a command is given ownership to a LLDD is via
> queuecommand() and the only way a command is given back to SCSI Core
> is via a call to scsi_done()?
> 

I think this would be a good direction to head. I would like to hear
from LLDD writers, but it should provide a more consistent interface. In
looking at a few LLD eh handlers there are some holes that if a command
already completed the handler will return failure (believing that the
mid should already know this), but there is no code currently to let the
error handler know causing more severe error handling steps to take
place. This code can be fixed, but if every command had to come through
scsi_done once queuecommand was called it would remove code assumption
on ownership.

> Side-side comments:
> 
> I saw your done_list -- nice name.  What is the story with this list?

Since the error handler can have a mix of work on its queue when woken
up this is a list that holds scsi cmnds that have been processed by the
error handler. In theory some cmnds could have needed canceled and others
are needing a (device recovery / tur / online verification) steps. 

The done list is then flushed "disposition-ed" prior to the error handler
sleeping. This gives the previous behavior that no commands are released
from the error handler until completion.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
       [not found]       ` <3E495862.3050709@splentec.com>
@ 2003-02-11 21:20         ` Mike Anderson
  2003-02-11 21:22           ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Mike Anderson @ 2003-02-11 21:20 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: linux-scsi

Luben Tuikov [luben@splentec.com] wrote:
> Luben Tuikov wrote:
> >
> >1. Is it possible that when a command times out, or when the application
> >client sends ABORT_TASK TMF (future), the eh thread wakes up and calls 
> >the driver's
> >abort function, without actually shutting down queueing or any other 
> >workings
> >of the host/driver/device?  Then let the LLDD cancel this command, all 
> >the while
> >the rest of the things can go on.
> 
> Yeah, the host_lock...
> 
> I'm not very fond of grabbing a spin lock calling a fn which may sleep
> and then releasing the lock...

We grab this lock today before calling eh handlers, but I believe some
LLDDs drop the lock if they are going to sleep.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 21:20         ` Mike Anderson
@ 2003-02-11 21:22           ` Luben Tuikov
  2003-02-11 22:41             ` Christoph Hellwig
  0 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-11 21:22 UTC (permalink / raw)
  To: Mike Anderson; +Cc: linux-scsi

Mike Anderson wrote:
> Luben Tuikov [luben@splentec.com] wrote:
> 
>>Yeah, the host_lock...
>>
>>I'm not very fond of grabbing a spin lock calling a fn which may sleep
>>and then releasing the lock...
> 
> 
> We grab this lock today before calling eh handlers, but I believe some
> LLDDs drop the lock if they are going to sleep.

:-)  yes, I know -- I was just saying that I don't like this policy
that much.

It's oxymoronic to grab a spin lock and say to someone, ok
you can sleep, and then release the spin lock...

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-11  8:17   ` [PATCH / RFC] scsi_error handler update. (3/4) Mike Anderson
  2003-02-11  8:19     ` [PATCH / RFC] scsi_error handler update. (4/4) Mike Anderson
@ 2003-02-11 22:38     ` James Bottomley
  2003-02-12  7:16       ` Mike Anderson
  2003-02-12 22:34     ` James Bottomley
  2 siblings, 1 reply; 47+ messages in thread
From: James Bottomley @ 2003-02-11 22:38 UTC (permalink / raw)
  To: Mike Anderson; +Cc: SCSI Mailing List

On Tue, 2003-02-11 at 02:17, Mike Anderson wrote:
> This patch series is against scsi-misc-2.5.
> 
> 02_serror-hndlr-1.diff:
> 	- Change to using eh_cmd_list.
> 	- Change scsi_unjam_host to get sense, abort cmds, ready
> 	  devices, and disposition cmds for retry or finish.
> 	- Moved retries outside of eh.
> 
> -andmike

I have some qualms about the locking: you protect the eh_cmd_list with
the host_lock when adding, but not when traversing in the eh_thread.  I
know this is because the eh_thread has quiesced the host before
beginning, thus there should theoretically be no returning commands to
tamper with the list while the eh_thread is using it.  However, I think
it might be worthwhile pointing this out in a comment over the
list_for_each_entry().

James


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 21:22           ` Luben Tuikov
@ 2003-02-11 22:41             ` Christoph Hellwig
  2003-02-12 20:10               ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2003-02-11 22:41 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Mike Anderson, linux-scsi

On Tue, Feb 11, 2003 at 04:22:21PM -0500, Luben Tuikov wrote:
> :-)  yes, I know -- I was just saying that I don't like this policy
> that much.
> 
> It's oxymoronic to grab a spin lock and say to someone, ok
> you can sleep, and then release the spin lock...

Agreed.  One of the things we really need to do is to change the calling
conventions of the eh methods to not have the lock held on entry.

I just wonder how we can achieve that best without silently breaking
drivers.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-11 22:38     ` [PATCH / RFC] scsi_error handler update. (3/4) James Bottomley
@ 2003-02-12  7:16       ` Mike Anderson
  2003-02-12 14:26         ` Luben Tuikov
  2003-02-12 14:37         ` James Bottomley
  0 siblings, 2 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-12  7:16 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley [James.Bottomley@steeleye.com] wrote:
> On Tue, 2003-02-11 at 02:17, Mike Anderson wrote:
> > This patch series is against scsi-misc-2.5.
> > 
> > 02_serror-hndlr-1.diff:
> > 	- Change to using eh_cmd_list.
> > 	- Change scsi_unjam_host to get sense, abort cmds, ready
> > 	  devices, and disposition cmds for retry or finish.
> > 	- Moved retries outside of eh.
> > 
> > -andmike
> 
> I have some qualms about the locking: you protect the eh_cmd_list with
> the host_lock when adding, but not when traversing in the eh_thread.  I
> know this is because the eh_thread has quiesced the host before
> beginning, thus there should theoretically be no returning commands to
> tamper with the list while the eh_thread is using it.  However, I think
> it might be worthwhile pointing this out in a comment over the
> list_for_each_entry().

How about instead of a comment I change to something like this so that
scsi_unjam_host has its own queue to work off of.

static void scsi_unjam_host(struct Scsi_Host *shost)
{
	unsigned long flags;

	LIST_HEAD(eh_work_q);
	LIST_HEAD(eh_done_q);

	spin_lock_irqsave(shost->host_lock, flags);
	list_splice(&shost->eh_cmd_list, &eh_work_q);
	INIT_LIST_HEAD(&shost->eh_cmd_list);
	spin_unlock_irqrestore(shost->host_lock, flags);

	...

}

I would also change eh_cmd_list eh_cmd_q.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-12  7:16       ` Mike Anderson
@ 2003-02-12 14:26         ` Luben Tuikov
  2003-02-12 14:37         ` James Bottomley
  1 sibling, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-12 14:26 UTC (permalink / raw)
  To: Mike Anderson; +Cc: James Bottomley, SCSI Mailing List

Mike Anderson wrote:
>>
>>I have some qualms about the locking: you protect the eh_cmd_list with
>>the host_lock when adding, but not when traversing in the eh_thread.  I
>>know this is because the eh_thread has quiesced the host before
>>beginning, thus there should theoretically be no returning commands to
>>tamper with the list while the eh_thread is using it.  However, I think
>>it might be worthwhile pointing this out in a comment over the
>>list_for_each_entry().
> 
> 
> How about instead of a comment I change to something like this so that
> scsi_unjam_host has its own queue to work off of.
> 
> static void scsi_unjam_host(struct Scsi_Host *shost)
> {
> 	unsigned long flags;
> 
> 	LIST_HEAD(eh_work_q);
> 	LIST_HEAD(eh_done_q);
> 
> 	spin_lock_irqsave(shost->host_lock, flags);
> 	list_splice(&shost->eh_cmd_list, &eh_work_q);
> 	INIT_LIST_HEAD(&shost->eh_cmd_list);
> 	spin_unlock_irqrestore(shost->host_lock, flags);
> 
> 	...
> 
> }

So much better.  This is the right principle: if you need a lock to access
a list in one place, then you *always* need the lock to access the list.

When doing locking and synchronization, doing things out of principle
*will* avoid lockups, but (ab)using circumstantial facts will *definitely*
lead to a lock up.

This also leads to the same effect, and it's a common idiom in the kernel:

	LIST_HEAD(local_q);

	spin_lock_irqsave(shost->host_lock, flags);
	list_add(&local_q, shost->eh_cmd_q);
	list_del_init(shost->eh_cmd_q);
	spin_unlock_irqrestore(shost->host_lock, flags);

	/* Now we work with local_q */

 
> I would also change eh_cmd_list eh_cmd_q.

That sounds great.  While you're at it, how about calling
``eh_cmd_q'' something like ``failed_cmd_q' or something which
represents *what* kind commands are in this queue, rather
than the name representing *who* will work with this queue.

Remember, the whole point with queues are to represent
the *state* of the command...  And this is what the ``eh_cmd_q''
does.

Also don't forget cmd::eh_list --> not suggesting a list...
or just using the list member.

The whole point is that when a SCSI Command gets instantiated
and enters SCSI Core, it traverses the queues as it evolves,
depending on its state.  (Borrowing from my own mini-scsi-core... :-P )

(I have to say something on scsi_put/get_command() but later today.)

-- 
Luben




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-12  7:16       ` Mike Anderson
  2003-02-12 14:26         ` Luben Tuikov
@ 2003-02-12 14:37         ` James Bottomley
  1 sibling, 0 replies; 47+ messages in thread
From: James Bottomley @ 2003-02-12 14:37 UTC (permalink / raw)
  To: Mike Anderson; +Cc: SCSI Mailing List

On Wed, 2003-02-12 at 01:16, Mike Anderson wrote:
> How about instead of a comment I change to something like this so that
> scsi_unjam_host has its own queue to work off of.

Absolutely perfect!  Just in case anyone gets around to an error handler
that doesn't have to entirely quiesce the host, this infrastructure will
come in handy.

> static void scsi_unjam_host(struct Scsi_Host *shost)
> {
> 	unsigned long flags;
> 
> 	LIST_HEAD(eh_work_q);
> 	LIST_HEAD(eh_done_q);
> 
> 	spin_lock_irqsave(shost->host_lock, flags);
> 	list_splice(&shost->eh_cmd_list, &eh_work_q);
> 	INIT_LIST_HEAD(&shost->eh_cmd_list);

list_splice_init() does both of these operations

> 	spin_unlock_irqrestore(shost->host_lock, flags);

James



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-11 22:41             ` Christoph Hellwig
@ 2003-02-12 20:10               ` Luben Tuikov
  2003-02-12 20:46                 ` Christoph Hellwig
  0 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-12 20:10 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Mike Anderson, linux-scsi

Christoph Hellwig wrote:
> On Tue, Feb 11, 2003 at 04:22:21PM -0500, Luben Tuikov wrote:
> 
>>:-)  yes, I know -- I was just saying that I don't like this policy
>>that much.
>>
>>It's oxymoronic to grab a spin lock and say to someone, ok
>>you can sleep, and then release the spin lock...
> 
> 
> Agreed.  One of the things we really need to do is to change the calling
> conventions of the eh methods to not have the lock held on entry.
> 
> I just wonder how we can achieve that best without silently breaking
> drivers.

First we have to figure out *what* is the purpose of having the lock
held before calling eh_* functions.  Is it protecting the driver
from something or is it protecting SCSI Core?

Since the point of having a thread eh is to *sleep*, then why
grab the lock...

(Needless to say in my own LLDDs I *never* need this lock -- all
eh_* and queuecommand() are completely re-entrant.)

That is, eh_* functions of a LLDD should be re-entrant.

While we are this I'd like to give this idea to you, which I've
mentioned before (this will be kind of more formal):

Proposal A: Let's make queuecommand() be void queuecommand().

This will accomplish the following:
	- the only way SCSI Core gives ownership of a scsi command
	structure to a LLDD is by calling queuecommand() -- which
	is currently the case as well.

	- the only way a LLDD gives ownership back to SCSI Core is
	by calling scsi_done(), and let there be *no* other way.

I completely detest the current dualist approach of also checking
the return value of queuecommand().

This will warrant the addition of the old res = queuecommand()
result variable, into the scsi command structure.

Now we can call this member ``service_response'' and this will
be the Service Response of the Service Delivery Subsystem, aka
Low Level Device Driver (LLDD). ((FC folks should back me up here.))

And ``result'' could become ``status''.

Both of the above are in par with SAM-2/3.

This will accomplish the fact that there'd be only one place
where a command is returned to SCSI Core from a LLDD, specifically
in the scsi softirq.  I.e. the queueing method as far as LLDDs
are concerned is clearly defined.

Then SCSI Core can get more inventive in its own queuing techniques.

I think that the above proposal can go into 2.5, with a little
(minimal) tweaking of LLDDs and SCSI Core.

As to the eh_* routines:  The current naming technique follows SPI,
which is now out of SAM and SPC.  Furthermore newer SCSI devices,
not necessarily on a parallel interface, do not define bus_reset,
host_reset and sometimes device_reset, because it is not
always clear what is the device or host or bus...

Proposal B: let the eh_* become tmf_* where TMF is
Task Management Functions.  Those are from SAM-2/3 (all re-entrant):

int tmf_abort_task(scsi command pointer);
	aborts the task sent from this initiator port,
	the LLDD will determine the initiator port.
int tmf_abort_task_set(target, lun);
	aborts all tasks sent from this initiator port,
	like sending multiple tmf_abort_task(...).
int tmf_clear_aca(target, lun);
int tmf_clear_task_set(target, lun);
int tmf_lun_reset(target, lun);
int tmf_query_task(scsi command struct);
int tmf_target_reset(target);
int tmf_wakeup(target);

Some of those are optional and depend on the functionality of the
device. Others depend on the transport.

The result of the function is the service response which is one of:
{ TMF_COMPLETE, TMF_SUCC, TMF_REJ } and Service delivery or target failure.

I think that this framework will also be very convenient for the USB
people, since they will not define all of those, but the ones they will
define, they can map easily to their own USB infrastructure.

``target'' is a target identifier (type not known at this point).
``lun'' is a 64 bit quantity.

All those methods are to be re-entrant and a command could be cancelled
all the while the LLDD continues to process commands.  (I've done
this myself.)  This would warrant a bit different queueing technique
in SCSI Core but slowly we're getting there.

Maybe this is 2.7 stuff, but as long as ideas are popping up, we're doing
okay.

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-12 20:10               ` Luben Tuikov
@ 2003-02-12 20:46                 ` Christoph Hellwig
  2003-02-12 21:23                   ` Mike Anderson
  2003-02-12 21:46                   ` Luben Tuikov
  0 siblings, 2 replies; 47+ messages in thread
From: Christoph Hellwig @ 2003-02-12 20:46 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Mike Anderson, linux-scsi

On Wed, Feb 12, 2003 at 03:10:39PM -0500, Luben Tuikov wrote:
> > I just wonder how we can achieve that best without silently breaking
> > drivers.
> 
> First we have to figure out *what* is the purpose of having the lock
> held before calling eh_* functions.  Is it protecting the driver
> from something or is it protecting SCSI Core?

The history is that in 2.4 the cli and BKL based serialization got replaced
with io_request_lock held over all entry points, in 2.5 that got changed
to the host lock.  So the purpose seems to be avoiding driver changes so
far.

> That is, eh_* functions of a LLDD should be re-entrant.

Agreed that they should.  The question on how to implement this
without breaking drivers remains.  Do you volunteer to add taking
the lock into all these methods so the existing lock state remains until a
maintainer ACKs that his driver doesn't need protection and/or makes
it re-entrant?

The other way would be to add a concurrent_eh flag to the host template,
but I really hate these conditional locking schemes, they just cause confusion.

> While we are this I'd like to give this idea to you, which I've
> mentioned before (this will be kind of more formal):
> 
> Proposal A: Let's make queuecommand() be void queuecommand().
> 
> This will accomplish the following:
> 	- the only way SCSI Core gives ownership of a scsi command
> 	structure to a LLDD is by calling queuecommand() -- which
> 	is currently the case as well.
> 
> 	- the only way a LLDD gives ownership back to SCSI Core is
> 	by calling scsi_done(), and let there be *no* other way.
> 
> I completely detest the current dualist approach of also checking
> the return value of queuecommand().

That sounds nice to me.  But again, do you think you can do it for 2.5
without breaking more drivers?  (i.e. fixing everything up)

> This will warrant the addition of the old res = queuecommand()
> result variable, into the scsi command structure.
> 
> Now we can call this member ``service_response'' and this will
> be the Service Response of the Service Delivery Subsystem, aka
> Low Level Device Driver (LLDD). ((FC folks should back me up here.))
> 
> And ``result'' could become ``status''.
> 
> Both of the above are in par with SAM-2/3.

I don't particularly like that name, but having specifiers backed by
standards has its advantages, so I'm agnostic here.

> As to the eh_* routines:  The current naming technique follws SPI,
> which is now out of SAM and SPC.  Furthermore newer SCSI devices,
> not necessarily on a parallel interface, do not define bus_reset,
> host_reset and sometimes device_reset, because it is not
> always clear what is the device or host or bus...
> 
> Proposal B: let the eh_* become tmf_* where TMF is
> Task Management Functions.  Those are from SAM-2/3 (all re-entrant):
> 
> int tmf_abort_task(scsi command pointer);
> 	aborts the task sent from this initiator port,
> 	the LLDD will determine the initiator port.
> int tmf_abort_task_set(target, lun);
> 	aborts all tasks sent from this initiator port,
> 	like sending multiple tmf_abort_task(...).
> int tmf_clear_aca(target, lun);
> int tmf_clear_task_set(target, lun);
> int tmf_lun_reset(target, lun);
> int tmf_query_task(scsi command struct);
> int tmf_target_reset(target);
> int tmf_wakeup(target);
> 
> Some of those are optional and depend on the functionality of the
> device. Others depend on the transport.
> 
> The result of the function is the service response which is one of:
> { TMF_COMPLETE, TMF_SUCC, TMF_REJ } and Service delivery or target failiure.
> 
> I think that this framework will also be very convenient for the USB
> people, since they will not define all of those, but the ones they will
> define, they can map easily to their own USB infrastructure.
> 
> ``target'' is a target identifier (type not known at this point).
> ``lun'' is a 64 bit quantity.

It should be the same type as is used for luns in other places of the
scsi midlayer (whatever that may be when this gets in)

> All those methods are to be re-entrant and a command could be cancelled
> all the while the LLDD continues to process commands.  (I've done
> this myself.)  This would warrant a bit different queueing technique
> in SCSI Core but slowly we're getting there.
> 
> Maybe this is 2.7 stuff, but as long as ideas are popping up, we're doing
> okay.

I think that's clearly 2.7 stuff, we've moved scsi a lot forward in 2.5,
now we need to get most drivers working again and 2.6 out of the door.

If you have enough time a prototype now won't hurt though - the scsi code
shouldn't really change during early 2.6.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-12 20:46                 ` Christoph Hellwig
@ 2003-02-12 21:23                   ` Mike Anderson
  2003-02-12 22:15                     ` Luben Tuikov
  2003-02-12 21:46                   ` Luben Tuikov
  1 sibling, 1 reply; 47+ messages in thread
From: Mike Anderson @ 2003-02-12 21:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Luben Tuikov, linux-scsi

Christoph Hellwig [hch@infradead.org] wrote:
> > That is, eh_* functions of a LLDD should be re-entrant.
> 
> Agreed that they should.  The question on how to implement this
> without breaking drivers remains.  Do you volunteer to do add taking
> the lock into all these methods so the existing lock state remains until a
> maintainer ACKs that his driver doesn't need protection / and or makes
> it reenetrant?
> 
> The other way would be to add an concurrent_eh flag to the host template,
> but I really hate these conditional locking schemes, they just cause confusion.
> 

The host template check may cause confusion, but today we have few
drivers that do not need the host_lock taken so we end up with code
bloat and confusion in the drivers to support an older model:
	- Mid takes lock with flags
	- LLDD drops lock and then grabs it again before return.
	- Mid drops lock with flags.

> > While we are this I'd like to give this idea to you, which I've
> > mentioned before (this will be kind of more formal):
> > 
> > Proposal A: Let's make queuecommand() be void queuecommand().
> > 
> > This will accomplish the following:
> > 	- the only way SCSI Core gives ownership of a scsi command
> > 	structure to a LLDD is by calling queuecommand() -- which
> > 	is currently the case as well.
> > 
> > 	- the only way a LLDD gives ownership back to SCSI Core is
> > 	by calling scsi_done(), and let there be *no* other way.
> > 
> > I completely detest the current dualist approach of also checking
> > the return value of queuecommand().
> 
> That sounds nice to me.  But again, do you think you can do it for 2.5
> without breaking more drivers?  (i.e. fixing everything up)
> 

I like the idea, but for 2.5 cleaning the ~95 queuecommand functions
seems large.

> > As to the eh_* routines:  The current naming technique follws SPI,
> > which is now out of SAM and SPC.  Furthermore newer SCSI devices,
> > not necessarily on a parallel interface, do not define bus_reset,
> > host_reset and sometimes device_reset, because it is not
> > always clear what is the device or host or bus...
> > 
> > Proposal B: let the eh_* become tmf_* where TMF is
> > Task Management Functions.  Those are from SAM-2/3 (all re-entrant):
> > 
> > int tmf_abort_task(scsi command pointer);
> > 	aborts the task sent from this initiator port,
> > 	the LLDD will determine the initiator port.
> > int tmf_abort_task_set(target, lun);
> > 	aborts all tasks sent from this initiator port,
> > 	like sending multiple tmf_abort_task(...).
> > int tmf_clear_aca(target, lun);
> > int tmf_clear_task_set(target, lun);
> > int tmf_lun_reset(target, lun);
> > int tmf_query_task(scsi command struct);
> > int tmf_target_reset(target);
> > int tmf_wakeup(target);
> > 
> > Some of those are optional and depend on the functionality of the
> > device. Others depend on the transport.
> > 
> > The result of the function is the service response which is one of:
> > { TMF_COMPLETE, TMF_SUCC, TMF_REJ } and Service delivery or target failiure.
> > 
> > I think that this framework will also be very convenient for the USB
> > people, since they will not define all of those, but the ones they will
> > define, they can map easily to their own USB infrastructure.
> > 
> > ``target'' is a target identifier (type not known at this point).
> > ``lun'' is a 64 bit quantity.
> 
> It should be the same type as is used for luns in other places of the
> scsi midlayer (whatever that m,ay be when this gets in)
> 
> > All those methods are to be re-entrant and a command could be cancelled
> > all the while the LLDD continues to process commands.  (I've done
> > this myself.)  This would warrant a bit different queueing technique
> > in SCSI Core but slowly we're getting there.
> > 
> > Maybe this is 2.7 stuff, but as long as ideas are popping up, we're doing
> > okay.
> 
> I think that's clearly 2.7 stuff, we've moved scsi a lot forward in 2.5,
> now we need to get most drivers working again and 2.6 out of the door.
> 
> If you have anough time a prototype now won't hurt though - the scsi code
> shouldn't really change during early 2.6.
> 

I also agree about it being a 2.7 item, but I like it as a guide of
where we should consider heading.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-12 20:46                 ` Christoph Hellwig
  2003-02-12 21:23                   ` Mike Anderson
@ 2003-02-12 21:46                   ` Luben Tuikov
  2003-02-13 15:47                     ` Christoph Hellwig
  1 sibling, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-12 21:46 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Mike Anderson, linux-scsi

Christoph Hellwig wrote:
> 
> The history is that in 2.4 the cli and BKL based serialization got replaced
> with io_request_lock held over all entry points, in 2.5 that got changed
> to the host lock.  So the purpose seems to be avoiding driver changes so
> far.

I was trying to identify a clear and precise reason for the lock.
(I know the history...)

It doesn't seem to be purposeful enough (the lock), since the LLDD is
allowed to sleep, and would most likely drop it. (see below)

Actually I think since it is a pointer which LLDD may set to their
own lock, it looks like it's trying to prevent LLDD from being
too simplistic (to be politically correct).

But the fact of the matter is that LLDD writers will have to
be more ``vigilant'', and if they don't know how to make their
driver functions re-entrant, they can read up on it, or take
a graduate level OS course, or get their sorry a*ses fired.

For already existing LLDDs, a few nights overhaul might do
the trick.

> Agreed that they should.  The question on how to implement this
> without breaking drivers remains.  Do you volunteer to do add taking
> the lock into all these methods so the existing lock state remains until a
> maintainer ACKs that his driver doesn't need protection / and or makes
> it reenetrant?

[On eh_* functions.]

If a LLDD drops the lock on entry and gains it again on exit, then clearly
it doesn't need it.

Else if it doesn't, it then must be relying on it elsewhere, which is
asking for trouble from eh_* point of view.  A search and replace
will do ok, with a local lock, unless it sets the pointer, in which case
this is much easier.

So, yes, I think that the host_lock can be eliminated from LLDD point of view.
It will take a bit more time than the host, lun, target work but is doable.

I think I can find the time to start working on this, as long as I know
that it's worthwhile.

> That sounds nice to me.  But again, do you think you can do it for 2.5
> without breaking more drivers?  (i.e. fixing everything up)

[On new queuecommand() prototype]

Yes, I actually think that this is *more*/easier doable than the host_lock
issue above.  Again, I'll wait for the powers that be to say
nay or yea, I just don't like wasting my time.

> I don't particularly like that name, but having specifiers backed by
> standards has it's advantage, so I'm agnostic here.

[On ``result'' becoming ``status''.]

It's like the TCP/IP networking code -- you can read the RFCs and
look at the networking code and know exactly what is going on.

It's always a benefit to stick to names as they are in the
standards -- the implementation is so much clearer and one
can find their way around quickly just knowing what the standard
says.  (For nerds like me who read them.)

> It should be the same type as is used for luns in other places of the
> scsi midlayer (whatever that m,ay be when this gets in)

[On lun being 64 bit quantity.]

Actually, talking about standards, this is what a LUN is, a 64 bit quantity.

The fact that Linux SCSI Core uses unsigned int is unfortunate, and has slowly
grown from the 8 bit days when SCSI was just SPI.

> I think that's clearly 2.7 stuff, we've moved scsi a lot forward in 2.5,
> now we need to get most drivers working again and 2.6 out of the door.

Right.

So where do we draw the line? (Rhetorical.)

queuecommand() -> void queuecommand() I think is quite doable, with
microscopic changes to LLDD -- I just need the blessing of the powers
that be.

Getting rid of the host_lock is a bit of a stretch since more
thinking would have to be involved separately for each LLDD, as outlined
above.  This will need a double blessing from the powers that be.

> If you have anough time a prototype now won't hurt though - the scsi code
> shouldn't really change during early 2.6.

Yes, I've been thinking of rewriting SCSI Core completely for 2.7, so maybe
this prototype will be its embryonic state?

Some features would be that target/lun scanning will be a completely *distinct*
functionality and one can even move it to userspace.

Another feature is that new-scsi-core will *not* do any memory allocation
for sg lists for commands -- this is the task of the application client
(block layer, app, scanning code, etc).

I.e. the sole functionality of new-scsi-core will be that of *interfacing*
with the LLDD as specified by SAM-2/3, no more, no less. (I'm flexible
on this of course. :-) )

BTW, what kind of prototype are you talking about, functional or non-functional?

(( Probably non-functional since there's a lot of things which I don't know
    how to do -- i.e. how is ``TARGET'' represented?
    It's not an integer anymore, since SPI is gone.  This will also warrant
    new LLDD, who accept the SAM interface -- i.e. identifying a device
    by the tuple (TARGET, LUN), and ``host'' is really a ``portal'', etc...
    Target is most likely the scsi name, but this gets complicated...
    ``Oh, well...'' Capt. Kirk, oh his death, star date ????.?? ))

BTW2, we can babble about this all we want here, the important
thing is what the powers that be have to say about all this.

-- 
Luben




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-12 21:23                   ` Mike Anderson
@ 2003-02-12 22:15                     ` Luben Tuikov
  0 siblings, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-12 22:15 UTC (permalink / raw)
  To: Mike Anderson; +Cc: Christoph Hellwig, linux-scsi

Mike Anderson wrote:
> 
> I like the idea, but for 2.5 cleaning the ~95 queuecommand functions
> seems large.

[On queuecommand() prototype.]

As I said, this would be quite easier than the host_lock.

I can do this, and would involve less work than getting rid
of the lun, target, channel from scsi_command...

It doesn't ``seem large'' *to me*.

Getting rid of the host_lock in LLDD would be a pickle though.

> I also agree about it being a 2.7 item, but I like it as a guide of
> where we should consider heading.

One can check out SAM-2/3 for the general infrastructure.

For the nitty-gritty of things like queuing, etc. I think I've
ambiguously spilled them here and there in my emails.

As to a working prototype, I have one such, but it's not GPL.

Anyway, we have plenty of ideas, all we need is the pistol pop.

BTW, the USB and FC ppl should like the newer TMF entries, and
userspace as well -- it should be able to invoke a cold target
rescan without problems.  Oh, a ``host'' should become a ``portal''
and the notion of channel/bus should disappear.  A ``target'' can
appear on more than one ``portal'', etc...

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-11  8:17   ` [PATCH / RFC] scsi_error handler update. (3/4) Mike Anderson
  2003-02-11  8:19     ` [PATCH / RFC] scsi_error handler update. (4/4) Mike Anderson
  2003-02-11 22:38     ` [PATCH / RFC] scsi_error handler update. (3/4) James Bottomley
@ 2003-02-12 22:34     ` James Bottomley
  2003-02-13  8:24       ` Mike Anderson
  2 siblings, 1 reply; 47+ messages in thread
From: James Bottomley @ 2003-02-12 22:34 UTC (permalink / raw)
  To: Mike Anderson; +Cc: SCSI Mailing List

On Tue, 2003-02-11 at 03:17, Mike Anderson wrote: 
> This patch series is against scsi-misc-2.5.
> 
> 02_serror-hndlr-1.diff:
> 	- Change to using eh_cmd_list.
> 	- Change scsi_unjam_host to get sense, abort cmds, ready
> 	  devices, and disposition cmds for retry or finish.
> 	- Moved retries outside of eh.
> 
> -andmike
> --
> Michael Anderson
> andmike@us.ibm.com
> 
>  scsi_error.c |  477 +++++++++++++++++++++++++++++------------------------------
>  1 files changed, 241 insertions(+), 236 deletions(-)
> ------
> 
> diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> --- a/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
> +++ b/drivers/scsi/scsi_error.c	Mon Feb 10 22:25:47 2003
> @@ -211,36 +211,36 @@
>   * @sc_list:	List for failed cmds.
>   * @shost:	scsi host being recovered.
>   **/
> -static void scsi_eh_prt_fail_stats(Scsi_Cmnd *sc_list, struct Scsi_Host *shost)
> +static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost)
>  {
> -	Scsi_Cmnd *scmd;
> -	Scsi_Device *sdev;
> +	struct scsi_cmnd *scmd;
> +	struct scsi_device *sdev;
>  	int total_failures = 0;
>  	int cmd_failed = 0;
> -	int cmd_timed_out = 0;
> +	int cmd_cancel = 0;
>  	int devices_failed = 0;
>  
>  
>  	list_for_each_entry(sdev, &shost->my_devices, siblings) {
Your eh_list here is per host, so there's no need to loop over all the
devices above. 

> -		for (scmd = sc_list; scmd; scmd = scmd->bh_next) {
> +		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
>  			if (scmd->device == sdev) {
>  				++total_failures;
>  				if (scsi_eh_eflags_chk(scmd,
[...] 
> -static int scsi_eh_bus_device_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
> +static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
> +				    struct list_head *done_list)
>  {
>  	int rtn;
> -	Scsi_Cmnd *scmd;
> -	Scsi_Device *sdev;
> -
> -	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Trying BDR\n", __FUNCTION__));
> +	struct list_head *lh, *lh_sf;
> +	struct scsi_cmnd *scmd, *bdr_scmd;
> +	struct scsi_device *sdev;
>  
>  	list_for_each_entry(sdev, &shost->my_devices, siblings) {

Same problem here. 

> -		for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
> -			if ((scmd->device == sdev) &&
> -			    scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
> +		bdr_scmd = NULL;
> +		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list)
> +			if (scmd->device == sdev) {
> +				bdr_scmd = scmd;
>  				break;
[...]
> @@ -1477,6 +1469,8 @@
>  
>  	ASSERT_LOCK(shost->host_lock, 0);
>  
> +	shost->in_recovery = 0;
> +
>  	/*
>  	 * If the door was locked, we need to insert a door lock request
>  	 * onto the head of the SCSI request queue for the device.  There

This is a lot better than we had previously, but still, I think resetting
in_recovery should happen after we've potentially queued door lock commands
on the queue head, since as soon as we reset this, the queue can potentially
be restarted (even without the necessary restart below this in the code).


James


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (3/4)
  2003-02-12 22:34     ` James Bottomley
@ 2003-02-13  8:24       ` Mike Anderson
  0 siblings, 0 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-13  8:24 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley [James.Bottomley@steeleye.com] wrote:
> >  	list_for_each_entry(sdev, &shost->my_devices, siblings) {
> Your eh_list here is per host, so there's no need to loop over all the
> devices above. 
> 

This is a left over artifact. Since the list is not sorted by device and
that log message displays total fails and number of devices with
failure.

> > -		for (scmd = sc_list; scmd; scmd = scmd->bh_next) {
> > +		list_for_each_entry(scmd, &shost->eh_cmd_list, eh_list) {
> >  			if (scmd->device == sdev) {
> >  				++total_failures;
> >  				if (scsi_eh_eflags_chk(scmd,
> [...] 
> > -static int scsi_eh_bus_device_reset(Scsi_Cmnd *sc_todo, struct Scsi_Host *shost)
> > +static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
> > +				    struct list_head *done_list)
> >  {
> >  	int rtn;
> > -	Scsi_Cmnd *scmd;
> > -	Scsi_Device *sdev;
> > -
> > -	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Trying BDR\n", __FUNCTION__));
> > +	struct list_head *lh, *lh_sf;
> > +	struct scsi_cmnd *scmd, *bdr_scmd;
> > +	struct scsi_device *sdev;
> >  
> >  	list_for_each_entry(sdev, &shost->my_devices, siblings) {
> 
> Same problem here. 
> 

Our failed_cmd_q may contain more than a single failed command per device so
traversing the list allows only one BDR per device failing to be sent.

> > @@ -1477,6 +1469,8 @@
> >  
> >  	ASSERT_LOCK(shost->host_lock, 0);
> >  
> > +	shost->in_recovery = 0;
> > +
> >  	/*
> >  	 * If the door was locked, we need to insert a door lock request
> >  	 * onto the head of the SCSI request queue for the device.  There
> 
> This is a lot better than we had previously, but still, I think resetting
> in_recovery should happen after we've potentially queued door lock commands
> on the queue head, since as soon as we reset this, the queue can potentially
> be restarted (even without the necessary restart below this in the code).
> 

Ok I moved it after lock door and right before the wake_up.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-12 21:46                   ` Luben Tuikov
@ 2003-02-13 15:47                     ` Christoph Hellwig
  2003-02-13 18:55                       ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Christoph Hellwig @ 2003-02-13 15:47 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Mike Anderson, linux-scsi

On Wed, Feb 12, 2003 at 04:46:29PM -0500, Luben Tuikov wrote:
> > The history is that in 2.4 the cli and BKL based serialization got replaced
> > with io_request_lock held over all entry points, in 2.5 that got changed
> > to the host lock.  So the purpose seems to be avoiding driver changes so
> > far.
> 
> I was trying to identify a clear and precise reason for the lock.
> (I know the history...)

I can't see any reason except the history of this lock.  And no this
reason is probably not clear and precise :)

> It doesn't seem to be purposeful enough (the lock), since the LLDD is
> allowed to sleep, and would most likely drop it. (see below)

*nod*

> Actually I think since it is a pointer which LLDD may set to their
> own lock, it looks like it's trying to prevent LLDD from being
> too simplistic (to be politically correct).

Once again I don't think this can be explained with anything but
history :)


> But the matter of the fact is that LLDD writers will have to
> be more ``vigilent'', and if they don't know how to make their
> driver functions re-entrant, they can read up on it, or take
> a gradute level OS course, or get their sorry a*ses fired.

Hey, that's a bit harsh (but true..).  The real problem is that most
scsi drivers don't have a maintainer anymore; they just get odd fixes
from people who care enough about the hardware.

> If a LLDD drops the lock on entry and gains it again on exit, then clearly
> it doesn't need it.
> 
> Else if it doesn't, it then must be relying on it elsewhere, which is
> asking for trouble from eh_* point of view.  A search and replace
> will do ok, with a local lock,

right.

> unless it sets the pointer, in which case this is much easier.

which pointer?  the host lock?

> So, yes, I think that the host_lock can be eliminated from LLDD point of view.
> It will take a bit more time than the host, lun, target work but is doable.
> 
> I think I can find the time to start working on this, as long as I know
> that it's worthwhile.

I think it's absolutely worth the effort!

> [On new queuecommand() prototype]
> 
> Yes, I actually think that this is *more*/easier doable than the host_lock
> issue above.  Again, I'll wait for the powers that be to say
> nay or yea, I just don't like wasting my time.

James, any comments on this one?  I think it's something that is save for
2.5.

> > If you have anough time a prototype now won't hurt though - the scsi code
> > shouldn't really change during early 2.6.
> 
> Yes, I've been thinking of rewriting SCSI Core completely for 2.7, so maybe
> this prototype will be its embryonic state?

Maybe :)

> 
> Some features would be that target/lun scanning will be a completely *distinct*
> functionality and one can even move it to userspace.

James outlined something similar at the last kernel summit.  I think it's
a worthwhile change for 2.7 (let's hope initramfs is usable then)

> Another feature is that new-scsi-core will *not* do any memory allocation
> for sg lists for commands -- this is the task of the application client
> (block layer, app, scanning code, etc).

i.e. the scsi upper layer driver?

> BTW, what kind of prototype are you talking about, functional or non-functional?

limited functionality (i.e. just a single driver or so converted)

> BTW2, we can babble about this all we want here, the important
> thing is what the powers that be have to say about all this.

Yes, it would be interesting to hear from James on whether he ACKs the concept.
I think he was travelling yesterday, so maybe he's still catching up
on mail.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-13 15:47                     ` Christoph Hellwig
@ 2003-02-13 18:55                       ` Luben Tuikov
  2003-02-14  0:24                         ` Doug Ledford
  0 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-13 18:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Mike Anderson, linux-scsi

Christoph Hellwig wrote:
>>If a LLDD drops the lock on entry and gains it again on exit, then clearly
>>it doesn't need it.
>>
>>Else if it doesn't, it then must be relying on it elsewhere, which is
>>asking for trouble from eh_* point of view.  A search and replace
>>will do ok, with a local lock,
> 
> 
> right.
> 
> 
>>unless it sets the pointer, in which case this is much easier.
> 
> 
> which pointer?  the host lock?

Yes.

[On host_lock be gone:]

> I think it's absolutely worth the effort!

[On new queuecommand() prototype]

> James, any comments on this one?  I think it's something that is save for
> 2.5.

Glad to hear that you're agreeing with me, but it doesn't depend on us.

>>>If you have anough time a prototype now won't hurt though - the scsi code
>>>shouldn't really change during early 2.6.
>>
>>Yes, I've been thinking of rewriting SCSI Core completely for 2.7, so maybe
>>this prototype will be its embryonic state?
> 
> 
> Maybe :)

But of course -- you'll probably just use the idea and rewrite everything. :-)

Anyway, one of my greatest beefs with the current SCSI Core is the variable
names.  At the bottom I explain/give justification of *why* indeed I have
a problem in general with naming. (I think I gave it away several times before.)

Examples:
scsi_device:
	::siblings -- this is NOT a container of siblings, which
are to be the devices themselves, but is an *entry* into a siblings container.
Thus, in the host we have a siblings container and the scsi_device
should have an ``entry'' into this container.  Furthermore, I'd not choose
``host::siblings'' but something more approachable like ``host::device_list''.

	::same_target_siblings -- this shouldn't exist by design.

	::device_busy -- this should probably be atomic_t and its name should
be more descriptive of what it *is* -- something like ``active_cmd_count''.

	Ideally when there's a queue for each state of a command, say,
incoming_q, pending_q, done_q, those will have also have their own atomic_t
count, like incoming_q_count, pending_q_count, done_q_count, and maybe their
own locks, like incoming_q_lock, pending_q_lock, done_q_lock.
This and other similar features would make new-scsi-core, completely
re-entrant and multithread capable.

scsi_host:
	::sh_list -- this is an entry, but it suggests being a list.

	::my_devices -- ``device_list'' would've been so much better.
We know that those *are* going to be the host's devices -- whom else's
are they going to be?

	::host_busy -- could this have been ever more ambiguous?

	::host_failed -- this suggests a boolean value... but it's
not.

	In general if an object should participate in some sort
of a container like a list or queue, it should do so one at a time.
This way a dependency chart is very clear and there's no loops,
which would lead to poor programming design.  Everyone should
have taken a course in this during their undergrad years at
college/university.

E.g. scsi_core <- portal <- target <- lun <- command,
where each level to the right is 0/1:N.

In fact, programming is done as soon as the *design* of
the object structure and names is complete.  The rest is
just coding the functions to describe the relationships
between the objects.

Blah... blah... blah... am I wasting my time?

>>Some features would be that target/lun scanning will be a completely *distinct*
>>functionality and one can even move it to userspace.
> 
> 
> James, outlined something similar on the last kernel summit.  I think it's
> a worthwile change for 2.7 (let's hope initramfs is useable then)

Yes, I know.  But this ``feature'' is *not* the driving force of the
new design of the new-scsi-core.  In fact, the other way around.

I.e. since new-scsi-core is just an *interface* to the Service Delivery
Subsystem (aka LLDD), target/lun scanning is NOT part of it.  new-scsi-core
just sits between the application client sending scsi tasks (read/write/etc)
and the LLDD's interface, plus a few minor capabilities.

I.e. target/lun scanning will be an application client sending scsi commands
down the LLDDs, analyzing the results and using a well defined interface
to add/remove devices from new-scsi-core tables (given capabilities).

SCSI scanning could be a kernel module or user space process, XOR both,
depending on how the user felt this morning.

>>Another feature is that new-scsi-core will *not* do any memory allocation
>>for sg lists for commands -- this is the task of the application client
>>(block layer, app, scanning code, etc).
> 
> 
> i.e. the scsi upper layer driver?

Whoever -- the application client... block layer, etc.  new-scsi-core
is not a memory management system, it's just an interface.  This will make
it quite *simpler and smaller*, yet powerful.

>>BTW, what kind of prototype are you talking about, functional or non-functional?
> 
> 
> limited functionality (i.e. just a single driver or so converted)

Time is of the essence and given I've got a lot of my own job-work to do
and catch up with (for which I get paid), I'm not sure I can start this now.
I'd need to hear something from the powers that be.  Since this is 2.7 work,
I don't expect it until the summer.  And summer time is always more relaxed.

-- 
Luben

P.S. The naming punctuality comes from the fact that symbols themselves
carry meaning -- and a proper name or symbol would dictate its proper
use.  As an example I gave Newton's and Leibniz's calculus notations.
For more information one can check out any mathematical philosophy book
or as a start, the Pulitzer prize winner ``GEB''.



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-13 18:55                       ` Luben Tuikov
@ 2003-02-14  0:24                         ` Doug Ledford
  2003-02-14 16:38                           ` Patrick Mansfield
  2003-02-14 16:58                           ` Mike Anderson
  0 siblings, 2 replies; 47+ messages in thread
From: Doug Ledford @ 2003-02-14  0:24 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Christoph Hellwig, Mike Anderson, linux-scsi

On Thu, Feb 13, 2003 at 01:55:15PM -0500, Luben Tuikov wrote:
> 	::same_target_siblings -- this shouldn't exist by design.

Yes!  That one most certainly should exist!  There are lots of operations 
in the scsi protocol that are target specific instead of lun specific and 
having a quick, cheap way of getting to all the luns on a single target is 
important (for example, on SPI, all device transfer negotiations are 
target specific, so when you negotiate a speed with target 0 lun 0, you 
have also negotiated the speed of target 0 lun 1, and getting to and 
setting the "I've already negotiated with this device" flag on all your 
luns should be quick and easy).  Now, there are really only a few places 
where this kind of target vs. lun view is important:

device_busy:  we need to know how many commands are active on a lun, and 
in some cases also on a target.  So this should really be 
"active_cmd_count_target" and "active_cmd_count_lun" to use Luben's 
preferred names.  The active_cmd_count_target would actually have to be a 
pointer to the single real counter while each scsi device would have an 
independent lun counter.  This makes single_lun devices simpler because we 
just check that active_cmd_count_target == 0 before queueing a command.

hostdata:  ideally this would be lldd_lun_data and lldd_target_data and 
each would be a pointer that the lldd could init during slave_alloc() 
time.  That way if a lldd cares about the difference between target and 
lun data structs, then it can put them in different areas and treat them 
appropriately.

Hmmm...those are the two most important items.  There may be others I'm 
forgetting.  But, take care of those two and you should then be able to 
eliminate the same_target_siblings without incurring a bad performance 
penalty on single_lun devices.

-- 
  Doug Ledford <dledford@redhat.com>     919-754-3700 x44233
         Red Hat, Inc. 
         1801 Varsity Dr.
         Raleigh, NC 27606
  

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14  0:24                         ` Doug Ledford
@ 2003-02-14 16:38                           ` Patrick Mansfield
  2003-02-14 16:58                           ` Mike Anderson
  1 sibling, 0 replies; 47+ messages in thread
From: Patrick Mansfield @ 2003-02-14 16:38 UTC (permalink / raw)
  To: Luben Tuikov, Christoph Hellwig, Mike Anderson, linux-scsi

On Thu, Feb 13, 2003 at 07:24:40PM -0500, Doug Ledford wrote:
> On Thu, Feb 13, 2003 at 01:55:15PM -0500, Luben Tuikov wrote:
> > 	::same_target_siblings -- this shouldn't exist by design.
> 
> Yes!  That one most certainly should exist!  There are lots of operations 
> in the scsi protocol that are target specific instead of lun specific and 

And it would be nice if each scsi_device pointed to a parent struct
scsi_target that grouped all per-target information - like (as you
mentioned) active_cmd_count_target, *lld_target_data, and other target
specific data that is now stored in scsi_device.

This might aid in probe/scanning for transports (like FCP) that can figure
out a target is there without scanning: a representation of the target can
show up without any scsi_device, and then we scan those targets.

It also gives a better representation (under sysfs) of the physical layout
of the devices.

-- Patrick Mansfield

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14  0:24                         ` Doug Ledford
  2003-02-14 16:38                           ` Patrick Mansfield
@ 2003-02-14 16:58                           ` Mike Anderson
  2003-02-14 18:50                             ` Doug Ledford
  2003-02-14 19:35                             ` Luben Tuikov
  1 sibling, 2 replies; 47+ messages in thread
From: Mike Anderson @ 2003-02-14 16:58 UTC (permalink / raw)
  To: Luben Tuikov, Christoph Hellwig, linux-scsi

Doug Ledford [dledford@redhat.com] wrote:
> On Thu, Feb 13, 2003 at 01:55:15PM -0500, Luben Tuikov wrote:
> > 	::same_target_siblings -- this shouldn't exist by design.
> 
> Yes!  That one most certainly should exist!  There are lots of operations 
> in the scsi protocol that are target specific instead of lun specific and 
> having a quick, cheap way of getting to all the luns on a single target is 
> important (for example, on SPI, all device transfer negotiations are 
> target specific, so when you negotiate a speed with target 0 lun 0, you 
> have also negotiated the speed of target 0 lun 1, and getting to and 
> setting the "I've already negotiated with this device" flag on all your 
> luns should be quick and easy).  Now, there are really only a few places 
> where this kind of target vs. lun view is important:
> 
> device_busy:  we need to know how many commands are active on a lun, and 
> in some cases also on a target.  So this should really be 
> "active_cmd_count_target" and "active_cmd_count_lun" to use Luben's 
> preferred names.  The active_cmd_count_target would actually have to be a 
> pointer to the single real counter while each scsi device would have an 
> independant lun counter.  This makes single_lun devices simpler because we 
> just check that active_cmd_count_target == 0 before queueing a command.
> 
> hostdata:  ideally this would be lldd_lun_data and lldd_target_data and 
> each would be a pointer that the lldd could init during slave_alloc() 
> time.  That way if a lldd cares about the difference between target and 
> lun data structs, then it can put them in different areas and treat them 
> appropriately.
> 
> Hmmm...those are the two most important items.  There may be others I'm 
> forgetting.  But, take care of those two and you should then be able to 
> eliminate the same_target_siblings without incurring a bad performance 
> penalty on single_lun devices.

The need is there for per target data, but if we talk about future
directions it would appear a cleaner interface would be to have luns as
children of the targets (? ports ?) vs having a list of luns and post linking
relationships.

In the past I had a similar relationship of targets to luns to SDTR /
WDTR values that have been negotiated in firmware of a SCSI HA.

-andmike
--
Michael Anderson
andmike@us.ibm.com


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 16:58                           ` Mike Anderson
@ 2003-02-14 18:50                             ` Doug Ledford
  2003-02-14 19:35                             ` Luben Tuikov
  1 sibling, 0 replies; 47+ messages in thread
From: Doug Ledford @ 2003-02-14 18:50 UTC (permalink / raw)
  To: Mike Anderson; +Cc: Luben Tuikov, Christoph Hellwig, linux-scsi

On Fri, Feb 14, 2003 at 08:58:27AM -0800, Mike Anderson wrote:
> The need is there for per target data, but if we talk about future
> directions it would appear a cleaner interface would be to have luns as
> children of the targets (? ports ?) vs having a list of luns and post linking
> relationships.

Yes it would be cleaner.  I haven't had the time to work on scsi lately 
(job responsibility issues) and so I'm not one to go telling people what 
to do.  I suggested an easy alternative, while this requires, I think, 
quite a bit more work to restructure things.  If I had the time to do it 
myself this is the direction I would be looking in.  It has other side 
benefits as well, like you can simplify the scsi scanning code a little 
bit because the lun0 scsi level would always be device->target->scsi_level 
and I'm sure other minor optimizations can be made.

-- 
  Doug Ledford <dledford@redhat.com>     919-754-3700 x44233
         Red Hat, Inc. 
         1801 Varsity Dr.
         Raleigh, NC 27606
  

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 16:58                           ` Mike Anderson
  2003-02-14 18:50                             ` Doug Ledford
@ 2003-02-14 19:35                             ` Luben Tuikov
  2003-02-14 21:20                               ` James Bottomley
                                                 ` (2 more replies)
  1 sibling, 3 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-14 19:35 UTC (permalink / raw)
  To: linux-scsi

Doug Ledford [dledford@redhat.com] wrote:
>
> On Thu, Feb 13, 2003 at 01:55:15PM -0500, Luben Tuikov wrote:
 >
>	::same_target_siblings -- this shouldn't exist by design.
 >
> Yes!  That one most certainly should exist!  There are lots of operations 

Sorry, I had my mind in new-scsi-core, while looking at the structs
of current-scsi-core.

I.e. I was assuming that struct scsi_target existed, while
it *only* exists in new-scsi-core... :-(

Now let's get back to theme:

Mike Anderson on Feb 14, 2003, wrote:
>
> The need is there for per target data, but if we talk about future
> directions it would appear a cleaner interface would be to have luns as
> children of the targets (? ports ?) vs having a list of luns and post linking
> relationships.

And Patrick Mansfield on Feb. 14, 2003, wrote:
 >
 > And it would be nice if each scsi_device pointed to a parent struct
 > scsi_target that grouped all per-target information - like (as you
 > mentioned) active_cmd_count_target, *lld_target_data, and other target
 > specific data that is now stored in scsi_device.
 >
 > This might aid in probe/scanning for transports (like FCP) that can figure
 > out a target is there without scanning: a representation of the target can
 > show up without any scsi_device, and then we scan those targets.
 >
 > It also gives a better representation (under sysfs) of the physical layout
 > of the devices.

Absolutely correct!

And this is quite evident from my Feb 11, 2003 message:

	E.g. scsi_core <- portal <- target <- lun <- command,
	where each level to the right is 0/1:N.

I was going to talk about this, but no time yesterday.

Now, we have:

struct scsi_core {
	struct list_head entry;

	struct list_head portal_list;
	spinlock_t       portal_list_lock;
	atomic_t         portal_list_count;
	...
};

struct scsi_portal {
	struct list_head entry;

	struct list_head target_list;
	spinlock_t       target_list_lock;
	atomic_t         target_list_count;
	...
};

struct scsi_target {
	struct list_head entry;

	struct list_head lun_list;
	spinlock_t       lun_list_lock;
	atomic_t         lun_list_count;
	...
};

struct scsi_lun {
	struct list_head entry;
	
	/* pending execution status by the device server */
	struct list_head pending_cmd_q;
	spinlock_t       pending_cmd_q_lock;
	atomic_t         pending_cmd_q_count;
	...
};

struct scsi_command {
	struct list_head entry;
	...
};

Where each predecessor can contain 0/1:N of its successor.

Points:

1) If an object of type struct scsi_core exists, then this means
that SCSI Core is loaded, thus its representation in sysfs (and SCSI
Core itself, i.e. it represents itself... :-) ).
... this was the reason I wanted scsi_core in current-scsi-core,
alas, no one saw it...

((Is anyone dreaming of multiple scsi_core's, say on a NUMA machine...?))

2) struct scsi_portal represents a service delivery subsystem type,
e.g. SPI, USB, FC, iSCSI, SAS, etc.  Nitty-gritty of the *actual*
delivery subsystem should have their own sub-sub structure, since
struct scsi_portal unifies them all for scsi_core and scsi_target's
sake.

3) struct scsi_target: this is an *important* point: those appear
and disappear asynchronously at the scsi_portal's request!  That is,
once a scsi_portal is registered with scsi_core and operational,
it will do something like:
	/* async: new device appeared ... */
	struct scsi_target *t = NULL;
	t = scsi_get_target(this_portal); /* get a struct from scsi_core */
	if (t) {
		this_portal_conf_target(portal_target_representation, t);
		scsi_register_target(t);
	}

The reason for this is that the service delivery subsystem *knows*
how to discover targets, scsi_core does not -- SCSI architecture.
New_scsi_core will discover luns present on targets.

This also means that a scsi_portal will have an interface exported
via sysfs so that one can control how/when/where/etc targets are
discovered -- this is important for FC and iSCSI ppl.

4) struct scsi_lun: this is the actual device, and can be any
kind of device at all.  It will have more command queues than this.
But, the only queue where commands will actually ``pend'' is
the pending device queue.  A done_q will most likely exist
in scsi_core.  An incoming_q will exist in either the lun
struct or the target struct, and will represent a command
which is being worked upon, i.e. someone is currently setting
the cdb, and other members; as soon as this is done, it will
go on the pending_q and *then* void queuecommand() called,
this fixes some races (been there, done that).

The locks could be a macro so that they could be either
a spinlock or rw_lock, as I have it for my own mini-scsi-core.

Furthermore using this layout there's no ether-pointers
on which SCSI Core structurally depends.

The drawback is that LLDD are not compatible with this
new-scsi-core structure.

There's a few more quirks in scsi_command, but I'll leave those
out for now, until I hear any kudos, comments and flames
regarding this layout.

BTW, if you like it, please say so, *NOT* privately to me,
but here publicly on this list -- this will not make you any
less of a man! :-)

Happy Valentine's everyone,
-- 
Luben




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 19:35                             ` Luben Tuikov
@ 2003-02-14 21:20                               ` James Bottomley
  2003-02-17 17:20                                 ` Luben Tuikov
  2003-02-17 17:35                                 ` Luben Tuikov
  2003-02-14 21:27                               ` James Bottomley
  2003-02-16  4:23                               ` Andre Hedrick
  2 siblings, 2 replies; 47+ messages in thread
From: James Bottomley @ 2003-02-14 21:20 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: SCSI Mailing List

On Fri, 2003-02-14 at 14:35, Luben Tuikov wrote:

> Now, we have:
> 
> struct scsi_core {
> 	struct list_head entry;
> 
> 	struct list_head portal_list;
> 	spinlock_t       portal_list_lock;
> 	atomic_t         portal_list_count;
> 	...
> };
> 
> struct scsi_portal {
> 	struct list_head entry;
> 
> 	struct list_head target_list;
> 	spinlock_t       target_list_lock;
> 	atomic_t         target_list_count;
> 	...
> };
> 
> struct scsi_target {
> 	struct list_head entry;
> 
> 	struct list_head lun_list;
> 	spinlock_t       lun_list_lock;
> 	atomic_t         lun_list_count;
> 	...
> };
> 
> struct scsi_lun {
> 	struct list_head entry;
> 	
> 	/* pending execution status by the device server */
> 	struct list_head pending_cmd_q;
> 	spinlock_t       pending_cmd_q_lock;
> 	atomic_t         pending_cmd_q_count;
> 	...
> };
> 
> struct scsi_command {
> 	struct list_head entry;
> 	...
> };
> 
> Where each predecessor can contain 0/1:N of its successor.

I don't like this on several grounds:

1. You have a really complex locking hierarchy.  It may be
philosophical, but I regard locking hierarchies as evil.  The only good
they can possibly serve is if you give a clear demonstration that they
avoid hot lock contention.  Otherwise, they tend to create more problems
than they solve.

2. The data structures you propose don't match reality.  The reality is
that we will have to cope with SPI for a while yet, and it doesn't have
scsi_portal.

3. I don't see where the mid-layer has a use for the data.  By and
large, we queue at the LUN level and we control parameters at the host
level.  The only current times we need to even associate puns and luns
is for error handling (reset non-locality) and for certain times when
the host is negotiating transfer parameters.  The current sibling list
may be a bit ugly, but it serves its purpose.

> Points:
> 
> 1) If an object of type struct scsi_core exists, then this means
> that SCSI Core is loaded, thus its representation in sysfs (and SCSI
> Core itself, i.e. it represents itself... :-) ).
> ... this was the reason I wanted scsi_core in current-scsi-core,
> alas, no one saw it...

> ((Is anyone dreaming of multiple scsi_core's, say on a NUMA machine...?))
> 
> 2) struct scsi_portal represents a service delivery subsystem type,
> e.g. SPI, USB, FC, iSCSI, SAS, etc.  Nitty-gritty of the *actual*
> delivery subsystem should have their own sub-sub structure, since
> struct scsi_portal unifies them all for scsi_core and scsi_target's
> sake.
> 
> 3) struct scsi_target: this is an *important* point: those appear
> and dissapear asynchronously at the scsi_portal's request!  That is,
> once a scsi_portal is registered with scsi_core and operational,
> it will do something like:
> 	/* async: new device appeared ... */
> 	struct scsi_target *t = NULL;
> 	t = scsi_get_target(this_portal); /* get a struct from scsi_core */
> 	if (t) {
> 		this_portal_conf_target(portal_target_representation, t);
> 		scsi_register_target(t);
> 	}
> 
> The reason for this is that the service delivery subsystem *knows*
> how to discover targets, scsi_core does not -- SCSI architecture.
> New_scsi_core will discover luns present on targets.
> 
> This also means that a scsi_portal will have an interface exported
> via sysfs so that one can control how/when/where/etc targets are
> discovered -- this is important for FC and iSCSI ppl.
> 
> 4) struct scsi_lun: this is the actual device, and can be any
> kind of device at all.  It will have more command queues than this.
> But, the only queue where commands will actually ``pend'' is
> the pending device queue.  A done_q will most likely exist
> in scsi_core.  An incoming_q will exist in either the lun
> struct or the target struct, and will represent a command
> which is being worked upon, i.e. someone is currently setting
> the cdb, and other members; as soon as this is done, it will
> go on the pending_q and *then* void queuecommand() called,
> this fixes some races (been there, done that).
> 
> The locks could be a macro so that they could be either
> a spinlock or rw_lock, as I have it for my own mini-scsi-core.
> 
> Furthermore using this layout there's no ether-pointers
> on which SCSI Core structurally depends.
> 
> The drawback is that LLDD are not compatible with this
> new-scsi-core structure.
> 
> There's a few more quirks in scsi_command, but I'll leave those
> out for now, until I hear any kudos, comments and flames
> regarding this layout.
> 
> BTW, if you like it, please say so, *NOT* privately to me,
> but here publicly on this list -- this will not make you any
> less of a man! :-)
> 
> Happy Valentine's everyone,
> -- 
> Luben
> 
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 19:35                             ` Luben Tuikov
  2003-02-14 21:20                               ` James Bottomley
@ 2003-02-14 21:27                               ` James Bottomley
  2003-02-17 17:28                                 ` Luben Tuikov
  2003-02-16  4:23                               ` Andre Hedrick
  2 siblings, 1 reply; 47+ messages in thread
From: James Bottomley @ 2003-02-14 21:27 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: SCSI Mailing List

On Fri, 2003-02-14 at 14:35, Luben Tuikov wrote:
> Points:
> 
> 1) If an object of type struct scsi_core exists, then this means
> that SCSI Core is loaded, thus its representation in sysfs (and SCSI
> Core itself, i.e. it represents itself... :-) ).
> ... this was the reason I wanted scsi_core in current-scsi-core,
> alas, no one saw it...

I don't see a benefit to multiple SCSI cores.  I can see people might want to
replace the mid-layer entirely, but I can't see anyone wanting to run
two mid-layers concurrently.

> ((Is anyone dreaming of multiple scsi_core's, say on a NUMA machine...?))
> 
> 2) struct scsi_portal represents a service delivery subsystem type,
> e.g. SPI, USB, FC, iSCSI, SAS, etc.  Nitty-gritty of the *actual*
> delivery subsystem should have their own sub-sub structure, since
> struct scsi_portal unifies them all for scsi_core and scsi_target's
> sake.

For this and all the rest, I suppose I'm showing my OO bias by saying I 
do have a preference for encapsulation: That is hiding data structure
unless you have a use for it.  So what I really want are arguments for why
exposing these splits is useful.

James



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 19:35                             ` Luben Tuikov
  2003-02-14 21:20                               ` James Bottomley
  2003-02-14 21:27                               ` James Bottomley
@ 2003-02-16  4:23                               ` Andre Hedrick
  2 siblings, 0 replies; 47+ messages in thread
From: Andre Hedrick @ 2003-02-16  4:23 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: linux-scsi


What are you trying to achieve, a split n-way mid-layer to provide fastpath(N)
for fail-over?  Or are you attempting to provide path(N) for some type of
load balance or flow control schedule?  Regardless, you can not do
multi-path on a given HBA, real or virtual.

Unless you have a special HPA that allows queue stalls and each
port/spindle has its own host queue.  This sounds very much like FPDMA
with DPIO for raw FIS creation and loading, aka SAS.

I have such HBA's but are cleverly coded as to not need path(N).
They fake the mid-layer to report each port as a restanding HBA, yet share
a common hostdata to allow command migration or redirection.  This yields
the same effect with out fudging the current driver mid-layer.

My Nickle --

Andre Hedrick
LAD Storage Consulting Group


On Fri, 14 Feb 2003, Luben Tuikov wrote:

> Doug Ledford [dledford@redhat.com] wrote:
> >
> > On Thu, Feb 13, 2003 at 01:55:15PM -0500, Luben Tuikov wrote:
>  >
> >	::same_target_siblings -- this shouldn't exist by design.
>  >
> > Yes!  That one most certainly should exist!  There are lots of operations 
> 
> Sorry, I had my mind in new-scsi-core, while looking at the structs
> of current-scsi-core.
> 
> I.e. I was assuming that struct scsi_target existed, while
> it *only* exists in new-scsi-core... :-(
> 
> Now let's get back to theme:
> 
> Mike Anderson on Feb 14, 2003, wrote:
> >
> > The need is there for per target data, but if we talk about future
> > directions it would appear a cleaner interface would be to have luns as
> > children of the targets (? ports ?) vs having a list of luns and post linking
> > relationships.
> 
> And Patrick Mansfield on Feb. 14, 2003, wrote:
>  >
>  > And it would be nice if each scsi_device pointed to a parent struct
>  > scsi_target that grouped all per-target information - like (as you
>  > mentioned) active_cmd_count_target, *lld_target_data, and other target
>  > specific data that is now stored in scsi_device.
>  >
>  > This might aid in probe/scanning for transports (like FCP) that can figure
>  > out a target is there without scanning: a representation of the target can
>  > show up without any scsi_device, and then we scan those targets.
>  >
>  > It also gives a better representation (under sysfs) of the physical layout
>  > of the devices.
> 
> Abosolutely correct!
> 
> And this is quite evident from my Feb 11, 2003 message:
> 
> 	E.g. scsi_core <- portal <- target <- lun <- command,
> 	where each level to the right is 0/1:N.
> 
> I was going to talk about this, but no time yesterday.
> 
> Now, we have:
> 
> struct scsi_core {
> 	struct list_head entry;
> 
> 	struct list_head portal_list;
> 	spinlock_t       portal_list_lock;
> 	atomic_t         portal_list_count;
> 	...
> };
> 
> struct scsi_portal {
> 	struct list_head entry;
> 
> 	struct list_head target_list;
> 	spinlock_t       target_list_lock;
> 	atomic_t         target_list_count;
> 	...
> };
> 
> struct scsi_target {
> 	struct list_head entry;
> 
> 	struct list_head lun_list;
> 	spinlock_t       lun_list_lock;
> 	atomic_t         lun_list_count;
> 	...
> };
> 
> struct scsi_lun {
> 	struct list_head entry;
> 	
> 	/* pending execution status by the device server */
> 	struct list_head pending_cmd_q;
> 	spinlock_t       pending_cmd_q_lock;
> 	atomic_t         pending_cmd_q_count;
> 	...
> };
> 
> struct scsi_command {
> 	struct list_head entry;
> 	...
> };
> 
> Where each predecessor can contain 0/1:N of its successor.
> 
> Points:
> 
> 1) If an object of type struct scsi_core exists, then this means
> that SCSI Core is loaded, thus its representation in sysfs (and SCSI
> Core itself, i.e. it represents itself... :-) ).
> ... this was the reason I wanted scsi_core in current-scsi-core,
> alas, no one saw it...
> 
> ((Is anyone dreaming of multiple scsi_core's, say on a NUMA machine...?))
> 
> 2) struct scsi_portal represents a service delivery subsystem type,
> e.g. SPI, USB, FC, iSCSI, SAS, etc.  Nitty-gritty of the *actual*
> delivery subsystem should have their own sub-sub structure, since
> struct scsi_portal unifies them all for scsi_core and scsi_target's
> sake.
> 
> 3) struct scsi_target: this is an *important* point: those appear
> and dissapear asynchronously at the scsi_portal's request!  That is,
> once a scsi_portal is registered with scsi_core and operational,
> it will do something like:
> 	/* async: new device appeared ... */
> 	struct scsi_target *t = NULL;
> 	t = scsi_get_target(this_portal); /* get a struct from scsi_core */
> 	if (t) {
> 		this_portal_conf_target(portal_target_representation, t);
> 		scsi_register_target(t);
> 	}
> 
> The reason for this is that the service delivery subsystem *knows*
> how to discover targets, scsi_core does not -- SCSI architecture.
> New_scsi_core will discover luns present on targets.
> 
> This also means that a scsi_portal will have an interface exported
> via sysfs so that one can control how/when/where/etc targets are
> discovered -- this is important for FC and iSCSI ppl.
> 
> 4) struct scsi_lun: this is the actual device, and can be any
> kind of device at all.  It will have more command queues than this.
> But, the only queue where commands will actually ``pend'' is
> the pending device queue.  A done_q will most likely exist
> in scsi_core.  An incoming_q will exist in either the lun
> struct or the target struct, and will represent a command
> which is being worked upon, i.e. someone is currently setting
> the cdb, and other members; as soon as this is done, it will
> go on the pending_q and *then* void queuecommand() called,
> this fixes some races (been there, done that).
> 
> The locks could be a macro so that they could be either
> a spinlock or rw_lock, as I have it for my own mini-scsi-core.
> 
> Furthermore using this layout there's no ether-pointers
> on which SCSI Core structurally depends.
> 
> The drawback is that LLDD are not compatible with this
> new-scsi-core structure.
> 
> There's a few more quirks in scsi_command, but I'll leave those
> out for now, until I hear any kudos, comments and flames
> regarding this layout.
> 
> BTW, if you like it, please say so, *NOT* privately to me,
> but here publicly on this list -- this will not make you any
> less of a man! :-)
> 
> Happy Valentine's everyone,
> -- 
> Luben
> 
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 21:20                               ` James Bottomley
@ 2003-02-17 17:20                                 ` Luben Tuikov
  2003-02-17 17:58                                   ` James Bottomley
  2003-02-17 20:17                                   ` Doug Ledford
  2003-02-17 17:35                                 ` Luben Tuikov
  1 sibling, 2 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-17 17:20 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley wrote:
> On Fri, 2003-02-14 at 14:35, Luben Tuikov wrote
 >[...]
>>struct scsi_lun {

This should've been ``struct scsi_lu { ... ''

 >[...]
 >
> I don't like this on several grounds:
> 
> 1. You have a really complex locking hierarchy.  It may be
> philosophical, but I regard locking hierarchies as evil.  The only good
> they can possibly serve is if you give a clear demonstration that they
> avoid hot lock contention.  Otherwise, they tend to create more problems
> than they solve.

There is no ``locking hierarchy'' -- i.e. a lock *only* locks its list
manipulation. So, you don't have to have a string of locks to change
a scsi_command, for example.

It is *not* ``philosophical''.  It's what's in SAM-2/3.  And it is what
most SPI HBAs already do (since you use SPI HBA's in your example).

When was the last time a SPI HBA set channel/bus to anything different
than 0?

My Adaptec AIC-7899P dual channel, presents two hosts, not two channels.
And this is the right behaviour.

IOW, it already is hinting that those are ``portals''.  There is no
such thing as a ``host'' in SCSI-3.  ``host'' is an SPI term, and is gone
from SAM and SPC (which is what scsi-core should represent).

> 2. The data structures you propose don't match reality.  The reality is
> that we will have to cope with SPI for a while yet, and it doesn't have
> scsi_portal.

See above.  I've never seen an SPI HBA which sets channel/bus different
than zero.  I.e. for all N-channel SPI HBA that export a scsi_host for
a channel (i.e. bus/channel != 0 ever) you have a ``portal''.  I.e.
this is easily convertible to a ``portal''.

> 3. I don't see where the mid-layer has a use for the data.  By and

Well, how can I contend *that* statement?...  This basically means
that the whole thing I proposed is cr*p.

> large, we queue at the LUN level and we control parameters at the host
> level.

Not anymore, you don't.

You queue at LU level, and control parameters
	- at LU level, for the device server itself (i.e. the thingy
which executes scsi commands),
	- at Target level, (for target specific parameters),
	- at Portal level, for the service delivery subsystem (i.e.
controls specific for SPI, FC, etc.).

It's not that hard to imagine a target whose LUN 3 is the enclosure
and LU 1 is a certain disk, and the target is the whole ``box''.


> The only current times we need to even associate puns and luns
> is for error handling (reset non-locality) and for certain times when
> the host is negotiating transfer parameters.  The current sibling list
> may be a bit ugly, but it serves its purpose.

Patchwork solutions only cripple expansion of functionality.

Versatility (what SCSI-3 is aiming at) is tied very closely
with design (i.e. structure).  As an example of this
statement, try arithmetic in roman numerals and then you'd
appreciate the widely used arabic notation.

So you have OO structure from SAM-2/3 (bottom), and you have OO structure
from sysfs (top), and scsi-core should follow suit.

That OO structure for scsi-core is the one I posted -- it is a representation*
of the SCSI Architecture Model from SAM-3.

* I.e., suitable for a kernel.

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 21:27                               ` James Bottomley
@ 2003-02-17 17:28                                 ` Luben Tuikov
  0 siblings, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-17 17:28 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley wrote:
> On Fri, 2003-02-14 at 14:35, Luben Tuikov wrote:
> 
>>Points:
>>
>>1) If an object of type struct scsi_core exists, then this means
>>that SCSI Core is loaded, thus its representation in sysfs (and SCSI
>>Core itself, i.e. it represents itself... :-) ).
>>... this was the reason I wanted scsi_core in current-scsi-core,
>>alas, no one saw it...
> 
> 
> I don't see a benefit to multiple SCSI cores.  I can see people might want to
> replace the mid-layer entirely, but I can't see anyone wanting to run
> two mid-layers concurrently.

And I didn't say that there *was* a benefit to multiple SCSI cores.
I basically hinted at a design which simply *allows* multiple SCSI cores.
(Not the other way around.)

>>2) struct scsi_portal represents a service delivery subsystem type,
>>e.g. SPI, USB, FC, iSCSI, SAS, etc.  Nitty-gritty of the *actual*
>>delivery subsystem should have their own sub-sub structure, since
>>struct scsi_portal unifies them all for scsi_core and scsi_target's
>>sake.
> 
> 
> For this and all the rest, I suppose I'm showing my OO bias by saying I 
> do have a preference for encapsulation: That is hiding data structure
> unless you have a use for it.  So what I really want are arguments for why
> exposing these splits is useful.

Because a portal is something generic which allows access to the
target.  I.e. you don't need to know what the service delivery
subsystem is -- this is left to the LLDD an to the user.

struct scsi_portal *may** contain a union/pointer/whatever to a
struct which represents the service delivery subsystem (SPI, FC, etc),
should *it* export controls and the user wants to manipulate them.

* It, really doesn't have to.

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-14 21:20                               ` James Bottomley
  2003-02-17 17:20                                 ` Luben Tuikov
@ 2003-02-17 17:35                                 ` Luben Tuikov
  1 sibling, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-17 17:35 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley wrote:
> 
> I don't like this on several grounds:
> 
> 1. You have a really complex locking hierarchy.  It may be

Several LLDDs may get async event of a target appearing on
the fabric, thus you want to protect the scsi_portal::target_list
with a lock.

Several processes may be initiated who register a scsi_portal
with SCSI Core, thus you want to protect scsi_core::portal_list
with a lock.

The new void scsi_enqueue_cmd(struct scsi_command *) should
be *fully* reentrant, thus you want to protect the
struct scsi_lu::pending_cmd_q with a lock.

... unless you can serialize all those operations elsewhere,
which would be reminiscent of the BKL...

-- 
Luben




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 17:20                                 ` Luben Tuikov
@ 2003-02-17 17:58                                   ` James Bottomley
  2003-02-17 18:29                                     ` Luben Tuikov
  2003-02-17 20:17                                   ` Doug Ledford
  1 sibling, 1 reply; 47+ messages in thread
From: James Bottomley @ 2003-02-17 17:58 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: SCSI Mailing List

On Mon, 2003-02-17 at 12:20, Luben Tuikov wrote:
> There is no ``locking hierarchy'' -- i.e. a lock *only* locks its list
> manipulation. So, you don't have to have a string of locks to change
> a scsi_command, for example.

If I want to loop over all outstanding commands on a device (say because
the FC has just got a LIP removal), how do I do it?  Your design implies
I have to loop over all portals, targets and luns.  To do this loop, I'd
have to stabilise all the lists by acquiring the three locks that
protect them, wouldn't I?

> When was the last time a SPI HBA set channel/bus to anything different
> than 0?

Channel is a special abstraction for the dual channel chips.  It's
really an extension of host number for drivers that need a channel
decode.  The mid-layer only uses it for the host reset function, since
resetting the host 

> My Adaptec AIC-7899P dual channel, presents two hosts, not two channels.
> And this is the right behaviour.

The adaptec driver certainly uses the multiple channel feature, but only
if the AHC_TWIN flag is set in the host.

> > 3. I don't see where the mid-layer has a use for the data.  By and
> 
> Well, how can I contend *that* statement?...  This basically means
> that the whole thing I proposed is cr*p.

This is the key issue.  Complexity should only be introduced in a design
when it serves a justifiable purpose.  How do the 4 layers you propose
simplify the design/improve the throughput or in any other way add
benefit over the two layers (host and device) that we already have?

Because the standards say so isn't enough.  The Mid-layer's job is to
make efficient use of devices which follow the standards as simply and
elegantly as possible, not mirror the SCSI architecture model.

James




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 17:58                                   ` James Bottomley
@ 2003-02-17 18:29                                     ` Luben Tuikov
  2003-02-18  5:37                                       ` Andre Hedrick
  0 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-17 18:29 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List

James Bottomley wrote:
> On Mon, 2003-02-17 at 12:20, Luben Tuikov wrote:
> 
>>There is no ``locking hierarchy'' -- i.e. a lock *only* locks its list
>>manipulation. So, you don't have to have a string of locks to change
>>a scsi_command, for example.
> 
> 
> If I want to loop over all outstanding commands on a device (say because
> the FC has just got a LIP removal), how do I do it?  Your design implies
> I have to loop over all portals, targets and luns.  To do this loop, I'd
> have to stabilise all the lists by acquiring the three locks that
> protect them, wouldn't I?

No, not at all.  If you know the struct scsi_lu, you can just lock
the pending_cmd_q and loop over the commands.

struct scsi_target is ``locked'' *implicitly* simply by having
atomic_read(&target->lu_q_count) != 0.

struct scsi_portal is ``locked'' *implicitly* simply by having
atomic_read(&portal->target_q_count) != 0.

struct scsi_core is ``locked'' *implicitly* simply by having
atomic_read(&core->portal_q_count) != 0.

You certainly do not need to lock the whole hierarchy of locks
to loop over all pending commands.  Locks protect local data.
I.e. you just want to protect yourself from someone messing the
list up while you're traversing it.

>>When was the last time a SPI HBA set channel/bus to anything different
>>than 0?
> 
> 
> Channel is a special abstraction for the dual channel chips.  It's
> really an extension of host number for drivers that need a channel
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Thus it can be abstracted into a ``portal''.

>>>3. I don't see where the mid-layer has a use for the data.  By and
>>
>>Well, how can I contend *that* statement?...  This basically means
>>that the whole thing I proposed is cr*p.
> 
> 
> This is the key issue.  Complexity should only be introduced in a design

Well if it were the key issue, you should've just said *that*.
Nicely done.

> when it serves a justifiable purpose.  How do the 4 layers you propose
> simplify the design/improve the throughput or in any other way add
> benefit over the two layers (host and device) that we already have?

First of all it's 3 layers -- you're counting scsi_core but you don't
count it in the current structure. (scsi core pointers you can have as
ether-pointers, just as is now the practice)

So there's 3 layers: portal (aka host), target, lu. One more than
the current structure of scsi_core.

Look, you *NEED* the target layer, simply /because/ it exists in
nature. ;-)

I.e. a TARGET is the _addressable_ entity from a portal, i.e.
addressable on the fabric, but a LU is *NOT*.

The reason you get away with it now, is because linux scsi-core
gets help from the LLDDs.  But you have to understand that an LU
is a _function_ of the target.  So without this help of the LLDDs
the current scsi-core will *NOT* be able to address more than one
LU in a multi-LU target.

Yes, I know there's the old scan for LUs, but this is only
because linux scsi LLDD are SPI centric.

I.e. when you send a scsi command you're sending it to a (target,lu)
pair and this is what the LLDD does for you, i.e. it separates it
to a target, lu.

So, the question is: Is it worth it to abstract Target and LU into
OO design? Or is it better to leave it clunked up together as it is
now (device=(target,LU))?

I'm thinking sysfs here.

> Because the standards say so isn't enough.  The Mid-layer's job is to

And I'm not saying that it is enough. I'm just saying that:

> make efficient use of devices which follow the standards as simply and
> elegantly as possible, not mirror the SCSI architecture model.

Right, ``elegantly'' is the key word.

I don't think you can shrink any more from the 3 layer I proposed,
unless you go to the current 2 layer.

Judging from your words, the (host,device) pair is elegant enough?
If not, do you mind telling what is/would be?

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 17:20                                 ` Luben Tuikov
  2003-02-17 17:58                                   ` James Bottomley
@ 2003-02-17 20:17                                   ` Doug Ledford
  2003-02-17 20:19                                     ` Matthew Jacob
  2003-02-17 21:12                                     ` Luben Tuikov
  1 sibling, 2 replies; 47+ messages in thread
From: Doug Ledford @ 2003-02-17 20:17 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: James Bottomley, SCSI Mailing List

On Mon, Feb 17, 2003 at 12:20:44PM -0500, Luben Tuikov wrote:
> James Bottomley wrote:
> 
> It is *not* ``philosophical''.  It's what's in SAM-2/3.  And it is what
> most SPI HBA do this already (since you use SPI HBA's in your example).



> When was the last time a SPI HBA set channel/bus to anything different
> than 0?

The last time someone booted up an Adaptec 2742T controller for one.

> My Adaptec AIC-7899P dual channel, presents two hosts, not two channels.
> And this is the right behaviour.

It is the right behaviour for that controller, no more.  The right
behaviour is determined by the controller in question, not philosophical
beliefs.  In the case of the 2742T controllers there are two busses on one
scsi chip, you have to flip a bit in the chip to switch control regs
between busses, the chip has a maximum number of commands it can handle
and those are shared between the two busses, etc.  Representing that as
one host is the right thing to do because that's how all the resource
limits, etc. have to be calculated.

> IOW, it already is hinting that those are ``portals''.  There is no
> such thing as a ``host'' in SCSI-3.  ``host'' is an SPI term, and is gone
> from SAM and SPC (which is what scsi-core should represent).

I disagree with that last statement strongly!  I've mentioned this in
private email before.  The scsi core should represent reality, not any
version of any spec!  It should *implement* the scsi protocol, but it need
not and should not try to emulate any specific version of the scsi spec in
design and it needs to and should try to represent the reality of the
hardware you are working with.  And reality is that no matter what
interconnect we are using there is a host in that chain and that host is
some piece of hardware that handles our interconnect.  Hosts have their
own set of resource requirements, concurrency issues, limitations, etc.  
Ignoring the concept of a host to emulate a spec ignores reality and makes
the job of actually working with these devices harder.  The reason the
current scsi core is a two layer abstraction is because that's the reality
of how the vast majority of this stuff is put together (there are a few
exceptions, tape autoloaders for example, but these are rare and well
known where as individual devices are far more common).

> Not anymore, you don't.
> 
> You queue at LU level, and control parameters
> 	- at LU level, for the device server itself (i.e. the thingy
> which executes scsi commands),
> 	- at Target level, (for target specific parameters),
> 	- at Portal level, for the service delivery subsystem (i.e.
> controls specific for SPI, FC, etc.).

Really, in the above list, just replace portal with host and you have what 
we are doing today.  By creating a separate struct target and putting the 
target specific params into it you aren't changing *what* we are doing, 
just *how* we are doing it (which I have no objection to).  By changing 
the name of host to portal, well, your just changing the name to match 
some document but without any real purpose.

However, what I would like to see is a service delivery subsystem specific 
set of routines in the scsi core.  For example, the new scsi eh code is 
mostly wasted on pretty much all the intelligent RAID controllers that act 
like scsi controllers (megaraid, serveraid, etc.) and a *much* simpler 
error recovery setup could be used on those controllers.  On the other 
hand, iscsi and fc both need a bit more in the way of error recovery 
intelligence than spi does.  So I could see using 3 different error 
recovery strategies depending on the interconnect type (I'm not sure where 
usb and ieee1394 would fit in here) and also due to interconnect 
differences I could see each of these same 3 groups of interconnects 
needing different default disk timeout parameters.  These different 
interconnects also offer totally different device scan capabilities.  SPI 
needs a manual "are you there?" scan, most fc/usb/ieee1394/intelligent 
raid controllers can all tell the scsi layer "here are my devices, set 
them up" and avoid manual scanning.  So, based upon interconnect in use 
and driver capabilities, I'd like to see the ability to select the right 
settings for a controller at controller init time.  To that extent, the 
"portal" abstraction has a use, but I still don't like the name.  No one 
goes out and buys a fiber channel portal, they go buy a fiber channel host 
adapter board or controller.

-- 
  Doug Ledford <dledford@redhat.com>     919-754-3700 x44233
         Red Hat, Inc. 
         1801 Varsity Dr.
         Raleigh, NC 27606
  

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 20:17                                   ` Doug Ledford
@ 2003-02-17 20:19                                     ` Matthew Jacob
  2003-02-17 21:12                                     ` Luben Tuikov
  1 sibling, 0 replies; 47+ messages in thread
From: Matthew Jacob @ 2003-02-17 20:19 UTC (permalink / raw)
  To: Doug Ledford; +Cc: Luben Tuikov, James Bottomley, SCSI Mailing List


> > When was the last time a SPI HBA set channel/bus to anything different
> > than 0?
>
> The last time someone booted up an Adaptec 2742T controller for one.

Or, more recently, QLogic 12160 cards.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 20:17                                   ` Doug Ledford
  2003-02-17 20:19                                     ` Matthew Jacob
@ 2003-02-17 21:12                                     ` Luben Tuikov
  1 sibling, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-17 21:12 UTC (permalink / raw)
  To: Doug Ledford; +Cc: James Bottomley, SCSI Mailing List

Doug Ledford wrote:
> 
> It is the right behaviour for that controller, no more.  The right
> behaviour is determined by the controller in question, not philosophical
> beliefs.  In the case of the 2742T controllers there are two busses on one
> scsi chip, you have to flip a bit in the chip to switch control regs
> between busses, the chip has a maximum number of commands it can handle
> and those are shared between the two busses, etc.  Representing that as
> one host is the right thing to do because that's how all the resource
> limits, etc. have to be calculated.

Yes, it is the right thing to do (represent it as a host).  But here
``host'' is a software abstraction and somewhere else
it is a host adapter/controller, in effect ``host'' meaning a physical
device.

``Portal'' is what is used in current service delivery transport standards
since it provides a ``portal'' to the interconnect.

>>IOW, it already is hinting that those are ``portals''.  There is no
>>such thing as a ``host'' in SCSI-3.  ``host'' is an SPI term, and is gone
>>from SAM and SPC (which is what scsi-core should represent).
> 
> 
> I disagree with that last statement strongly!  I've mentioned this in
> private email before.  The scsi core should represent reality, not any

I never got it.

Just to show the ambiguity which might arise:
SOT---------------------------------------------
> version of any spec!  It should *implement* the scsi protocol, but it need
> not and should not try to emulate any specific version of the scsi spec in

This meaning that it should *not* stick to SPI?

> design and it needs to and should try to represent the reality of the
> hardware you are working with.  And reality is that no matter what

And this meaning it should stick to SPI, just because SPI HBAs are most
abundant?

> interconnect we are using there is a host in that chain and that host is
> some piece of hardware that handles our interconnect.

Here ``host'' means the physical entity.

>  Hosts have their
> own set of resource requirements, concurrency issues, limitations, etc.  

Here ``host'' means a software abstraction, as is the case for my Adaptec
card exporting two ``hosts'' with their limitations, etc, but being one
physical host adaptor.
EOT----------------------------------------------

> 
>>Not anymore, you don't.
>>
>>You queue at LU level, and control parameters
>>	- at LU level, for the device server itself (i.e. the thingy
>>which executes scsi commands),
>>	- at Target level, (for target specific parameters),
>>	- at Portal level, for the service delivery subsystem (i.e.
>>controls specific for SPI, FC, etc.).
> 
> 
> Really, in the above list, just replace portal with host and you have what 
> we are doing today.  By creating a separate struct target and putting the 
> target specific params into it you aren't changing *what* we are doing, 
> just *how* we are doing it (which I have no objection to).

Well, I'm certainly glad to hear that.

What would change is the scsi core _representation_ of what is
out there.

A target is addressable, has a name and a few other things. A LU
has no name and is addressable only within a target.

And by having a better representation we get more versatile manipulation
of the devices which are out there.

>  By changing 
> the name of host to portal, well, your just changing the name to match 
> some document but without any real purpose.

Well, we call it ``host'' now just because that is what SPI/SCSI-2 calls it.
I.e. the naming is legacy.

But, really, from a scsi core point of view it is a ``portal''.  It is a ``host''
only from LLDDs point of view.

I.e. scsi core doesn't really know about it being an SPI or FC or iSCSI.

What I'm trying to achieve here is better representation (in scsi core)
of the way SCSI-3 is forming up to become and the things which are to come.
How far away are we from a target whose LUs are all CPUs?

> However, what I would like to see is a service delivery subsystem specific 
> set of routines in the scsi core.  For example, the new scsi eh code is 
> mostly wasted on pretty much all the intelligent RAID controllers that act 
> like scsi controllers (megaraid, serveraid, etc.) and a *much* simpler 
> error recovery setup could be used on those controllers.  On the other 
> hand, iscsi and fc both need a bit more in the way of error recovery 
> intelligence than spi does.

Right.  iSCSI closely sticks to SAM-2 and the Task Management Functions
(TMF) therein.

> So I could see using 3 different error 
> recovery strategies depending on the interconnect type (I'm not sure where 

This may be the case, but the interface could be the same as far as TMFs
are concerned.  I.e. they would all have to offer it ``by law''.

> usb and ieee1394 would fit in here) and also due to interconnect 
> differences I could see each of these same 3 groups of interconnects 
> needing different default disk timeout parameters.  These different 

I've said this before: portals/targets should suggest timeout values to
scsi core on per LU basis.

> interconnects also offer totally different device scan capabilities.  SPI 
> needs a manual "are you there?" scan, most fc/usb/ieee1394/intelligent 
> raid controllers can all tell the scsi layer "here are my devices, set 
> them up" and avoid manual scanning.

Right.  The portal will tell scsi core about its targets as is the case
with iSCSI and most newer protocols, but this is controllable by
the user directly.  I.e. a user process may ``initiate'' target
discovery of the LLDD.

> So, based upon interconnect in use 
> and driver capabilities, I'd like to see the ability to select the right 
> settings for a controller at controller init time.

This is nice, but not with the current infrastructure of scsi core.

In order to get this ``unity'' in selecting the right settings
for a portal, we need to have some ``unity'' in the structures
which represent them.

Something like this maybe:

struct scsi_portal {
	...
	int ic_type;
	union scsi_interconnect {
		struct SPI spi;
		struct FC  fc;
		struct iSCSI is;
		struct SAS sas;
		...
	} ic;
	...
};

(Does this remind one of a certain other subsystem?)

>  To that extent, the 
> "portal" abstraction has a use, but I still don't like the name.  No one 
> goes out and buys a fiber channel portal, they go buy a fiber channel host 
> adapter board or controller.

Right, they do.  And when they plug it in they get a representation not
of the single ``fiber channel host adapter board or controller'' which they
bought but maybe of two ``hosts''.

Regarding the choice of the word ``portal'', this is what SCSI-3 uses in
its literature.

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-17 18:29                                     ` Luben Tuikov
@ 2003-02-18  5:37                                       ` Andre Hedrick
  2003-02-18 19:46                                         ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Andre Hedrick @ 2003-02-18  5:37 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: James Bottomley, SCSI Mailing List


Luben,

You are trying to make the mid-layer to the work of iSCSI where your
driver needs to do it.  Everyone else gets how to do target portal groups
without dorking the kernel.  Maybe you should try harder.

Andre Hedrick
LAD Storage Consulting Group

On Mon, 17 Feb 2003, Luben Tuikov wrote:

> James Bottomley wrote:
> > On Mon, 2003-02-17 at 12:20, Luben Tuikov wrote:
> > 
> >>There is no ``locking hierarchy'' -- i.e. a lock *only* locks its list
> >>manipulation. So, you don't have to have a string of locks to change
> >>a scsi_command, for example.
> > 
> > 
> > If I want to loop over all outstanding commands on a device (say because
> > the FC has just got a LIP removal), how do I do it?  Your design implies
> > I have to loop over all portals, targets and luns.  To do this loop, I'd
> > have to stabilise all the lists by acquiring the three locks that
> > protect them, wouldn't I?
> 
> No, not at all.  If you know the struct scsi_lu, you can just lock
> the pending_cmd_q and loop over the commands.
> 
> struct scsi_target is ``locked'' *implicitly* simply by having
> atomic_read(&target->lu_q_count) != 0.
> 
> struct scsi_portal is ``locked'' *implicitly* simply by having
> atomic_read(&portal->target_q_count) != 0.
> 
> struct scsi_core is ``locked'' *implicitly* simply by having
> atomic_read(&core->portal_q_count) != 0.
> 
> You certainly do not need to lock the whole hierarchy of locks
> to loop over all pending commands.  Locks protect local data.
> I.e. you just want to protect yourself from someone messing the
> list up while you're traversing it.
> 
> >>When was the last time a SPI HBA set channel/bus to anything different
> >>than 0?
> > 
> > 
> > Channel is a special abstraction for the dual channel chips.  It's
> > really an extension of host number for drivers that need a channel
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> Thus it can be abstracted into a ``portal''.
> 
> >>>3. I don't see where the mid-layer has a use for the data.  By and
> >>
> >>Well, how can I contend *that* statement?...  This basically means
> >>that the whole thing I proposed is cr*p.
> > 
> > 
> > This is the key issue.  Complexity should only be introduced in a design
> 
> Well if it were the key issue, you should've just said *that*.
> Nicely done.
> 
> > when it serves a justifiable purpose.  How do the 4 layers you propose
> > simplify the design/improve the throughput or in any other way add
> > benefit over the two layers (host and device) that we already have?
> 
> First of all it's 3 layers -- you're counting scsi_core but you don't
> count it in the current structure. (scsi core pointers you can have as
> ether-pointers, just as is now the practice)
> 
> So there's 3 layers: portal (aka host), target, lu. One more than
> the current structure of scsi_core.
> 
> Look, you *NEED* the target layer, simply /because/ it exists in
> nature. ;-)
> 
> I.e. a TARGET is the _addressable_ entity from a portal, i.e.
> addressable on the fabric, but a LU is *NOT*.
> 
> The reason you get away with it now, is because linux scsi-core
> gets help from the LLDDs.  But you have to understand that an LU
> is a _function_ of the target.  So without this help of the LLDDs
> the current scsi-core will *NOT* be able to address more than one
> LU in a multi-LU target.
> 
> Yes, I know there's the old scan for LUs, but this is only
> because linux scsi LLDD are SPI centric.
> 
> I.e. when you send a scsi command you're sending it to a (target,lu)
> pair and this is what the LLDD does for you, i.e. it separates it
> to a target, lu.
> 
> So, the question is: Is it worth it to abstract Target and LU into
> OO design? Or is it better to leave it clunked up together as it is
> now (device=(target,LU))?
> 
> I'm thinking sysfs here.
> 
> > Because the standards say so isn't enough.  The Mid-layer's job is to
> 
> And I'm not saying that it is enough. I'm just saying that:
> 
> > make efficient use of devices which follow the standards as simply and
> > elegantly as possible, not mirror the SCSI architecture model.
> 
> Right, ``elegantly'' is the key word.
> 
> I don't think you can shrink any more from the 3 layer I proposed,
> unless you go to the current 2 layer.
> 
> Judging from your words, the (host,device) pair is elegant enough?
> If not, do you mind telling what is/would be?
> 
> -- 
> Luben
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-18  5:37                                       ` Andre Hedrick
@ 2003-02-18 19:46                                         ` Luben Tuikov
  2003-02-18 22:16                                           ` Andre Hedrick
  0 siblings, 1 reply; 47+ messages in thread
From: Luben Tuikov @ 2003-02-18 19:46 UTC (permalink / raw)
  To: Andre Hedrick; +Cc: James Bottomley, SCSI Mailing List

Andre Hedrick wrote:
 > Luben,
 >
 > You are trying to make the mid-layer to the work of iSCSI where your
 > driver needs to do it.  Everyone else gets how to do target portal groups
 > without dorking the kernel.  Maybe you should try harder.

Andre,

1. What we're discussing here is 2.7 (3.1) stuff which *may* be
into a 2.8 (3.1) kernel -- which would be how many *YEARS* into the future?

This discussion has *NOTHING* to do with iSCSI in particular or with
FC or with SAS, or with...

It has _only_ to do with SAM-2/3.


2. You seem to be confused into thinking that (iSCSI Target) Portal
Groups have something to do with the ``portals'' I'm talking about.
They have _nothing_ to do with each other.

You see, a (iSCSI Target) Portal Group has a Portal Group Tag,
and NOWHERE do I talk about such a Tag, which should show you
that the choice of the word ``portal'' is unfortunate for you
since you got confused...

I encourage *EVERYONE* here to read the definitions of
	- ``Portal Group'',
	- ``Portal Group Tag'' and
	- ``Network Portal'',
from the iSCSI draft, and decide for themselves.
The doc is here:
http://www.haifa.il.ibm.com/satran/ips/
version 20 is what you want to read, the 2.1 Definitions section.

Then I encourage everyone to check out Eddy Quicksall's
iSCSI-in-diagrams at the bottom of the aforementioned
link -- it clearly demonstrates what a (iSCSI Target)
Portal Group and its Tag is.

You see, Andre, (iSCSI Target) Portal Group has nothing to do
with a ``SCSI Target _Device_'', but with connection management
and device access over network portals and session management
for the specific node -- all iSCSI specific concepts.

What I'm basically trying to represent is a target
being a collection of LUs, as is in SAM-3r04, 4.7.2, figure 12.

And a portal being an abstraction of the controller, which may
be *any* kind of controller (FC, SPI, SAS, etc).  But some
protocols will *not* have a controller, per se, or ``host'',
so the most appropriate word would be to call it a ``portal'' since
it provides access to the SCSI Initiator Port to the Service
Delivery Subsystem to the Interconnect, and thus access to the
device.  But you see, that stuff in the middle is supposed to
be transparent to the Application Client (SCSI Core), and thus
you have LLDDs.

LLDDs will not be abstractions over ``make of physical controller''
anymore, but much more, abstractions over the *method* of connection,
i.e. we assumed it was SPI, and now we cannot assume even that,
with newer transports like SAS, iSCSI, SRP, etc.


3. Sorry to confuse you with the word ``portal''.  If you knew
SCSI, you'd see right away that what I mean is ``SCSI Domain''
(SAM-3r04, 4.4, figure 8, 4.5, figure 9) but the problem with
this is that ``SCSI Domain'' *in a computer/kernel* would be a
collection of all the ``SCSI adapters'' of a _certain_ Service
Delivery Subsystem, and this isn't _representative__enough_ of the
real world, to use James's and Doug's words.  So for this reason
the most appropriate word for a ``device which provides access
to (specific/any) service delivery subsystem'' is ``portal''.
And inside that portal structure we keep info on what the SDS is.

Do not confuse the use of ``portal'' in my most recent emails with
its other uses in various other SCSI Service Delivery Protocol docs.


On a personal note:

4. I find it extremely personally insulting of your assumptions
about an iSCSI driver of mine, what it needs and what it doesn't?

How do you know if such a driver exists?  How do you know what my
real job is?  Do you really know me in order to post such speculation
and FUD?

I find it extremely personally insulting that you assume that
I have an ``agenda'' into changing SCSI Core to fit some non-existant
personal purpose.

Your message is speculative and defamatory.  I hope you refrain from
posting any more such messages involving me.

Hope this clears it up for you,
-- 
Luben




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-18 19:46                                         ` Luben Tuikov
@ 2003-02-18 22:16                                           ` Andre Hedrick
  2003-02-18 23:35                                             ` Luben Tuikov
  0 siblings, 1 reply; 47+ messages in thread
From: Andre Hedrick @ 2003-02-18 22:16 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: SCSI Mailing List


Luben:

<Point 4 answered>
**********************
Date: Tue, 26 Mar 2002 15:31:41 -0500
From: Luben Tuikov <luben@splentec.com>
To: iSCSI <ips@ece.cmu.edu>, <other names deleted>
Subject: iSCSI: Login stage transition; T bit
**********************

I will give Eddy a call to confirm your point.

<Point 3 answered>

I know exactly what a portal is, since I have already abstracted it away in
the current environment; I see no need to add such additional layering.
The current mid-layer handles it fine if you know how to map the issue
according to its rules.

I know exactly what an iSCSI TPGT is and the relation to the DOMAIN.

<Point 2 answered>

Making a target a collection of LUs becomes a problem once you
saturate the queue of the associated HBAs.  Since Linux associates the
queue depth with the HBA and not the actual device, other problems arise.

HBA queue depth == 255/256
HDA queue depth == 31/32

HBA has 16 HDA's (for this example).

Artificially pooling to make two virtual HBA's of 8 HDA's each, while not
constraining the queue depth, will explode the midlayer.

Now assume an expanded model with N physical HBA's and load balancing the
queue issue becomes impossible.  So one needs to address it as another
issue.

<Point 1 answered>

OIC, maybe you should be more clear in your choice of terms.  Given the
answer to point 4 above is subject to your motivation.

Cheers,

Andre Hedrick
LAD Storage Consulting Group


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH / RFC] scsi_error handler update. (1/4)
  2003-02-18 22:16                                           ` Andre Hedrick
@ 2003-02-18 23:35                                             ` Luben Tuikov
  0 siblings, 0 replies; 47+ messages in thread
From: Luben Tuikov @ 2003-02-18 23:35 UTC (permalink / raw)
  To: Andre Hedrick; +Cc: SCSI Mailing List

Andre Hedrick wrote:
> Luben:
> 
> <Point 4 answered>
> **********************
> Date: Tue, 26 Mar 2002 15:31:41 -0500
> From: Luben Tuikov <luben@splentec.com>
> To: iSCSI <ips@ece.cmu.edu>, <other names deleted>
> Subject: iSCSI: Login stage transition; T bit
> **********************

Yes, I remember writing this message about a year ago.

Why you're referring to IPS and why you're referring to
this message and what it has to do with anything here
is beyond me.

[[ This email has to do with redundant information in
the PDU on iSCSI connection login -- *completely irrelevant*
here.  Normalization clearly shows the redundancy, people
agreed that it exists, but the consensus was that since
login were very cryptic in earlier versions, we left it
like this. ]]

Point 4 was of personal nature directed TO YOU,
to stop speculating over uncertainties and writing cr*p.

> <Point 3 answered>
> 
> I know exactly what a portal, since I have already abstractived it away in
> the current environment, I see no need to add such additional layering.
> The current mid-layer handles it fine if you know how to map the issue
> according to its rules.
> 
> I know exactly what an iSCSI TPGT is and the relation to the DOMAIN.
>
> <Point 2 answered>
> 
> Making a target a collecion of LUs becomes a problem once you hit the
> saturate the queue of the associated HBA's.  Since Linux associates the
> queue depth to the HBA and not the actual device, other problems arrise.

But we're discussing a NEW SCSI Core! Why cannot you understand this?

SPI Portals will have a per portal cmd queue length limit, but
other transports will NOT have those limits.  Because of SAN's, LUN masking,
etc, other transports will have a per target and per LU queue length limit,
as each logical unit has a Task Set (SAM3r04, 4.8, figure 14),
and the former is set by the transport (and may be dynamic).

This is why, Doug, rightfully so, wants to have specifics in
the struct scsi_portal, whether it is SPI, SAS, FC, etc.

This is the whole point of ``collectively working'' people.

> HBA queue detpth == 255/256
> HDA queue detpth == 31/32
> 
> HBA has 16 HDA's (for this example).
> 
> Artificially pooling to make two virtual HBA's of 8 HDA's each, while not
> constraining the queue depth, will explode the midlayer.
> 
> Now assume an expanded model with N physical HBA's and load balancing the
> queue issue becomes impossible.  So one needs to address it as another
> issue.

I repeat, what we're discussing is 2.7 (3.1) stuff, some years ahead.
We're not planning to do anything with SCSI Core now.

Everyone understands here that, the current scsi_host and the new
scsi_portal _represents_ the same thing in case it is SPI.  So, in
it's simplest form, you can ``Query replace regexp:''
``host'' with ``portal'' and from SAM-2/3 point of view this
will be *correct*.

The difference is that ``portal'' will abstract the type of controller,
whether it is SPI, FC, etc, and thus will offer more flexibility
for LLDDs, and closer representation of what is out there and
what will come.

By the time of 2.7, more exotic SCSI devices will appear working over
more exotic/newer SCSI transports, thus then we'll feel even more
the need for some changes.

> <Point 1 answered>
> 
> OIC, maybe you should be more clear in your choice of terms.  Given the
> answer to point 4 above is subject to your motivation.

I've explained my choice of terms very carefully, giving plenty of
examples, and plenty of cross-references in SAM-2/3 and SPC-2/3.

-- 
Luben



^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2003-02-18 23:35 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-02-11  8:13 [PATCH / RFC] scsi_error handler update. (1/4) Mike Anderson
2003-02-11  8:15 ` [PATCH / RFC] scsi_error handler update. (2/4) Mike Anderson
2003-02-11  8:17   ` [PATCH / RFC] scsi_error handler update. (3/4) Mike Anderson
2003-02-11  8:19     ` [PATCH / RFC] scsi_error handler update. (4/4) Mike Anderson
2003-02-11 22:38     ` [PATCH / RFC] scsi_error handler update. (3/4) James Bottomley
2003-02-12  7:16       ` Mike Anderson
2003-02-12 14:26         ` Luben Tuikov
2003-02-12 14:37         ` James Bottomley
2003-02-12 22:34     ` James Bottomley
2003-02-13  8:24       ` Mike Anderson
2003-02-11 16:49 ` [PATCH / RFC] scsi_error handler update. (1/4) Luben Tuikov
2003-02-11 17:22   ` Mike Anderson
2003-02-11 19:05     ` Luben Tuikov
2003-02-11 20:14       ` Luben Tuikov
2003-02-11 21:14       ` Mike Anderson
     [not found]       ` <3E495862.3050709@splentec.com>
2003-02-11 21:20         ` Mike Anderson
2003-02-11 21:22           ` Luben Tuikov
2003-02-11 22:41             ` Christoph Hellwig
2003-02-12 20:10               ` Luben Tuikov
2003-02-12 20:46                 ` Christoph Hellwig
2003-02-12 21:23                   ` Mike Anderson
2003-02-12 22:15                     ` Luben Tuikov
2003-02-12 21:46                   ` Luben Tuikov
2003-02-13 15:47                     ` Christoph Hellwig
2003-02-13 18:55                       ` Luben Tuikov
2003-02-14  0:24                         ` Doug Ledford
2003-02-14 16:38                           ` Patrick Mansfield
2003-02-14 16:58                           ` Mike Anderson
2003-02-14 18:50                             ` Doug Ledford
2003-02-14 19:35                             ` Luben Tuikov
2003-02-14 21:20                               ` James Bottomley
2003-02-17 17:20                                 ` Luben Tuikov
2003-02-17 17:58                                   ` James Bottomley
2003-02-17 18:29                                     ` Luben Tuikov
2003-02-18  5:37                                       ` Andre Hedrick
2003-02-18 19:46                                         ` Luben Tuikov
2003-02-18 22:16                                           ` Andre Hedrick
2003-02-18 23:35                                             ` Luben Tuikov
2003-02-17 20:17                                   ` Doug Ledford
2003-02-17 20:19                                     ` Matthew Jacob
2003-02-17 21:12                                     ` Luben Tuikov
2003-02-17 17:35                                 ` Luben Tuikov
2003-02-14 21:27                               ` James Bottomley
2003-02-17 17:28                                 ` Luben Tuikov
2003-02-16  4:23                               ` Andre Hedrick
2003-02-11 18:00 ` Patrick Mansfield
2003-02-11 18:44   ` Mike Anderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox