scsi in 2.5.48

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* scsi in 2.5.48
@ 2002-11-18  9:14 Andrew Morton
  2002-11-18  9:45 ` Andrew Morton
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2002-11-18  9:14 UTC (permalink / raw)
  To: lkml, linux-scsi

Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().
Running an `ls -lR' against an IDE disk gets it going again.  Seems
that the driver is failing to call into the elevator to fetch more
requests.  The unplug activity from the IDE reads is sufficient to
keep the SCSI driver limping along.

The driver is aic7xxx.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: scsi in 2.5.48
  2002-11-18  9:14 scsi in 2.5.48 Andrew Morton
@ 2002-11-18  9:45 ` Andrew Morton
  2002-11-18 13:56   ` Jens Axboe
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2002-11-18  9:45 UTC (permalink / raw)
  To: lkml, linux-scsi

Andrew Morton wrote:
> 
> Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().

This makes it work again.


--- 25/drivers/scsi/scsi_lib.c~scsi-plug	Mon Nov 18 01:42:40 2002
+++ 25-akpm/drivers/scsi/scsi_lib.c	Mon Nov 18 01:42:44 2002
@@ -1024,7 +1024,6 @@ void scsi_request_fn(request_queue_t * q
 			/* can happen if the prep fails 
 			 * FIXME: elv_next_request() should be plugging the
 			 * queue */
-			blk_plug_device(q);
 			break;
 		}
 

_

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: scsi in 2.5.48
  2002-11-18  9:45 ` Andrew Morton
@ 2002-11-18 13:56   ` Jens Axboe
  2002-11-18 15:05     ` J.E.J. Bottomley
                       ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Jens Axboe @ 2002-11-18 13:56 UTC (permalink / raw)
  To: Andrew Morton; +Cc: lkml, linux-scsi, James Bottomley

On Mon, Nov 18 2002, Andrew Morton wrote:
> Andrew Morton wrote:
> > 
> > Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().
> 
> This makes it work again.
> 
> 
> --- 25/drivers/scsi/scsi_lib.c~scsi-plug	Mon Nov 18 01:42:40 2002
> +++ 25-akpm/drivers/scsi/scsi_lib.c	Mon Nov 18 01:42:44 2002
> @@ -1024,7 +1024,6 @@ void scsi_request_fn(request_queue_t * q
>  			/* can happen if the prep fails 
>  			 * FIXME: elv_next_request() should be plugging the
>  			 * queue */
> -			blk_plug_device(q);
>  			break;
>  		}

Right fix would be something ala:

===== drivers/block/ll_rw_blk.c 1.143 vs edited =====
--- 1.143/drivers/block/ll_rw_blk.c	Mon Nov 18 08:28:08 2002
+++ edited/drivers/block/ll_rw_blk.c	Mon Nov 18 14:45:55 2002
@@ -1038,6 +1038,16 @@
 }
 
 /**
+ * blk_run_queue - run a single device queue
+ * @q	The queue to run
+ */
+void __blk_run_queue(request_queue_t *q)
+{
+	blk_remove_plug(q);
+	q->request_fn(q);
+}
+
+/**
  * blk_run_queues - fire all plugged queues
  *
  * Description:
@@ -2198,4 +2211,5 @@
 EXPORT_SYMBOL(blk_start_queue);
 EXPORT_SYMBOL(blk_stop_queue);
 EXPORT_SYMBOL(__blk_stop_queue);
+EXPORT_SYMBOL(__blk_run_queue);
 EXPORT_SYMBOL(blk_run_queues);
===== drivers/scsi/scsi_lib.c 1.46 vs edited =====
--- 1.46/drivers/scsi/scsi_lib.c	Mon Nov 18 08:28:09 2002
+++ edited/drivers/scsi/scsi_lib.c	Mon Nov 18 14:49:15 2002
@@ -259,7 +259,7 @@
 	/*
 	 * Just hit the requeue function for the queue.
 	 */
-	q->request_fn(q);
+	__blk_run_queue(q);
 
 	SDpnt = (Scsi_Device *) q->queuedata;
 	SHpnt = SDpnt->host;
@@ -272,8 +272,6 @@
 	 * use function pointers to pick the right one.
 	 */
 	if (SDpnt->single_lun && blk_queue_empty(q) && SDpnt->device_busy ==0) {
-		request_queue_t *q;
-
 		for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
 			if (((SHpnt->can_queue > 0)
 			     && (SHpnt->host_busy >= SHpnt->can_queue))
@@ -283,8 +281,7 @@
 				break;
 			}
 
-			q = &SDpnt->request_queue;
-			q->request_fn(q);
+			__blk_run_queue(&SDpnt->request_queue);
 		}
 	}
 
@@ -299,7 +296,6 @@
 	all_clear = 1;
 	if (SHpnt->some_device_starved) {
 		for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
-			request_queue_t *q;
 			if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
 			    || (SHpnt->host_blocked) 
 			    || (SHpnt->host_self_blocked)) {
@@ -308,8 +304,7 @@
 			if (SDpnt->device_blocked || !SDpnt->starved) {
 				continue;
 			}
-			q = &SDpnt->request_queue;
-			q->request_fn(q);
+			__blk_run_queue(&SDpnt->request_queue);
 			all_clear = 0;
 		}
 		if (SDpnt == NULL && all_clear) {
===== drivers/scsi/scsi_error.c 1.21 vs edited =====
--- 1.21/drivers/scsi/scsi_error.c	Sat Nov 16 20:54:08 2002
+++ edited/drivers/scsi/scsi_error.c	Mon Nov 18 14:47:49 2002
@@ -1479,8 +1479,6 @@
 	 */
 	spin_lock_irqsave(shost->host_lock, flags);
 	for (sdev = shost->host_queue; sdev; sdev = sdev->next) {
-		request_queue_t *q = &sdev->request_queue;
-
 		if ((shost->can_queue > 0 &&
 		     (shost->host_busy >= shost->can_queue))
 		    || (shost->host_blocked)
@@ -1488,7 +1486,7 @@
 			break;
 		}
 
-		q->request_fn(q);
+		__blk_run_queue(&sdev->request_queue);
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 }
===== include/linux/blkdev.h 1.93 vs edited =====
--- 1.93/include/linux/blkdev.h	Mon Nov 18 08:28:09 2002
+++ edited/include/linux/blkdev.h	Mon Nov 18 14:46:29 2002
@@ -321,6 +321,7 @@
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
 extern void __blk_stop_queue(request_queue_t *q);
+extern void __blk_run_queue(request_queue_t *q);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: scsi in 2.5.48
  2002-11-18 13:56   ` Jens Axboe
@ 2002-11-18 15:05     ` J.E.J. Bottomley
  2002-11-19  6:31     ` Andrew Morton
  2002-11-19 13:06     ` William Lee Irwin III
  2 siblings, 0 replies; 6+ messages in thread
From: J.E.J. Bottomley @ 2002-11-18 15:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Andrew Morton, lkml, linux-scsi, James Bottomley

akpm@digeo.com said:
> > Andrew Morton wrote:
> 
> > Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().
> This makes it work again. 

That can't be entirely right, because we can't exit the request function with 
pending requests and something like an unplugged queue to start them.  It will 
work for the adaptec because the prep defer is caused by the huge queue depth 
running us out of command blocks, so the queues would get re-run by a 
returning command.  If the failure were caused by zero outstanding commands, 
this would hang the system forever.

axboe@suse.de said:
> Right fix would be something ala: 

That looks about right.  On returning I/O we need to unplug and restart.

However, with regard to Andrew's problem, how can the queue plug indefinitely? 
 I thought it was guaranteed to be unplugged and run eventually?

James

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: scsi in 2.5.48
  2002-11-18 13:56   ` Jens Axboe
  2002-11-18 15:05     ` J.E.J. Bottomley
@ 2002-11-19  6:31     ` Andrew Morton
  2002-11-19 13:06     ` William Lee Irwin III
  2 siblings, 0 replies; 6+ messages in thread
From: Andrew Morton @ 2002-11-19  6:31 UTC (permalink / raw)
  To: Jens Axboe; +Cc: lkml, linux-scsi, James Bottomley

Jens Axboe wrote:
> 
> On Mon, Nov 18 2002, Andrew Morton wrote:
> > Andrew Morton wrote:
> > >
> > > Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().
> >
> > This makes it work again.
> >
> >
> > --- 25/drivers/scsi/scsi_lib.c~scsi-plug      Mon Nov 18 01:42:40 2002
> > +++ 25-akpm/drivers/scsi/scsi_lib.c   Mon Nov 18 01:42:44 2002
> > @@ -1024,7 +1024,6 @@ void scsi_request_fn(request_queue_t * q
> >                       /* can happen if the prep fails
> >                        * FIXME: elv_next_request() should be plugging the
> >                        * queue */
> > -                     blk_plug_device(q);
> >                       break;
> >               }
> 
> Right fix would be something ala:
> 
> ===== drivers/block/ll_rw_blk.c 1.143 vs edited =====
> --- 1.143/drivers/block/ll_rw_blk.c     Mon Nov 18 08:28:08 2002
> +++ edited/drivers/block/ll_rw_blk.c    Mon Nov 18 14:45:55 2002
> @@ -1038,6 +1038,16 @@
>  }
> 
>  /**
> + * blk_run_queue - run a single device queue
> + * @q  The queue to run
> + */

That fixes it for me, thanks.

Now, I had me a little bug in blk_congestion_wait() - it was forgetting
to wait.  The net effect of this bug was to increase plug/unplug traffic
by a factor of about 200.  And the 4-way was oopsing about once per
gigabyte of IO, in blk_run_queues().  Always due to a garbage q->unplug_fn.

That local list needs spinlock protection, because blk_remove_plug()
will actually take queues off that local list while another CPU (or
this one) is walking it.

Maybe that's a logic error, but I feel a ton safer with this patch in
place, and it stopped the oopses.

It also adds a few debug checks and uninlines stuff ;)



 drivers/block/ll_rw_blk.c |   27 ++++++++++++++-------------
 1 files changed, 14 insertions(+), 13 deletions(-)

--- 25/drivers/block/ll_rw_blk.c~plugbug	Mon Nov 18 21:29:23 2002
+++ 25-akpm/drivers/block/ll_rw_blk.c	Mon Nov 18 21:35:11 2002
@@ -737,7 +737,7 @@ new_segment:
 }
 
 
-inline int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
+int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
 				   struct bio *nxt)
 {
 	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
@@ -758,7 +758,7 @@ inline int blk_phys_contig_segment(reque
 	return 0;
 }
 
-inline int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
+int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
 				 struct bio *nxt)
 {
 	if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
@@ -956,6 +956,7 @@ static int ll_merge_requests_fn(request_
  */
 void blk_plug_device(request_queue_t *q)
 {
+	WARN_ON(!irqs_disabled());
 	if (!blk_queue_plugged(q)) {
 		spin_lock(&blk_plug_lock);
 		list_add_tail(&q->plug_list, &blk_plug_list);
@@ -967,8 +968,9 @@ void blk_plug_device(request_queue_t *q)
  * remove the queue from the plugged list, if present. called with
  * queue lock held and interrupts disabled.
  */
-inline int blk_remove_plug(request_queue_t *q)
+int blk_remove_plug(request_queue_t *q)
 {
+	WARN_ON(!irqs_disabled());
 	if (blk_queue_plugged(q)) {
 		spin_lock(&blk_plug_lock);
 		list_del_init(&q->plug_list);
@@ -1096,28 +1098,27 @@ void __blk_run_queue(request_queue_t *q)
 #define blk_plug_entry(entry) list_entry((entry), request_queue_t, plug_list)
 void blk_run_queues(void)
 {
-	struct list_head local_plug_list;
-
-	INIT_LIST_HEAD(&local_plug_list);
+	LIST_HEAD(local_plug_list);
 
 	spin_lock_irq(&blk_plug_lock);
 
 	/*
 	 * this will happen fairly often
 	 */
-	if (list_empty(&blk_plug_list)) {
-		spin_unlock_irq(&blk_plug_lock);
-		return;
-	}
+	if (list_empty(&blk_plug_list))
+		goto out;
 
 	list_splice_init(&blk_plug_list, &local_plug_list);
-	spin_unlock_irq(&blk_plug_lock);
 	
 	while (!list_empty(&local_plug_list)) {
 		request_queue_t *q = blk_plug_entry(local_plug_list.next);
 
+		spin_unlock_irq(&blk_plug_lock);
 		q->unplug_fn(q);
+		spin_lock_irq(&blk_plug_lock);
 	}
+out:
+	spin_unlock_irq(&blk_plug_lock);
 }
 
 static int __blk_cleanup_queue(struct request_list *list)
@@ -1959,7 +1960,7 @@ int submit_bio(int rw, struct bio *bio)
 	return 1;
 }
 
-inline void blk_recalc_rq_segments(struct request *rq)
+void blk_recalc_rq_segments(struct request *rq)
 {
 	struct bio *bio;
 	int nr_phys_segs, nr_hw_segs;
@@ -1982,7 +1983,7 @@ inline void blk_recalc_rq_segments(struc
 	rq->nr_hw_segments = nr_hw_segs;
 }
 
-inline void blk_recalc_rq_sectors(struct request *rq, int nsect)
+void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {
 	if (blk_fs_request(rq)) {
 		rq->hard_sector += nsect;

_

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: scsi in 2.5.48
  2002-11-18 13:56   ` Jens Axboe
  2002-11-18 15:05     ` J.E.J. Bottomley
  2002-11-19  6:31     ` Andrew Morton
@ 2002-11-19 13:06     ` William Lee Irwin III
  2 siblings, 0 replies; 6+ messages in thread
From: William Lee Irwin III @ 2002-11-19 13:06 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Andrew Morton, lkml, linux-scsi, James Bottomley

Andrew Morton wrote:
>>> Appears to be DOA.  Just a simple mke2fs hangs in get_request_wait().

On Mon, Nov 18 2002, Andrew Morton wrote:
>> This makes it work again.

On Mon, Nov 18, 2002 at 02:56:14PM +0100, Jens Axboe wrote:
> Right fix would be something ala:

This solves my free_initmem() deadlock, which for some reason I assumed
to be a problem of a lower-level nature.


Bill

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2002-11-19 13:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-18  9:14 scsi in 2.5.48 Andrew Morton
2002-11-18  9:45 ` Andrew Morton
2002-11-18 13:56   ` Jens Axboe
2002-11-18 15:05     ` J.E.J. Bottomley
2002-11-19  6:31     ` Andrew Morton
2002-11-19 13:06     ` William Lee Irwin III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox