linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fix for Incorrect number of segments after building list problem
@ 2004-10-14 21:51 James Bottomley
  2004-10-14 21:55 ` 'Dave Olien'
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: James Bottomley @ 2004-10-14 21:51 UTC (permalink / raw)
  To: 'Dave Olien', Jens Axboe; +Cc: SCSI Mailing List

This is a rather nasty hack at the momen, but it seems to persuade
blk_recalc_rq_segments() not to underestimate.

Could you try it in your setup to see if it fixes the problem?

Thanks,

James

===== drivers/block/ll_rw_blk.c 1.271 vs edited =====
--- 1.271/drivers/block/ll_rw_blk.c	2004-09-13 19:23:21 -05:00
+++ edited/drivers/block/ll_rw_blk.c	2004-10-14 16:24:38 -05:00
@@ -921,7 +921,8 @@
 		}
 new_segment:
 		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
+		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
+		    hw_seg_size + bv->bv_len <= q->max_segment_size) {
 			hw_seg_size += bv->bv_len;
 		} else {
 new_hw_segment:
@@ -2723,30 +2724,49 @@
 void blk_recalc_rq_segments(struct request *rq)
 {
 	struct bio *bio, *prevbio = NULL;
-	int nr_phys_segs, nr_hw_segs;
+	int nr_phys_segs, nr_hw_segs, tot_phys_size = 0, tot_hw_size = 0;
 
 	if (!rq->bio)
 		return;
 
 	nr_phys_segs = nr_hw_segs = 0;
 	rq_for_each_bio(bio, rq) {
+		int bi_phys_segs, bi_hw_segs;
 		/* Force bio hw/phys segs to be recalculated. */
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
-		nr_phys_segs += bio_phys_segments(rq->q, bio);
-		nr_hw_segs += bio_hw_segments(rq->q, bio);
+		bi_phys_segs = bio_phys_segments(rq->q, bio);
+		bi_hw_segs = bio_hw_segments(rq->q, bio);
+		nr_phys_segs += bi_phys_segs;
+		nr_hw_segs += bi_hw_segs;
 		if (prevbio) {
-			if (blk_phys_contig_segment(rq->q, prevbio, bio))
+			if (blk_phys_contig_segment(rq->q, prevbio, bio) &&
+			    bio->bi_size + tot_phys_size < rq->q->max_segment_size)
 				nr_phys_segs--;
-			if (blk_hw_contig_segment(rq->q, prevbio, bio))
+			else
+				tot_phys_size = 0;
+			if (blk_hw_contig_segment(rq->q, prevbio, bio) &&
+			    bio->bi_size + tot_hw_size < rq->q->max_segment_size)
 				nr_hw_segs--;
+			else
+				tot_hw_size = 0;
 		}
+		if (bi_phys_segs > 1)
+			tot_phys_size = bio->bi_size;
+		else
+			tot_phys_size += bio->bi_size;
+		if (bi_hw_segs > 1)
+			tot_hw_size = bio->bi_size;
+		else
+			tot_hw_size += bio->bi_size;
+
 		prevbio = bio;
 	}
 
 	rq->nr_phys_segments = nr_phys_segs;
 	rq->nr_hw_segments = nr_hw_segs;
 }
+EXPORT_SYMBOL(blk_recalc_rq_segments);
 
 void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-14 21:51 [PATCH] fix for Incorrect number of segments after building list problem James Bottomley
@ 2004-10-14 21:55 ` 'Dave Olien'
  2004-10-14 22:15 ` 'Dave Olien'
  2004-10-20 14:39 ` Jens Axboe
  2 siblings, 0 replies; 15+ messages in thread
From: 'Dave Olien' @ 2004-10-14 21:55 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jens Axboe, SCSI Mailing List


OK, I'll apply it immediately and let you know what happens.

Dave

On Thu, Oct 14, 2004 at 04:51:16PM -0500, James Bottomley wrote:
> This is a rather nasty hack at the momen, but it seems to persuade
> blk_recalc_rq_segments() not to underestimate.
> 
> Could you try it in your setup to see if it fixes the problem?
> 
> Thanks,
> 
> James
> 
> ===== drivers/block/ll_rw_blk.c 1.271 vs edited =====
> --- 1.271/drivers/block/ll_rw_blk.c	2004-09-13 19:23:21 -05:00
> +++ edited/drivers/block/ll_rw_blk.c	2004-10-14 16:24:38 -05:00
> @@ -921,7 +921,8 @@
>  		}
>  new_segment:
>  		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
> -		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
> +		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
> +		    hw_seg_size + bv->bv_len <= q->max_segment_size) {
>  			hw_seg_size += bv->bv_len;
>  		} else {
>  new_hw_segment:
> @@ -2723,30 +2724,49 @@
>  void blk_recalc_rq_segments(struct request *rq)
>  {
>  	struct bio *bio, *prevbio = NULL;
> -	int nr_phys_segs, nr_hw_segs;
> +	int nr_phys_segs, nr_hw_segs, tot_phys_size = 0, tot_hw_size = 0;
>  
>  	if (!rq->bio)
>  		return;
>  
>  	nr_phys_segs = nr_hw_segs = 0;
>  	rq_for_each_bio(bio, rq) {
> +		int bi_phys_segs, bi_hw_segs;
>  		/* Force bio hw/phys segs to be recalculated. */
>  		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
>  
> -		nr_phys_segs += bio_phys_segments(rq->q, bio);
> -		nr_hw_segs += bio_hw_segments(rq->q, bio);
> +		bi_phys_segs = bio_phys_segments(rq->q, bio);
> +		bi_hw_segs = bio_hw_segments(rq->q, bio);
> +		nr_phys_segs += bi_phys_segs;
> +		nr_hw_segs += bi_hw_segs;
>  		if (prevbio) {
> -			if (blk_phys_contig_segment(rq->q, prevbio, bio))
> +			if (blk_phys_contig_segment(rq->q, prevbio, bio) &&
> +			    bio->bi_size + tot_phys_size < rq->q->max_segment_size)
>  				nr_phys_segs--;
> -			if (blk_hw_contig_segment(rq->q, prevbio, bio))
> +			else
> +				tot_phys_size = 0;
> +			if (blk_hw_contig_segment(rq->q, prevbio, bio) &&
> +			    bio->bi_size + tot_hw_size < rq->q->max_segment_size)
>  				nr_hw_segs--;
> +			else
> +				tot_hw_size = 0;
>  		}
> +		if (bi_phys_segs > 1)
> +			tot_phys_size = bio->bi_size;
> +		else
> +			tot_phys_size += bio->bi_size;
> +		if (bi_hw_segs > 1)
> +			tot_hw_size = bio->bi_size;
> +		else
> +			tot_hw_size += bio->bi_size;
> +
>  		prevbio = bio;
>  	}
>  
>  	rq->nr_phys_segments = nr_phys_segs;
>  	rq->nr_hw_segments = nr_hw_segs;
>  }
> +EXPORT_SYMBOL(blk_recalc_rq_segments);
>  
>  void blk_recalc_rq_sectors(struct request *rq, int nsect)
>  {

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-14 21:51 [PATCH] fix for Incorrect number of segments after building list problem James Bottomley
  2004-10-14 21:55 ` 'Dave Olien'
@ 2004-10-14 22:15 ` 'Dave Olien'
  2004-10-14 22:51   ` 'Dave Olien'
  2004-10-20 14:39 ` Jens Axboe
  2 siblings, 1 reply; 15+ messages in thread
From: 'Dave Olien' @ 2004-10-14 22:15 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jens Axboe, SCSI Mailing List


James,

I'm running your patch right now and watching for errors.
After 10 minutes, everything looks good.  Usually the
Incorrect segment count errors show up almost immediately.

I'd say this patch fixes my problem.

Now, I'll retry the dm multipath driver, and see if it triggers
any problems.

If you come up with a final patch you'd like me to test, just
send it my way.

Thanks!
Dave

On Thu, Oct 14, 2004 at 04:51:16PM -0500, James Bottomley wrote:
> This is a rather nasty hack at the momen, but it seems to persuade
> blk_recalc_rq_segments() not to underestimate.
> 
> Could you try it in your setup to see if it fixes the problem?
> 
> Thanks,
> 
> James
> 
> ===== drivers/block/ll_rw_blk.c 1.271 vs edited =====
> --- 1.271/drivers/block/ll_rw_blk.c	2004-09-13 19:23:21 -05:00
> +++ edited/drivers/block/ll_rw_blk.c	2004-10-14 16:24:38 -05:00
> @@ -921,7 +921,8 @@
>  		}
>  new_segment:
>  		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
> -		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
> +		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
> +		    hw_seg_size + bv->bv_len <= q->max_segment_size) {
>  			hw_seg_size += bv->bv_len;
>  		} else {
>  new_hw_segment:
> @@ -2723,30 +2724,49 @@
>  void blk_recalc_rq_segments(struct request *rq)
>  {
>  	struct bio *bio, *prevbio = NULL;
> -	int nr_phys_segs, nr_hw_segs;
> +	int nr_phys_segs, nr_hw_segs, tot_phys_size = 0, tot_hw_size = 0;
>  
>  	if (!rq->bio)
>  		return;
>  
>  	nr_phys_segs = nr_hw_segs = 0;
>  	rq_for_each_bio(bio, rq) {
> +		int bi_phys_segs, bi_hw_segs;
>  		/* Force bio hw/phys segs to be recalculated. */
>  		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
>  
> -		nr_phys_segs += bio_phys_segments(rq->q, bio);
> -		nr_hw_segs += bio_hw_segments(rq->q, bio);
> +		bi_phys_segs = bio_phys_segments(rq->q, bio);
> +		bi_hw_segs = bio_hw_segments(rq->q, bio);
> +		nr_phys_segs += bi_phys_segs;
> +		nr_hw_segs += bi_hw_segs;
>  		if (prevbio) {
> -			if (blk_phys_contig_segment(rq->q, prevbio, bio))
> +			if (blk_phys_contig_segment(rq->q, prevbio, bio) &&
> +			    bio->bi_size + tot_phys_size < rq->q->max_segment_size)
>  				nr_phys_segs--;
> -			if (blk_hw_contig_segment(rq->q, prevbio, bio))
> +			else
> +				tot_phys_size = 0;
> +			if (blk_hw_contig_segment(rq->q, prevbio, bio) &&
> +			    bio->bi_size + tot_hw_size < rq->q->max_segment_size)
>  				nr_hw_segs--;
> +			else
> +				tot_hw_size = 0;
>  		}
> +		if (bi_phys_segs > 1)
> +			tot_phys_size = bio->bi_size;
> +		else
> +			tot_phys_size += bio->bi_size;
> +		if (bi_hw_segs > 1)
> +			tot_hw_size = bio->bi_size;
> +		else
> +			tot_hw_size += bio->bi_size;
> +
>  		prevbio = bio;
>  	}
>  
>  	rq->nr_phys_segments = nr_phys_segs;
>  	rq->nr_hw_segments = nr_hw_segs;
>  }
> +EXPORT_SYMBOL(blk_recalc_rq_segments);
>  
>  void blk_recalc_rq_sectors(struct request *rq, int nsect)
>  {
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-14 22:15 ` 'Dave Olien'
@ 2004-10-14 22:51   ` 'Dave Olien'
  0 siblings, 0 replies; 15+ messages in thread
From: 'Dave Olien' @ 2004-10-14 22:51 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jens Axboe, SCSI Mailing List


James,

I'm running through the dm multipath driver now.  The problems
I was having through dm (which is where this all started) have also been
solved.  So it seems your patch is an effective fix.

I'll let this run overnight, and see if any errors occur during
the night.

By the way, I've only looked briefly at the write barrier
implementation in the block layer and IO schedulers.  So I'm
pretty naive.  Does this requeuing of SCSI requests in scsi_lib.c
potentially defeat the write barrier code?

Thanks,
Dave


On Thu, Oct 14, 2004 at 03:15:03PM -0700, 'Dave Olien' wrote:
> 
> James,
> 
> I'm running your patch right now and watching for errors.
> After 10 minutes, everything looks good.  Usually the
> Incorrect segment count errors show up almost immediately.
> 
> I'd say this patch fixes my problem.
> 
> Now, I'll retry the dm multipath driver, and see if it triggers
> any problems.
> 
> If you come up with a final patch you'd like me to test, just
> send it my way.
> 
> Thanks!
> Dave
> 
> On Thu, Oct 14, 2004 at 04:51:16PM -0500, James Bottomley wrote:
> > This is a rather nasty hack at the momen, but it seems to persuade
> > blk_recalc_rq_segments() not to underestimate.
> > 
> > Could you try it in your setup to see if it fixes the problem?
> > 
> > Thanks,
> > 
> > James
> > 
> > ===== drivers/block/ll_rw_blk.c 1.271 vs edited =====
> > --- 1.271/drivers/block/ll_rw_blk.c	2004-09-13 19:23:21 -05:00
> > +++ edited/drivers/block/ll_rw_blk.c	2004-10-14 16:24:38 -05:00
> > @@ -921,7 +921,8 @@
> >  		}
> >  new_segment:
> >  		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
> > -		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
> > +		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
> > +		    hw_seg_size + bv->bv_len <= q->max_segment_size) {
> >  			hw_seg_size += bv->bv_len;
> >  		} else {
> >  new_hw_segment:
> > @@ -2723,30 +2724,49 @@
> >  void blk_recalc_rq_segments(struct request *rq)
> >  {
> >  	struct bio *bio, *prevbio = NULL;
> > -	int nr_phys_segs, nr_hw_segs;
> > +	int nr_phys_segs, nr_hw_segs, tot_phys_size = 0, tot_hw_size = 0;
> >  
> >  	if (!rq->bio)
> >  		return;
> >  
> >  	nr_phys_segs = nr_hw_segs = 0;
> >  	rq_for_each_bio(bio, rq) {
> > +		int bi_phys_segs, bi_hw_segs;
> >  		/* Force bio hw/phys segs to be recalculated. */
> >  		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
> >  
> > -		nr_phys_segs += bio_phys_segments(rq->q, bio);
> > -		nr_hw_segs += bio_hw_segments(rq->q, bio);
> > +		bi_phys_segs = bio_phys_segments(rq->q, bio);
> > +		bi_hw_segs = bio_hw_segments(rq->q, bio);
> > +		nr_phys_segs += bi_phys_segs;
> > +		nr_hw_segs += bi_hw_segs;
> >  		if (prevbio) {
> > -			if (blk_phys_contig_segment(rq->q, prevbio, bio))
> > +			if (blk_phys_contig_segment(rq->q, prevbio, bio) &&
> > +			    bio->bi_size + tot_phys_size < rq->q->max_segment_size)
> >  				nr_phys_segs--;
> > -			if (blk_hw_contig_segment(rq->q, prevbio, bio))
> > +			else
> > +				tot_phys_size = 0;
> > +			if (blk_hw_contig_segment(rq->q, prevbio, bio) &&
> > +			    bio->bi_size + tot_hw_size < rq->q->max_segment_size)
> >  				nr_hw_segs--;
> > +			else
> > +				tot_hw_size = 0;
> >  		}
> > +		if (bi_phys_segs > 1)
> > +			tot_phys_size = bio->bi_size;
> > +		else
> > +			tot_phys_size += bio->bi_size;
> > +		if (bi_hw_segs > 1)
> > +			tot_hw_size = bio->bi_size;
> > +		else
> > +			tot_hw_size += bio->bi_size;
> > +
> >  		prevbio = bio;
> >  	}
> >  
> >  	rq->nr_phys_segments = nr_phys_segs;
> >  	rq->nr_hw_segments = nr_hw_segs;
> >  }
> > +EXPORT_SYMBOL(blk_recalc_rq_segments);
> >  
> >  void blk_recalc_rq_sectors(struct request *rq, int nsect)
> >  {
> > 
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-14 21:51 [PATCH] fix for Incorrect number of segments after building list problem James Bottomley
  2004-10-14 21:55 ` 'Dave Olien'
  2004-10-14 22:15 ` 'Dave Olien'
@ 2004-10-20 14:39 ` Jens Axboe
  2004-10-20 15:07   ` Jens Axboe
  2 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2004-10-20 14:39 UTC (permalink / raw)
  To: James Bottomley; +Cc: 'Dave Olien', SCSI Mailing List

On Thu, Oct 14 2004, James Bottomley wrote:
> This is a rather nasty hack at the momen, but it seems to persuade
> blk_recalc_rq_segments() not to underestimate.
> 
> Could you try it in your setup to see if it fixes the problem?

Strange how this survived so long, thanks for debugging this James. The
patch does look a little hackish, I'll see if I can beat it into
submission.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 14:39 ` Jens Axboe
@ 2004-10-20 15:07   ` Jens Axboe
  2004-10-20 15:50     ` James Bottomley
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2004-10-20 15:07 UTC (permalink / raw)
  To: James Bottomley; +Cc: 'Dave Olien', SCSI Mailing List

On Wed, Oct 20 2004, Jens Axboe wrote:
> On Thu, Oct 14 2004, James Bottomley wrote:
> > This is a rather nasty hack at the momen, but it seems to persuade
> > blk_recalc_rq_segments() not to underestimate.
> > 
> > Could you try it in your setup to see if it fixes the problem?
> 
> Strange how this survived so long, thanks for debugging this James. The
> patch does look a little hackish, I'll see if I can beat it into
> submission.

Should this be enough to fix it?

===== drivers/block/ll_rw_blk.c 1.273 vs edited =====
--- 1.273/drivers/block/ll_rw_blk.c	2004-10-19 11:40:18 +02:00
+++ edited/drivers/block/ll_rw_blk.c	2004-10-20 17:06:12 +02:00
@@ -922,9 +922,10 @@
 		}
 new_segment:
 		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
+		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
+		    hw_seg_size + bv->bv_len <= q->max_segment_size)
 			hw_seg_size += bv->bv_len;
-		} else {
+		else {
 new_hw_segment:
 			if (hw_seg_size > bio->bi_hw_front_size)
 				bio->bi_hw_front_size = hw_seg_size;
@@ -2766,22 +2767,36 @@
 {
 	struct bio *bio, *prevbio = NULL;
 	int nr_phys_segs, nr_hw_segs;
+	unsigned int phys_size, hw_size;
+	request_queue_t *q = rq->q;
 
 	if (!rq->bio)
 		return;
 
-	nr_phys_segs = nr_hw_segs = 0;
+	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
 	rq_for_each_bio(bio, rq) {
 		/* Force bio hw/phys segs to be recalculated. */
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
-		nr_phys_segs += bio_phys_segments(rq->q, bio);
-		nr_hw_segs += bio_hw_segments(rq->q, bio);
+		nr_phys_segs += bio_phys_segments(q, bio);
+		nr_hw_segs += bio_hw_segments(q, bio);
 		if (prevbio) {
-			if (blk_phys_contig_segment(rq->q, prevbio, bio))
+			int pseg = phys_size + prevbio->bi_size + bio->bi_size;
+			int hseg = hw_size + prevbio->bi_size + bio->bi_size;
+
+			if (blk_phys_contig_segment(q, prevbio, bio) &&
+			    pseg <= q->max_segment_size) {
 				nr_phys_segs--;
-			if (blk_hw_contig_segment(rq->q, prevbio, bio))
+				phys_size += prevbio->bi_size + bio->bi_size;
+			} else
+				phys_size = 0;
+
+			if (blk_hw_contig_segment(q, prevbio, bio) &&
+			    hseg <= q->max_segment_size) {
 				nr_hw_segs--;
+				hw_size += prevbio->bi_size + bio->bi_size;
+			} else
+				hw_size = 0;
 		}
 		prevbio = bio;
 	}

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 15:07   ` Jens Axboe
@ 2004-10-20 15:50     ` James Bottomley
  2004-10-20 15:58       ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: James Bottomley @ 2004-10-20 15:50 UTC (permalink / raw)
  To: Jens Axboe; +Cc: 'Dave Olien', SCSI Mailing List

On Wed, 2004-10-20 at 10:07, Jens Axboe wrote:
> > Strange how this survived so long, thanks for debugging this James. The
> > patch does look a little hackish, I'll see if I can beat it into
> > submission.

That's polite of you ... but you know it was my fault from the last
round of IOMMU merges ...

> Should this be enough to fix it?
> 
> ===== drivers/block/ll_rw_blk.c 1.273 vs edited =====
> --- 1.273/drivers/block/ll_rw_blk.c	2004-10-19 11:40:18 +02:00
> +++ edited/drivers/block/ll_rw_blk.c	2004-10-20 17:06:12 +02:00
> @@ -922,9 +922,10 @@
>  		}
>  new_segment:
>  		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
> -		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
> +		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
> +		    hw_seg_size + bv->bv_len <= q->max_segment_size)
>  			hw_seg_size += bv->bv_len;
> -		} else {
> +		else {
>  new_hw_segment:
>  			if (hw_seg_size > bio->bi_hw_front_size)
>  				bio->bi_hw_front_size = hw_seg_size;

This piece is actually contamination from my tree.  Since
q->max_segment_size is supposed to represent the parameters of the
actual card sg descriptor table, and hence cannot theoretically be
exceeded on either phys or virt merges, there's currently no way to
communicate this parameter to the iommu, so the dma mapping will violate
it even if the block layer respects it.  We're just lucky most cards
(barring ide ones which have their own hack) have 32 bit DMA length
descriptors.

> @@ -2766,22 +2767,36 @@
>  {
>  	struct bio *bio, *prevbio = NULL;
>  	int nr_phys_segs, nr_hw_segs;
> +	unsigned int phys_size, hw_size;
> +	request_queue_t *q = rq->q;
>  
>  	if (!rq->bio)
>  		return;
>  
> -	nr_phys_segs = nr_hw_segs = 0;
> +	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
>  	rq_for_each_bio(bio, rq) {
>  		/* Force bio hw/phys segs to be recalculated. */
>  		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
>  
> -		nr_phys_segs += bio_phys_segments(rq->q, bio);
> -		nr_hw_segs += bio_hw_segments(rq->q, bio);
> +		nr_phys_segs += bio_phys_segments(q, bio);
> +		nr_hw_segs += bio_hw_segments(q, bio);
>  		if (prevbio) {
> -			if (blk_phys_contig_segment(rq->q, prevbio, bio))
> +			int pseg = phys_size + prevbio->bi_size + bio->bi_size;
> +			int hseg = hw_size + prevbio->bi_size + bio->bi_size;
> +
> +			if (blk_phys_contig_segment(q, prevbio, bio) &&
> +			    pseg <= q->max_segment_size) {
>  				nr_phys_segs--;
> -			if (blk_hw_contig_segment(rq->q, prevbio, bio))
> +				phys_size += prevbio->bi_size + bio->bi_size;
> +			} else
> +				phys_size = 0;
> +
> +			if (blk_hw_contig_segment(q, prevbio, bio) &&
> +			    hseg <= q->max_segment_size) {
>  				nr_hw_segs--;
> +				hw_size += prevbio->bi_size + bio->bi_size;
> +			} else
> +				hw_size = 0;
>  		}
>  		prevbio = bio;
>  	}

Yes, that looks much better ... thanks!  I was plotting to enhance this
later to use bi_hw_front_size and bi_hw_back_size, but not until we get
the IOMMU descriptor length issue sorted out.

James



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 15:50     ` James Bottomley
@ 2004-10-20 15:58       ` Jens Axboe
  2004-10-20 16:07         ` James Bottomley
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2004-10-20 15:58 UTC (permalink / raw)
  To: James Bottomley; +Cc: 'Dave Olien', SCSI Mailing List

On Wed, Oct 20 2004, James Bottomley wrote:
> On Wed, 2004-10-20 at 10:07, Jens Axboe wrote:
> > > Strange how this survived so long, thanks for debugging this James. The
> > > patch does look a little hackish, I'll see if I can beat it into
> > > submission.
> 
> That's polite of you ... but you know it was my fault from the last
> round of IOMMU merges ...

Actually it didn't cross my mind, but now you've politely reminded
everyone :-)

> > Should this be enough to fix it?
> > 
> > ===== drivers/block/ll_rw_blk.c 1.273 vs edited =====
> > --- 1.273/drivers/block/ll_rw_blk.c	2004-10-19 11:40:18 +02:00
> > +++ edited/drivers/block/ll_rw_blk.c	2004-10-20 17:06:12 +02:00
> > @@ -922,9 +922,10 @@
> >  		}
> >  new_segment:
> >  		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
> > -		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
> > +		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len) &&
> > +		    hw_seg_size + bv->bv_len <= q->max_segment_size)
> >  			hw_seg_size += bv->bv_len;
> > -		} else {
> > +		else {
> >  new_hw_segment:
> >  			if (hw_seg_size > bio->bi_hw_front_size)
> >  				bio->bi_hw_front_size = hw_seg_size;
> 
> This piece is actually contamination from my tree.  Since
> q->max_segment_size is supposed to represent the parameters of the
> actual card sg descriptor table, and hence cannot theoretically be
> exceeded on either phys or virt merges, there's currently no way to
> communicate this parameter to the iommu, so the dma mapping will violate
> it even if the block layer respects it.  We're just lucky most cards
> (barring ide ones which have their own hack) have 32 bit DMA length
> descriptors.

Ah yes, now I remember. How is the fix for that coming along, btw?

> > @@ -2766,22 +2767,36 @@
> >  {
> >  	struct bio *bio, *prevbio = NULL;
> >  	int nr_phys_segs, nr_hw_segs;
> > +	unsigned int phys_size, hw_size;
> > +	request_queue_t *q = rq->q;
> >  
> >  	if (!rq->bio)
> >  		return;
> >  
> > -	nr_phys_segs = nr_hw_segs = 0;
> > +	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
> >  	rq_for_each_bio(bio, rq) {
> >  		/* Force bio hw/phys segs to be recalculated. */
> >  		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
> >  
> > -		nr_phys_segs += bio_phys_segments(rq->q, bio);
> > -		nr_hw_segs += bio_hw_segments(rq->q, bio);
> > +		nr_phys_segs += bio_phys_segments(q, bio);
> > +		nr_hw_segs += bio_hw_segments(q, bio);
> >  		if (prevbio) {
> > -			if (blk_phys_contig_segment(rq->q, prevbio, bio))
> > +			int pseg = phys_size + prevbio->bi_size + bio->bi_size;
> > +			int hseg = hw_size + prevbio->bi_size + bio->bi_size;
> > +
> > +			if (blk_phys_contig_segment(q, prevbio, bio) &&
> > +			    pseg <= q->max_segment_size) {
> >  				nr_phys_segs--;
> > -			if (blk_hw_contig_segment(rq->q, prevbio, bio))
> > +				phys_size += prevbio->bi_size + bio->bi_size;
> > +			} else
> > +				phys_size = 0;
> > +
> > +			if (blk_hw_contig_segment(q, prevbio, bio) &&
> > +			    hseg <= q->max_segment_size) {
> >  				nr_hw_segs--;
> > +				hw_size += prevbio->bi_size + bio->bi_size;
> > +			} else
> > +				hw_size = 0;
> >  		}
> >  		prevbio = bio;
> >  	}
> 
> Yes, that looks much better ... thanks!  I was plotting to enhance this
> later to use bi_hw_front_size and bi_hw_back_size, but not until we get
> the IOMMU descriptor length issue sorted out.

Agree, so I'll just push this to Andrew right away. Thanks for checking.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 15:58       ` Jens Axboe
@ 2004-10-20 16:07         ` James Bottomley
  2004-10-20 16:11           ` Jens Axboe
  2004-10-20 17:45           ` Jeff Garzik
  0 siblings, 2 replies; 15+ messages in thread
From: James Bottomley @ 2004-10-20 16:07 UTC (permalink / raw)
  To: Jens Axboe; +Cc: 'Dave Olien', SCSI Mailing List

On Wed, 2004-10-20 at 10:58, Jens Axboe wrote:
> Ah yes, now I remember. How is the fix for that coming along, btw?

Erm, it didn't cross my mind until I realised what this problem was
about.  All SCSI drivers are setting 65536 (the default) for the max
segment length.  However, this is being violated wholesale by all the
IOMMUs and it all works because their length descriptors are 32 bit
anyway ...

However, I'll add it to the list and see if I can get something out.  My
slight problem is testing.  Only SATA and IDE have this short descriptor
problem, so I've nothing really to test the actual fix with.

James



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 16:07         ` James Bottomley
@ 2004-10-20 16:11           ` Jens Axboe
  2004-10-20 17:45           ` Jeff Garzik
  1 sibling, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2004-10-20 16:11 UTC (permalink / raw)
  To: James Bottomley; +Cc: 'Dave Olien', SCSI Mailing List

On Wed, Oct 20 2004, James Bottomley wrote:
> On Wed, 2004-10-20 at 10:58, Jens Axboe wrote:
> > Ah yes, now I remember. How is the fix for that coming along, btw?
> 
> Erm, it didn't cross my mind until I realised what this problem was
> about.  All SCSI drivers are setting 65536 (the default) for the max
> segment length.  However, this is being violated wholesale by all the
> IOMMUs and it all works because their length descriptors are 32 bit
> anyway ...
> 
> However, I'll add it to the list and see if I can get something out.  My
> slight problem is testing.  Only SATA and IDE have this short descriptor
> problem, so I've nothing really to test the actual fix with.

Just set some articial limit (say, 0x3fff) and add a debug check in
init_io() to check the violation?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 16:07         ` James Bottomley
  2004-10-20 16:11           ` Jens Axboe
@ 2004-10-20 17:45           ` Jeff Garzik
  2004-10-20 17:47             ` Jens Axboe
  1 sibling, 1 reply; 15+ messages in thread
From: Jeff Garzik @ 2004-10-20 17:45 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jens Axboe, 'Dave Olien', SCSI Mailing List

James Bottomley wrote:
> slight problem is testing.  Only SATA and IDE have this short descriptor
> problem, so I've nothing really to test the actual fix with.


Clearly we need to remedy this :)

	Jeff



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 17:45           ` Jeff Garzik
@ 2004-10-20 17:47             ` Jens Axboe
  2004-10-20 18:11               ` Jeff Garzik
  2004-10-21 12:49               ` James Bottomley
  0 siblings, 2 replies; 15+ messages in thread
From: Jens Axboe @ 2004-10-20 17:47 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, 'Dave Olien', SCSI Mailing List

On Wed, Oct 20 2004, Jeff Garzik wrote:
> James Bottomley wrote:
> >slight problem is testing.  Only SATA and IDE have this short descriptor
> >problem, so I've nothing really to test the actual fix with.
> 
> 
> Clearly we need to remedy this :)

By improving James' imagination? :)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 17:47             ` Jens Axboe
@ 2004-10-20 18:11               ` Jeff Garzik
  2004-10-21 12:49               ` James Bottomley
  1 sibling, 0 replies; 15+ messages in thread
From: Jeff Garzik @ 2004-10-20 18:11 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, 'Dave Olien', SCSI Mailing List

Jens Axboe wrote:
> On Wed, Oct 20 2004, Jeff Garzik wrote:
> 
>>James Bottomley wrote:
>>
>>>slight problem is testing.  Only SATA and IDE have this short descriptor
>>>problem, so I've nothing really to test the actual fix with.
>>
>>
>>Clearly we need to remedy this :)
> 
> 
> By improving James' imagination? :)


No, by seeding the world with SATA!!

	Jeff



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-20 17:47             ` Jens Axboe
  2004-10-20 18:11               ` Jeff Garzik
@ 2004-10-21 12:49               ` James Bottomley
  2004-10-21 13:02                 ` Jens Axboe
  1 sibling, 1 reply; 15+ messages in thread
From: James Bottomley @ 2004-10-21 12:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Garzik, 'Dave Olien', SCSI Mailing List

On Wed, 2004-10-20 at 13:47, Jens Axboe wrote:
> On Wed, Oct 20 2004, Jeff Garzik wrote:
> > Clearly we need to remedy this :)

Shamelessly beg for hardware?  Moi?  How well^Wlittle you know me.

> By improving James' imagination? :)

I meant that 50% of the fix is pulling out the hacks that permeate the
IDE drivers to resplit SG lists after iommu merging.  That's the bit I
can't test.

James



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fix for Incorrect number of segments after building list problem
  2004-10-21 12:49               ` James Bottomley
@ 2004-10-21 13:02                 ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2004-10-21 13:02 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jeff Garzik, 'Dave Olien', SCSI Mailing List

On Thu, Oct 21 2004, James Bottomley wrote:
> On Wed, 2004-10-20 at 13:47, Jens Axboe wrote:
> > On Wed, Oct 20 2004, Jeff Garzik wrote:
> > > Clearly we need to remedy this :)
> 
> Shamelessly beg for hardware?  Moi?  How well^Wlittle you know me.

Hehe

> > By improving James' imagination? :)
> 
> I meant that 50% of the fix is pulling out the hacks that permeate the
> IDE drivers to resplit SG lists after iommu merging.  That's the bit I
> can't test.

That's true. But you could set the same limits for SCSI, and check in
init_io() whether those limits are violated.

But I should shut up, I don't want to ruin your chance at free hardware.
SCSI whore gone SATA, so sad :)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2004-10-21 13:03 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-10-14 21:51 [PATCH] fix for Incorrect number of segments after building list problem James Bottomley
2004-10-14 21:55 ` 'Dave Olien'
2004-10-14 22:15 ` 'Dave Olien'
2004-10-14 22:51   ` 'Dave Olien'
2004-10-20 14:39 ` Jens Axboe
2004-10-20 15:07   ` Jens Axboe
2004-10-20 15:50     ` James Bottomley
2004-10-20 15:58       ` Jens Axboe
2004-10-20 16:07         ` James Bottomley
2004-10-20 16:11           ` Jens Axboe
2004-10-20 17:45           ` Jeff Garzik
2004-10-20 17:47             ` Jens Axboe
2004-10-20 18:11               ` Jeff Garzik
2004-10-21 12:49               ` James Bottomley
2004-10-21 13:02                 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).