Linux block layer
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <linux@treblig.org>
To: Keith Busch <kbusch@kernel.org>
Cc: Keith Busch <kbusch@meta.com>,
	dm-devel@lists.linux.dev, linux-block@vger.kernel.org,
	mpatocka@redhat.com, Vjaceslavs Klimovs <vklimovs@gmail.com>
Subject: Re: [PATCH 2/2] dm-raid1: don't fail the mirror for invalid I/O errors
Date: Wed, 17 Jun 2026 16:44:35 +0000	[thread overview]
Message-ID: <ajLO80kZmFnyR2mH@gallifrey> (raw)
In-Reply-To: <ajLJiOwim7qBp_0P@kbusch-mbp>

* Keith Busch (kbusch@kernel.org) wrote:
> On Wed, Jun 17, 2026 at 03:33:55PM +0000, Dr. David Alan Gilbert wrote:
> > * Keith Busch (kbusch@kernel.org) wrote:
> > > On Tue, Jun 16, 2026 at 08:09:18PM +0000, Dr. David Alan Gilbert wrote:
> > > > root@dalek:/home/dg# lvcreate  --mirrors 1 -L 1G main /dev/sda2 /dev/sdb2
> > > 
> > > So this is a subtle difference from your original report which ran
> > > lvcreate a little differently:
> > > 
> > >   # lvcreate --type mirror --mirrors 1 -L 1G main /dev/sda2 /dev/sdb2
> > > 
> > > This patch series address problems with the original report with the
> > > "--type mirror" parameter, which uses dm-raid1.c instead of md/raid1.c.
> > 
> > Ah OK.
> > (I think I think I did say that somewhere, hmm ajFK5NXkxd6jU5zu@gallifrey ? )
> 
> I see. This will fix that setup:

And it does;
dg@dalek:~$ ./dbf
pread of 4096 said: -1 (Invalid argument)
dg@dalek:~$ ./dbf-write 
pwrite of 4096 said: -1 (Invalid argument)
dg@dalek:~$ ./dbf-joint 
pread of 4096 said: -1 (Invalid argument)
pwrite of 4096 said: -1 (Invalid argument)

and the log is clean.

Tested-by: Dr. David Alan Gilbert <linux@treblig.org>

(It's a bit scary you're having to go around quite
a few places and make similar fixes; I assume there
are others that do similar things).

Thanks again,

Dave

> 
> ---
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 5b9368bd9e700..17a5f0d98aacc 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -322,7 +322,9 @@ static void call_bio_endio(struct r1bio *r1_bio)
>  {
>  	struct bio *bio = r1_bio->master_bio;
>  
> -	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
> +	if (test_bit(R1BIO_Invalid, &r1_bio->state))
> +		bio->bi_status = BLK_STS_INVAL;
> +	else if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
>  		bio->bi_status = BLK_STS_IOERR;
>  
>  	bio_endio(bio);
> @@ -403,6 +405,8 @@ static void raid1_end_read_request(struct bio *bio)
>  		;
>  	} else if (!raid1_should_handle_error(bio)) {
>  		uptodate = 1;
> +		if (bio->bi_status == BLK_STS_INVAL)
> +			set_bit(R1BIO_Invalid, &r1_bio->state);
>  	} else {
>  		/* If all other devices have failed, we want to return
>  		 * the error upwards rather than fail the last device.
> @@ -519,6 +523,14 @@ static void raid1_end_write_request(struct bio *bio)
>  		 */
>  		r1_bio->bios[mirror] = NULL;
>  		to_put = bio;
> +		/*
> +		 * An invalid I/O (e.g. a misaligned bio rejected by the lower
> +		 * device) was ignored above rather than faulting the device.
> +		 * It is not a successful write, though, so report the error to
> +		 * the caller instead of completing the master bio as uptodate.
> +		 */
> +		if (bio->bi_status == BLK_STS_INVAL)
> +			set_bit(R1BIO_Invalid, &r1_bio->state);
>  		/*
>  		 * Do not set R1BIO_Uptodate if the current device is
>  		 * rebuilding or Faulty. This is because we cannot use
> diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
> index c98d43a7ae993..21e837db5b25e 100644
> --- a/drivers/md/raid1.h
> +++ b/drivers/md/raid1.h
> @@ -184,6 +184,12 @@ enum r1bio_state {
>  	R1BIO_MadeGood,
>  	R1BIO_WriteError,
>  	R1BIO_FailFast,
> +/* An invalid I/O (e.g. a bio rejected by the lower device because it does
> + * not meet that device's dma_alignment) is not a device failure.  Report
> + * the error to the caller without faulting the device or retrying, and do
> + * not complete a write as if it had succeeded.
> + */
> +	R1BIO_Invalid,
>  };
>  
>  static inline int sector_to_idx(sector_t sector)
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index cee5a253a281d..3cee9612be26d 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -323,7 +323,9 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
>  	struct r10conf *conf = r10_bio->mddev->private;
>  
>  	if (!test_and_set_bit(R10BIO_Returned, &r10_bio->state)) {
> -		if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
> +		if (test_bit(R10BIO_Invalid, &r10_bio->state))
> +			bio->bi_status = BLK_STS_INVAL;
> +		else if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
>  			bio->bi_status = BLK_STS_IOERR;
>  		bio_endio(bio);
>  	}
> @@ -403,6 +405,8 @@ static void raid10_end_read_request(struct bio *bio)
>  		set_bit(R10BIO_Uptodate, &r10_bio->state);
>  	} else if (!raid1_should_handle_error(bio)) {
>  		uptodate = 1;
> +		if (bio->bi_status == BLK_STS_INVAL)
> +			set_bit(R10BIO_Invalid, &r10_bio->state);
>  	} else {
>  		/* If all other devices that store this block have
>  		 * failed, we want to return the error upwards rather
> @@ -523,6 +527,8 @@ static void raid10_end_write_request(struct bio *bio)
>  		 * before rdev->recovery_offset, but for simplicity we don't
>  		 * check this here.
>  		 */
> +		if (bio->bi_status == BLK_STS_INVAL)
> +			set_bit(R10BIO_Invalid, &r10_bio->state);
>  		if (test_bit(In_sync, &rdev->flags) &&
>  		    !test_bit(Faulty, &rdev->flags))
>  			set_bit(R10BIO_Uptodate, &r10_bio->state);
> diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
> index ec79d87fb92f6..a1adad3acafe1 100644
> --- a/drivers/md/raid10.h
> +++ b/drivers/md/raid10.h
> @@ -175,5 +175,11 @@ enum r10bio_state {
>  /* failfast devices did receive failfast requests. */
>  	R10BIO_FailFast,
>  	R10BIO_Discard,
> +/* An invalid I/O (e.g. a bio rejected by the lower device because it does not
> + * meet that device's queue_limits) is not a device failure. Report the error
> + * to the caller without faulting the device or retrying, and do not complete a
> + * write as if it had succeeded.
> + */
> +	R10BIO_Invalid,
>  };
>  #endif
> --
-- 
 -----Open up your eyes, open up your mind, open up your code -------   
/ Dr. David Alan Gilbert    |       Running GNU/Linux       | Happy  \ 
\        dave @ treblig.org |                               | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/

  reply	other threads:[~2026-06-17 16:44 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-16 15:05 [PATCH 1/2] dm-io: clone the source bio instead of copying its biovec Keith Busch
2026-06-16 15:05 ` [PATCH 2/2] dm-raid1: don't fail the mirror for invalid I/O errors Keith Busch
2026-06-16 17:54   ` Dr. David Alan Gilbert
2026-06-16 18:48     ` Keith Busch
2026-06-16 20:09       ` Dr. David Alan Gilbert
2026-06-16 23:45         ` Keith Busch
2026-06-16 23:47           ` Dr. David Alan Gilbert
2026-06-17 15:08         ` Keith Busch
2026-06-17 15:33           ` Dr. David Alan Gilbert
2026-06-17 16:21             ` Keith Busch
2026-06-17 16:44               ` Dr. David Alan Gilbert [this message]
2026-06-17 16:54                 ` Keith Busch
2026-06-17 16:59                   ` Dr. David Alan Gilbert
2026-06-16 15:40 ` Keith Busch
2026-06-16 15:58 ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ajLO80kZmFnyR2mH@gallifrey \
    --to=linux@treblig.org \
    --cc=dm-devel@lists.linux.dev \
    --cc=kbusch@kernel.org \
    --cc=kbusch@meta.com \
    --cc=linux-block@vger.kernel.org \
    --cc=mpatocka@redhat.com \
    --cc=vklimovs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox