All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] dm-raid1: keep writing after leg failure
@ 2015-04-03  3:51 Lidong Zhong
  2015-04-08 12:27 ` Heinz Mauelshagen
  2015-04-21 21:49 ` Brassow Jonathan
  0 siblings, 2 replies; 5+ messages in thread
From: Lidong Zhong @ 2015-04-03  3:51 UTC (permalink / raw)
  To: dm-devel; +Cc: lwang

Currently, if there is a leg failure, the bio is put on the hold
list until userspace replaces or removes the leg. This patch makes
dm-raid1 ignore the failure and keep subsequent bios flowing.
The motivation is that a temporary path failure under clvmd may be
treated as a device failure, which leads to the cluster raid1 leg being
removed or replaced even though the device is fine. Re-adding the
device afterwards requires a full resync, which takes a long time.
---
 drivers/md/dm-raid1.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9584443..e237c42 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -24,7 +24,9 @@
 #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
 
 #define DM_RAID1_HANDLE_ERRORS 0x01
+#define DM_RAID1_KEEP_LOG      0x02
 #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
+#define keep_log(p)	        ((p)->features & DM_RAID1_KEEP_LOG)
 
 static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
 
@@ -750,7 +752,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
 		dm_rh_delay(ms->rh, bio);
 
 	while ((bio = bio_list_pop(&nosync))) {
-		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
+		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
 			spin_lock_irq(&ms->lock);
 			bio_list_add(&ms->failures, bio);
 			spin_unlock_irq(&ms->lock);
@@ -800,9 +802,19 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
 		 * be wrong if the failed leg returned after reboot and
 		 * got replicated back to the good legs.)
 		 */
-		if (!get_valid_mirror(ms))
+
+		/*
+		 * Return EIO if keep_log is set and the log device has failed.
+		 */
+		if (!get_valid_mirror(ms) || (keep_log(ms) && !ms->log_failure))
 			bio_endio(bio, -EIO);
-		else if (errors_handled(ms))
+		/*
+		 * After userspace has been notified that the leg has failed,
+		 * we just pretend that the bio has succeeded since the region
+		 * has already been marked nosync. It's OK to do the recovery
+		 * after the device comes back.
+		 */
+		else if (errors_handled(ms) && !keep_log(ms))
 			hold_bio(ms, bio);
 		else
 			bio_endio(bio, 0);
@@ -1005,8 +1017,15 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
 		return -EINVAL;
 	}
 
+	argc--;
+	argv++;
 	(*args_used)++;
 
+	if (!strcmp("keep_log", argv[0])) {
+		ms->features |= DM_RAID1_KEEP_LOG;
+		(*args_used)++;
+	}
+
 	return 0;
 }
 
@@ -1382,8 +1401,11 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
 			       (unsigned long long)ms->mirror[m].offset);
 
-		if (ms->features & DM_RAID1_HANDLE_ERRORS)
+		if (errors_handled(ms) && keep_log(ms))
+			DMEMIT(" 2 handle_errors keep_log");
+		else if (errors_handled(ms))
 			DMEMIT(" 1 handle_errors");
+
 	}
 }
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH RFC] dm-raid1: keep writing after leg failure
  2015-04-03  3:51 [PATCH RFC] dm-raid1: keep writing after leg failure Lidong Zhong
@ 2015-04-08 12:27 ` Heinz Mauelshagen
  2015-04-09  2:40   ` Lidong Zhong
  2015-04-21 21:49 ` Brassow Jonathan
  1 sibling, 1 reply; 5+ messages in thread
From: Heinz Mauelshagen @ 2015-04-08 12:27 UTC (permalink / raw)
  To: device-mapper development; +Cc: Lidong Zhong

Lidong,

please see the patch series of 3 in response with $Subject.

Heinz

On 04/03/2015 05:51 AM, Lidong Zhong wrote:
> Currently if there is a leg failure, the bio will be put into the hold
> list until userspace replace/remove the leg. Here we are trying to make
> dm-raid1 ignore the failure and keep the following bios going on.
> This is because there maybe a temporary path failure in clvmd
> which leads to cluster raid1 remove/replace the fake device failure. And
> it takes a long time to do the full sync if we readd the device back.
> ---
>   drivers/md/dm-raid1.c | 30 ++++++++++++++++++++++++++----
>   1 file changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
> index 9584443..e237c42 100644
> --- a/drivers/md/dm-raid1.c
> +++ b/drivers/md/dm-raid1.c
> @@ -24,7 +24,9 @@
>   #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel. */
>   
>   #define DM_RAID1_HANDLE_ERRORS 0x01
> +#define DM_RAID1_KEEP_LOG      0x02
>   #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
> +#define keep_log(p)	        ((p)->features & DM_RAID1_KEEP_LOG)
>   
>   static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
>   
> @@ -750,7 +752,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
>   		dm_rh_delay(ms->rh, bio);
>   
>   	while ((bio = bio_list_pop(&nosync))) {
> -		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
> +		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) {
>   			spin_lock_irq(&ms->lock);
>   			bio_list_add(&ms->failures, bio);
>   			spin_unlock_irq(&ms->lock);
> @@ -800,9 +802,19 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
>   		 * be wrong if the failed leg returned after reboot and
>   		 * got replicated back to the good legs.)
>   		 */
> -		if (!get_valid_mirror(ms))
> +
> +		/*
> +		 * we return EIO when the log device is failed if keep_log is set
> +		 */
> +		if (!get_valid_mirror(ms) || (keep_log(ms) && !ms->log_failure))
>   			bio_endio(bio, -EIO);
> -		else if (errors_handled(ms))
> +		/*
> +		 * After the userspace get noticed that the leg has failed,
> +		 * we just pretend that the bio has suceeded since the region
> +		 * has already been marked nosync. It's OK do the recovery after
> +		 * the device comes back
> +		 */
> +		else if (errors_handled(ms) && !keep_log(ms))
>   			hold_bio(ms, bio);
>   		else
>   			bio_endio(bio, 0);
> @@ -1005,8 +1017,15 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
>   		return -EINVAL;
>   	}
>   
> +	argc--;
> +	argv++;
>   	(*args_used)++;
>   
> +	if (!strcmp("keep_log", argv[0])) {
> +		ms->features |= DM_RAID1_KEEP_LOG;
> +		(*args_used)++;
> +	}
> +
>   	return 0;
>   }
>   
> @@ -1382,8 +1401,11 @@ static void mirror_status(struct dm_target *ti, status_type_t type,
>   			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
>   			       (unsigned long long)ms->mirror[m].offset);
>   
> -		if (ms->features & DM_RAID1_HANDLE_ERRORS)
> +		if (errors_handled(ms) && keep_log(ms))
> +			DMEMIT(" 2 handle_errors keep_log");
> +		else if (errors_handled(ms))
>   			DMEMIT(" 1 handle_errors");
> +
>   	}
>   }
>   

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RFC] dm-raid1: keep writing after leg failure
  2015-04-08 12:27 ` Heinz Mauelshagen
@ 2015-04-09  2:40   ` Lidong Zhong
  0 siblings, 0 replies; 5+ messages in thread
From: Lidong Zhong @ 2015-04-09  2:40 UTC (permalink / raw)
  To: device-mapper development

>>> On 4/8/2015 at 08:27 PM, in message <55251EBC.3060101@redhat.com>, Heinz
Mauelshagen <heinzm@redhat.com> wrote: 
> Lidong, 
>  
> please see the patch series of 3 in response with $Subject. 
>  

Hi Heinz,
Thanks for your correction.

Regards,
Lidong

> Heinz 
>  
> On 04/03/2015 05:51 AM, Lidong Zhong wrote: 
> > Currently if there is a leg failure, the bio will be put into the hold 
> > list until userspace replace/remove the leg. Here we are trying to make 
> > dm-raid1 ignore the failure and keep the following bios going on. 
> > This is because there maybe a temporary path failure in clvmd 
> > which leads to cluster raid1 remove/replace the fake device failure. And 
> > it takes a long time to do the full sync if we readd the device back. 
> > --- 
> >   drivers/md/dm-raid1.c | 30 ++++++++++++++++++++++++++---- 
> >   1 file changed, 26 insertions(+), 4 deletions(-) 
> > 
> > diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c 
> > index 9584443..e237c42 100644 
> > --- a/drivers/md/dm-raid1.c 
> > +++ b/drivers/md/dm-raid1.c 
> > @@ -24,7 +24,9 @@ 
> >   #define MAX_RECOVERY 1	/* Maximum number of regions recovered in parallel.  
> */ 
> >    
> >   #define DM_RAID1_HANDLE_ERRORS 0x01 
> > +#define DM_RAID1_KEEP_LOG      0x02 
> >   #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS) 
> > +#define keep_log(p)	        ((p)->features & DM_RAID1_KEEP_LOG) 
> >    
> >   static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); 
> >    
> > @@ -750,7 +752,7 @@ static void do_writes(struct mirror_set *ms, struct  
> bio_list *writes) 
> >   		dm_rh_delay(ms->rh, bio); 
> >    
> >   	while ((bio = bio_list_pop(&nosync))) { 
> > -		if (unlikely(ms->leg_failure) && errors_handled(ms)) { 
> > +		if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) { 
> >   			spin_lock_irq(&ms->lock); 
> >   			bio_list_add(&ms->failures, bio); 
> >   			spin_unlock_irq(&ms->lock); 
> > @@ -800,9 +802,19 @@ static void do_failures(struct mirror_set *ms, struct  
> bio_list *failures) 
> >   		 * be wrong if the failed leg returned after reboot and 
> >   		 * got replicated back to the good legs.) 
> >   		 */ 
> > -		if (!get_valid_mirror(ms)) 
> > + 
> > +		/* 
> > +		 * we return EIO when the log device is failed if keep_log is set 
> > +		 */ 
> > +		if (!get_valid_mirror(ms) || (keep_log(ms) && !ms->log_failure)) 
> >   			bio_endio(bio, -EIO); 
> > -		else if (errors_handled(ms)) 
> > +		/* 
> > +		 * After the userspace get noticed that the leg has failed, 
> > +		 * we just pretend that the bio has suceeded since the region 
> > +		 * has already been marked nosync. It's OK do the recovery after 
> > +		 * the device comes back 
> > +		 */ 
> > +		else if (errors_handled(ms) && !keep_log(ms)) 
> >   			hold_bio(ms, bio); 
> >   		else 
> >   			bio_endio(bio, 0); 
> > @@ -1005,8 +1017,15 @@ static int parse_features(struct mirror_set *ms,  
> unsigned argc, char **argv, 
> >   		return -EINVAL; 
> >   	} 
> >    
> > +	argc--; 
> > +	argv++; 
> >   	(*args_used)++; 
> >    
> > +	if (!strcmp("keep_log", argv[0])) { 
> > +		ms->features |= DM_RAID1_KEEP_LOG; 
> > +		(*args_used)++; 
> > +	} 
> > + 
> >   	return 0; 
> >   } 
> >    
> > @@ -1382,8 +1401,11 @@ static void mirror_status(struct dm_target *ti,  
> status_type_t type, 
> >   			DMEMIT(" %s %llu", ms->mirror[m].dev->name, 
> >   			       (unsigned long long)ms->mirror[m].offset); 
> >    
> > -		if (ms->features & DM_RAID1_HANDLE_ERRORS) 
> > +		if (errors_handled(ms) && keep_log(ms)) 
> > +			DMEMIT(" 2 handle_errors keep_log"); 
> > +		else if (errors_handled(ms)) 
> >   			DMEMIT(" 1 handle_errors"); 
> > + 
> >   	} 
> >   } 
> >    
>  
> -- 
> dm-devel mailing list 
> dm-devel@redhat.com 
> https://www.redhat.com/mailman/listinfo/dm-devel 
>  
>  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RFC] dm-raid1: keep writing after leg failure
  2015-04-03  3:51 [PATCH RFC] dm-raid1: keep writing after leg failure Lidong Zhong
  2015-04-08 12:27 ` Heinz Mauelshagen
@ 2015-04-21 21:49 ` Brassow Jonathan
  2015-04-22  2:51   ` Lidong Zhong
  1 sibling, 1 reply; 5+ messages in thread
From: Brassow Jonathan @ 2015-04-21 21:49 UTC (permalink / raw)
  To: device-mapper development; +Cc: lwang


> On Apr 2, 2015, at 10:51 PM, Lidong Zhong <lzhong@suse.com> wrote:
> 
> Currently if there is a leg failure, the bio will be put into the hold
> list until userspace replace/remove the leg. Here we are trying to make
> dm-raid1 ignore the failure and keep the following bios going on.
> This is because there maybe a temporary path failure in clvmd
> which leads to cluster raid1 remove/replace the fake device failure. And
> it takes a long time to do the full sync if we readd the device back.
> ---

Just be aware that there are failure scenarios that this does not handle as well.  Like a reboot causing the legs to return but not the log.  It’s a remote possibility, but something the old model considered.  The new RAID targets which leverage the MD personalities are much better (e.g. ‘lvcreate --type raid1’), but they don’t support active-active cluster setups.

I’ve seen a couple things on the linux-raid mailing list suggesting that someone (Goldwyn Rodrigues) may be thinking about making RAID1/10 cluster-aware…  LVM should be able to take advantage of this if it became a reality.

Are you interested in providing your use-case for cluster mirroring (vs opting for active/passive mirroring)?

 brassow

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RFC] dm-raid1: keep writing after leg failure
  2015-04-21 21:49 ` Brassow Jonathan
@ 2015-04-22  2:51   ` Lidong Zhong
  0 siblings, 0 replies; 5+ messages in thread
From: Lidong Zhong @ 2015-04-22  2:51 UTC (permalink / raw)
  To: device-mapper development; +Cc: Liuhua Wang

Hi Brassow,
Thanks for your review.
>>> On 4/22/2015 at 05:49 AM, in message
<D7A1AF2A-21CC-4B39-9221-511A312B2AA5@redhat.com>, Brassow Jonathan
<jbrassow@redhat.com> wrote: 

> > On Apr 2, 2015, at 10:51 PM, Lidong Zhong <lzhong@suse.com> wrote: 
> >  
> > Currently if there is a leg failure, the bio will be put into the hold 
> > list until userspace replace/remove the leg. Here we are trying to make 
> > dm-raid1 ignore the failure and keep the following bios going on. 
> > This is because there maybe a temporary path failure in clvmd 
> > which leads to cluster raid1 remove/replace the fake device failure. And 
> > it takes a long time to do the full sync if we readd the device back. 
> > --- 
>  
> Just be aware that there are failure scenarios that this does not handle as  
> well.  Like a reboot causing the legs to return but not the log.  It’s a  
In this scenario the raid1 cannot even be activated. I think it's beyond what
dm-raid1 can handle.

> remote possibility, but something the old model considered.  The new RAID  
> targets which leverage the MD personalities are much better (e.g. ‘lvcreate  
> —type raid1’), but they don’t support active-active cluster setups. 
>  
> I’ve seen a couple things on the linux-raid mailing list suggesting that  
> someone (Goldwyn Rodrigues) may be thinking about making RAID1/10  
> cluster-aware…  LVM should be able to take advantage of this if it became a  
> reality. 
>  
> Are you interested in providing your use-case for cluster mirroring (vs  
> opting for active/passive mirroring)? 
The most common use case would be cLVM providing shared storage with
ocfs2/gfs2 running on top of it. Currently it's the only way to provide clustered raid1.
Of course, what Goldwyn is working on now would be a better choice, but how long
will it take to become stable enough? Also, the patch series would be an enhancement
for dm-raid1.

Regards,
Lidong
>  
>  brassow 
>  
> -- 
> dm-devel mailing list 
> dm-devel@redhat.com 
> https://www.redhat.com/mailman/listinfo/dm-devel 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-04-22  2:51 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2015-04-03  3:51 [PATCH RFC] dm-raid1: keep writing after leg failure Lidong Zhong
2015-04-08 12:27 ` Heinz Mauelshagen
2015-04-09  2:40   ` Lidong Zhong
2015-04-21 21:49 ` Brassow Jonathan
2015-04-22  2:51   ` Lidong Zhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.