Linux RAID subsystem development
 help / color / mirror / Atom feed
* [PATCH v2] md: introduce new personality funciton start()
@ 2017-11-16 18:12 Song Liu
  2017-11-17 19:21 ` Shaohua Li
  0 siblings, 1 reply; 3+ messages in thread
From: Song Liu @ 2017-11-16 18:12 UTC (permalink / raw)
  To: linux-raid; +Cc: kernel-team, shli, neilb, artur.paszkiewicz, Song Liu

In do_md_run(), md threads should not wake up until the array is fully
initialized in md_run(). However, in raid5_run(), raid5-cache may wake
up mddev->thread to flush stripes that need to be written back. This
design doesn't break badly right now. But it could lead to bad bug in
the future.

This patch tries to resolve this problem by splitting start up work
into two personality functions, run() and start(). Tasks that do not
require the md threads should go into run(), while task that require
the md threads go into start().

r5l_load_log() is moved to raid5_start(), so it is not called until
the md threads are started in do_md_run().

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 drivers/md/md.c          | 13 ++++++++-----
 drivers/md/md.h          |  5 +++++
 drivers/md/raid5-cache.c | 22 +++++++++++++++++-----
 drivers/md/raid5-log.h   |  1 +
 drivers/md/raid5.c       | 10 ++++++++++
 5 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index e014d39..afeab7b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5561,11 +5561,6 @@ int md_run(struct mddev *mddev)
 	if (start_readonly && mddev->ro == 0)
 		mddev->ro = 2; /* read-only, but switch on first write */
 
-	/*
-	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
-	 * up mddev->thread. It is important to initialize critical
-	 * resources for mddev->thread BEFORE calling pers->run().
-	 */
 	err = pers->run(mddev);
 	if (err)
 		pr_warn("md: pers->run() failed ...\n");
@@ -5680,6 +5675,14 @@ static int do_md_run(struct mddev *mddev)
 		md_allow_write(mddev);
 
 	md_wakeup_thread(mddev->thread);
+	/* run start up tasks that require md_thread */
+	if (mddev->pers->start) {
+		err = mddev->pers->start(mddev);
+		if (err) {
+			bitmap_destroy(mddev);
+			goto out;
+		}
+	}
 	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
 	set_capacity(mddev->gendisk, mddev->array_sectors);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7d6bcf0..5b5e95a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -523,7 +523,12 @@ struct md_personality
 	struct list_head list;
 	struct module *owner;
 	bool (*make_request)(struct mddev *mddev, struct bio *bio);
+	/* start up works that do NOT require md_thread. tasks that
+	 * requires md_thread should go into start()
+	 */
 	int (*run)(struct mddev *mddev);
+	/* start up works that require md threads */
+	int (*start)(struct mddev *mddev);
 	void (*free)(struct mddev *mddev, void *priv);
 	void (*status)(struct seq_file *seq, struct mddev *mddev);
 	/* error_handler must set ->faulty and clear ->in_sync
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index f1c86d9..6e28adc 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2448,7 +2448,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
 		raid5_release_stripe(sh);
 	}
 
-	md_wakeup_thread(conf->mddev->thread);
 	/* reuse conf->wait_for_quiescent in recovery */
 	wait_event(conf->wait_for_quiescent,
 		   atomic_read(&conf->active_stripes) == 0);
@@ -3040,6 +3039,23 @@ static int r5l_load_log(struct r5l_log *log)
 	return ret;
 }
 
+int r5l_start(struct r5l_log *log)
+{
+	int ret;
+
+	if (!log)
+		return 0;
+
+	ret = r5l_load_log(log);
+	if (ret) {
+		struct mddev *mddev = log->rdev->mddev;
+		struct r5conf *conf = mddev->private;
+
+		r5l_exit_log(conf);
+	}
+	return ret;
+}
+
 void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct r5conf *conf = mddev->private;
@@ -3142,13 +3158,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 
 	rcu_assign_pointer(conf->log, log);
 
-	if (r5l_load_log(log))
-		goto error;
-
 	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
 	return 0;
 
-error:
 	rcu_assign_pointer(conf->log, NULL);
 	md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index c3596a2..d8577b0 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -31,6 +31,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
 extern void r5c_update_on_rdev_error(struct mddev *mddev,
 				     struct md_rdev *rdev);
 extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
+extern int r5l_start(struct r5l_log *log);
 
 extern struct dma_async_tx_descriptor *
 ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1649e82..39cdd78 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8364,6 +8364,13 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 	return err;
 }
 
+static int raid5_start(struct mddev *mddev)
+{
+	struct r5conf *conf = mddev->private;
+
+	return r5l_start(conf->log);
+}
+
 static struct md_personality raid6_personality =
 {
 	.name		= "raid6",
@@ -8371,6 +8378,7 @@ static struct md_personality raid6_personality =
 	.owner		= THIS_MODULE,
 	.make_request	= raid5_make_request,
 	.run		= raid5_run,
+	.start		= raid5_start,
 	.free		= raid5_free,
 	.status		= raid5_status,
 	.error_handler	= raid5_error,
@@ -8395,6 +8403,7 @@ static struct md_personality raid5_personality =
 	.owner		= THIS_MODULE,
 	.make_request	= raid5_make_request,
 	.run		= raid5_run,
+	.start		= raid5_start,
 	.free		= raid5_free,
 	.status		= raid5_status,
 	.error_handler	= raid5_error,
@@ -8420,6 +8429,7 @@ static struct md_personality raid4_personality =
 	.owner		= THIS_MODULE,
 	.make_request	= raid5_make_request,
 	.run		= raid5_run,
+	.start		= raid5_start,
 	.free		= raid5_free,
 	.status		= raid5_status,
 	.error_handler	= raid5_error,
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] md: introduce new personality funciton start()
  2017-11-16 18:12 [PATCH v2] md: introduce new personality funciton start() Song Liu
@ 2017-11-17 19:21 ` Shaohua Li
  2017-11-17 20:23   ` Song Liu
  0 siblings, 1 reply; 3+ messages in thread
From: Shaohua Li @ 2017-11-17 19:21 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid, kernel-team, neilb, artur.paszkiewicz

On Thu, Nov 16, 2017 at 10:12:44AM -0800, Song Liu wrote:
> In do_md_run(), md threads should not wake up until the array is fully
> initialized in md_run(). However, in raid5_run(), raid5-cache may wake
> up mddev->thread to flush stripes that need to be written back. This
> design doesn't break badly right now. But it could lead to bad bug in
> the future.
> 
> This patch tries to resolve this problem by splitting start up work
> into two personality functions, run() and start(). Tasks that do not
> require the md threads should go into run(), while task that require
> the md threads go into start().
> 
> r5l_load_log() is moved to raid5_start(), so it is not called until
> the md threads are started in do_md_run().
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  drivers/md/md.c          | 13 ++++++++-----
>  drivers/md/md.h          |  5 +++++
>  drivers/md/raid5-cache.c | 22 +++++++++++++++++-----
>  drivers/md/raid5-log.h   |  1 +
>  drivers/md/raid5.c       | 10 ++++++++++
>  5 files changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index e014d39..afeab7b 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -5561,11 +5561,6 @@ int md_run(struct mddev *mddev)
>  	if (start_readonly && mddev->ro == 0)
>  		mddev->ro = 2; /* read-only, but switch on first write */
>  
> -	/*
> -	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
> -	 * up mddev->thread. It is important to initialize critical
> -	 * resources for mddev->thread BEFORE calling pers->run().
> -	 */
>  	err = pers->run(mddev);
>  	if (err)
>  		pr_warn("md: pers->run() failed ...\n");
> @@ -5680,6 +5675,14 @@ static int do_md_run(struct mddev *mddev)
>  		md_allow_write(mddev);
>  
>  	md_wakeup_thread(mddev->thread);
> +	/* run start up tasks that require md_thread */
> +	if (mddev->pers->start) {
> +		err = mddev->pers->start(mddev);
> +		if (err) {
> +			bitmap_destroy(mddev);
> +			goto out;
> +		}
> +	}

Looks there is one thing left. dm-raid.c calls md_run but not do_md_run,
dm-raid supports raid5-cache too. So we need export a wrap for ->start and let
dm-raid use it.

This doesn't seem to prevent reshape happening. mddev->thread calls
check_recovery which can create and wakeup sync_thread. Probably we should
preserve ->recovery and set it 0, do log load and then set it back, this way we
should prevent the reshape for sure.

Thanks,
Shaohua
>  	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
>  
>  	set_capacity(mddev->gendisk, mddev->array_sectors);
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 7d6bcf0..5b5e95a 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -523,7 +523,12 @@ struct md_personality
>  	struct list_head list;
>  	struct module *owner;
>  	bool (*make_request)(struct mddev *mddev, struct bio *bio);
> +	/* start up works that do NOT require md_thread. tasks that
> +	 * requires md_thread should go into start()
> +	 */
>  	int (*run)(struct mddev *mddev);
> +	/* start up works that require md threads */
> +	int (*start)(struct mddev *mddev);
>  	void (*free)(struct mddev *mddev, void *priv);
>  	void (*status)(struct seq_file *seq, struct mddev *mddev);
>  	/* error_handler must set ->faulty and clear ->in_sync
> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
> index f1c86d9..6e28adc 100644
> --- a/drivers/md/raid5-cache.c
> +++ b/drivers/md/raid5-cache.c
> @@ -2448,7 +2448,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
>  		raid5_release_stripe(sh);
>  	}
>  
> -	md_wakeup_thread(conf->mddev->thread);
>  	/* reuse conf->wait_for_quiescent in recovery */
>  	wait_event(conf->wait_for_quiescent,
>  		   atomic_read(&conf->active_stripes) == 0);
> @@ -3040,6 +3039,23 @@ static int r5l_load_log(struct r5l_log *log)
>  	return ret;
>  }
>  
> +int r5l_start(struct r5l_log *log)
> +{
> +	int ret;
> +
> +	if (!log)
> +		return 0;
> +
> +	ret = r5l_load_log(log);
> +	if (ret) {
> +		struct mddev *mddev = log->rdev->mddev;
> +		struct r5conf *conf = mddev->private;
> +
> +		r5l_exit_log(conf);
> +	}
> +	return ret;
> +}
> +
>  void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
>  {
>  	struct r5conf *conf = mddev->private;
> @@ -3142,13 +3158,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
>  
>  	rcu_assign_pointer(conf->log, log);
>  
> -	if (r5l_load_log(log))
> -		goto error;
> -
>  	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
>  	return 0;
>  
> -error:
>  	rcu_assign_pointer(conf->log, NULL);
>  	md_unregister_thread(&log->reclaim_thread);
>  reclaim_thread:
> diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
> index c3596a2..d8577b0 100644
> --- a/drivers/md/raid5-log.h
> +++ b/drivers/md/raid5-log.h
> @@ -31,6 +31,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
>  extern void r5c_update_on_rdev_error(struct mddev *mddev,
>  				     struct md_rdev *rdev);
>  extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
> +extern int r5l_start(struct r5l_log *log);
>  
>  extern struct dma_async_tx_descriptor *
>  ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 1649e82..39cdd78 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -8364,6 +8364,13 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
>  	return err;
>  }
>  
> +static int raid5_start(struct mddev *mddev)
> +{
> +	struct r5conf *conf = mddev->private;
> +
> +	return r5l_start(conf->log);
> +}
> +
>  static struct md_personality raid6_personality =
>  {
>  	.name		= "raid6",
> @@ -8371,6 +8378,7 @@ static struct md_personality raid6_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> @@ -8395,6 +8403,7 @@ static struct md_personality raid5_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> @@ -8420,6 +8429,7 @@ static struct md_personality raid4_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> -- 
> 2.9.5
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] md: introduce new personality funciton start()
  2017-11-17 19:21 ` Shaohua Li
@ 2017-11-17 20:23   ` Song Liu
  0 siblings, 0 replies; 3+ messages in thread
From: Song Liu @ 2017-11-17 20:23 UTC (permalink / raw)
  To: Shaohua Li
  Cc: linux-raid, Kernel Team, NeilBrown, artur.paszkiewicz@intel.com


> On Nov 17, 2017, at 11:21 AM, Shaohua Li <shli@kernel.org> wrote:
> 
> On Thu, Nov 16, 2017 at 10:12:44AM -0800, Song Liu wrote:
>> In do_md_run(), md threads should not wake up until the array is fully
>> initialized in md_run(). However, in raid5_run(), raid5-cache may wake
>> up mddev->thread to flush stripes that need to be written back. This
>> design doesn't break badly right now. But it could lead to bad bug in
>> the future.
>> 
>> This patch tries to resolve this problem by splitting start up work
>> into two personality functions, run() and start(). Tasks that do not
>> require the md threads should go into run(), while task that require
>> the md threads go into start().
>> 
>> r5l_load_log() is moved to raid5_start(), so it is not called until
>> the md threads are started in do_md_run().
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> drivers/md/md.c          | 13 ++++++++-----
>> drivers/md/md.h          |  5 +++++
>> drivers/md/raid5-cache.c | 22 +++++++++++++++++-----
>> drivers/md/raid5-log.h   |  1 +
>> drivers/md/raid5.c       | 10 ++++++++++
>> 5 files changed, 41 insertions(+), 10 deletions(-)
>> 
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index e014d39..afeab7b 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -5561,11 +5561,6 @@ int md_run(struct mddev *mddev)
>> 	if (start_readonly && mddev->ro == 0)
>> 		mddev->ro = 2; /* read-only, but switch on first write */
>> 
>> -	/*
>> -	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
>> -	 * up mddev->thread. It is important to initialize critical
>> -	 * resources for mddev->thread BEFORE calling pers->run().
>> -	 */
>> 	err = pers->run(mddev);
>> 	if (err)
>> 		pr_warn("md: pers->run() failed ...\n");
>> @@ -5680,6 +5675,14 @@ static int do_md_run(struct mddev *mddev)
>> 		md_allow_write(mddev);
>> 
>> 	md_wakeup_thread(mddev->thread);
>> +	/* run start up tasks that require md_thread */
>> +	if (mddev->pers->start) {
>> +		err = mddev->pers->start(mddev);
>> +		if (err) {
>> +			bitmap_destroy(mddev);
>> +			goto out;
>> +		}
>> +	}
> 
> Looks there is one thing left. dm-raid.c calls md_run but not do_md_run,
> dm-raid supports raid5-cache too. So we need export a wrap for ->start and let
> dm-raid use it.

I can add this. 


> This doesn't seem to prevent reshape happening. mddev->thread calls
> check_recovery which can create and wakeup sync_thread. Probably we should
> preserve ->recovery and set it 0, do log load and then set it back, this way we
> should prevent the reshape for sure.

Maybe we need some systematic way to gate both threads (md_thread and 
sync_thread). Maybe some extra flags like:

MD_THREAD_ENABLED
SYNC_THREAD_ENABLED

Thanks,
Song


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-11-17 20:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-11-16 18:12 [PATCH v2] md: introduce new personality funciton start() Song Liu
2017-11-17 19:21 ` Shaohua Li
2017-11-17 20:23   ` Song Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox