Re: [PATCH V2 08/13] md: set MD_CHANGE_PENDING in a spinlocked region

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Shaohua Li <shli@kernel.org>
To: Guoqing Jiang <gqjiang@suse.com>
Cc: neilb@suse.de, linux-raid@vger.kernel.org
Subject: Re: [PATCH V2 08/13] md: set MD_CHANGE_PENDING in a spinlocked region
Date: Mon, 25 Apr 2016 10:32:01 -0700	[thread overview]
Message-ID: <20160425173201.GA11993@kernel.org> (raw)
In-Reply-To: <1461221895-20688-1-git-send-email-gqjiang@suse.com>

On Thu, Apr 21, 2016 at 02:58:15PM +0800, Guoqing Jiang wrote:
> Some code waits for a metadata update by:
> 
> 1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
> 2. setting MD_CHANGE_PENDING and waking the management thread
> 3. waiting for MD_CHANGE_PENDING to be cleared
> 
> If the first two are done without locking, the code in md_update_sb()
> which checks if it needs to repeat might test if an update is needed
> before step 1, then clear MD_CHANGE_PENDING after step 2, resulting
> in the wait returning early.
> 
> So make sure all places that set MD_CHANGE_PENDING are protected by
> mddev->lock.
> 
> Reviewed-by: NeilBrown <neilb@suse.com>
> Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
> ---
> Changes:
> 1. s/write_lock/lock which is reported by auto build
> 
>  drivers/md/md.c          | 22 +++++++++++++++++-----
>  drivers/md/raid1.c       |  2 ++
>  drivers/md/raid10.c      |  6 +++++-
>  drivers/md/raid5-cache.c |  2 ++
>  drivers/md/raid5.c       |  2 ++
>  5 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index bf2b74d..e74657e 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2293,12 +2293,18 @@ repeat:
>  	if (mddev_is_clustered(mddev)) {
>  		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
>  			force_change = 1;
> +		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
> +			nospares = 1;
>  		ret = md_cluster_ops->metadata_update_start(mddev);
>  		/* Has someone else has updated the sb */
>  		if (!does_sb_need_changing(mddev)) {
>  			if (ret == 0)
>  				md_cluster_ops->metadata_update_cancel(mddev);
> -			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
> +			spin_lock(&mddev->lock);
> +			if (!test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
> +			    !test_bit(MD_CHANGE_CLEAN, &mddev->flags))
> +				clear_bit(MD_CHANGE_PENDING, &mddev->flags);
> +			spin_unlock(&mddev->lock);
>  			return;
>  		}
>  	}
> @@ -2434,7 +2440,8 @@ repeat:
>  
>  	spin_lock(&mddev->lock);
>  	if (mddev->in_sync != sync_req ||
> -	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
> +	    test_bit(MD_CHANGE_DEVS, &mddev->flags) ||
> +	    test_bit(MD_CHANGE_CLEAN, &mddev->flags)) {
>  		/* have to write it out again */
>  		spin_unlock(&mddev->lock);
>  		goto repeat;
> @@ -8145,18 +8152,20 @@ void md_do_sync(struct md_thread *thread)
>  		}
>  	}
>   skip:
> -	set_bit(MD_CHANGE_DEVS, &mddev->flags);
> -
>  	if (mddev_is_clustered(mddev) &&
>  	    ret == 0) {
>  		/* set CHANGE_PENDING here since maybe another
>  		 * update is needed, so other nodes are informed */
> +		spin_lock(&mddev->lock);
> +		set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  		set_bit(MD_CHANGE_PENDING, &mddev->flags);
> +		spin_unlock(&mddev->lock);
>  		md_wakeup_thread(mddev->thread);
>  		wait_event(mddev->sb_wait,
>  			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
>  		md_cluster_ops->resync_finish(mddev);
> -	}
> +	} else
> +		set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  
>  	spin_lock(&mddev->lock);
>  	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
> @@ -8548,6 +8557,7 @@ EXPORT_SYMBOL(md_finish_reshape);
>  int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
>  		       int is_new)
>  {
> +	struct mddev *mddev = rdev->mddev;
>  	int rv;
>  	if (is_new)
>  		s += rdev->new_data_offset;
> @@ -8557,8 +8567,10 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
>  	if (rv == 0) {
>  		/* Make sure they get written out promptly */
>  		sysfs_notify_dirent_safe(rdev->sysfs_state);
> +		spin_lock(&mddev->lock);
>  		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
>  		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
> +		spin_unlock(&mddev->lock);
>  		md_wakeup_thread(rdev->mddev->thread);
>  		return 1;
>  	} else
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index a7f2b9c..985fa07 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1474,8 +1474,10 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
>  	 * if recovery is running, make sure it aborts.
>  	 */
>  	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> +	spin_lock(&mddev->lock);
>  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  	set_bit(MD_CHANGE_PENDING, &mddev->flags);
> +	spin_unlock(&mddev->lock);
>  	printk(KERN_ALERT
>  	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
>  	       "md/raid1:%s: Operation continuing on %d devices.\n",
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index e3fd725..98a4cf1 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1102,8 +1102,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
>  		bio->bi_iter.bi_sector < conf->reshape_progress))) {
>  		/* Need to update reshape_position in metadata */
>  		mddev->reshape_position = conf->reshape_progress;
> +		spin_lock(&mddev->lock);
>  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  		set_bit(MD_CHANGE_PENDING, &mddev->flags);
> +		spin_unlock(&mddev->lock);
>  		md_wakeup_thread(mddev->thread);
>  		wait_event(mddev->sb_wait,
>  			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
> @@ -1585,15 +1587,17 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
>  	}
>  	if (test_and_clear_bit(In_sync, &rdev->flags))
>  		mddev->degraded++;
> +	spin_unlock_irqrestore(&conf->device_lock, flags);
>  	/*
>  	 * If recovery is running, make sure it aborts.
>  	 */
>  	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>  	set_bit(Blocked, &rdev->flags);
>  	set_bit(Faulty, &rdev->flags);
> +	spin_lock(&mddev->lock);
>  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  	set_bit(MD_CHANGE_PENDING, &mddev->flags);
> -	spin_unlock_irqrestore(&conf->device_lock, flags);
> +	spin_unlock(&mddev->lock);
>  	printk(KERN_ALERT
>  	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
>  	       "md/raid10:%s: Operation continuing on %d devices.\n",
> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
> index 9531f5f..2ba9366 100644
> --- a/drivers/md/raid5-cache.c
> +++ b/drivers/md/raid5-cache.c
> @@ -712,8 +712,10 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
>  	 * in_teardown check workaround this issue.
>  	 */
>  	if (!log->in_teardown) {
> +		spin_lock(&mddev->lock);
>  		set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  		set_bit(MD_CHANGE_PENDING, &mddev->flags);
> +		spin_unlock(&mddev->lock);
>  		md_wakeup_thread(mddev->thread);
>  		wait_event(mddev->sb_wait,
>  			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 8ab8b65..d4a1e37 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -2514,8 +2514,10 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
>  
>  	set_bit(Blocked, &rdev->flags);
>  	set_bit(Faulty, &rdev->flags);
> +	spin_lock(&mddev->lock);
>  	set_bit(MD_CHANGE_DEVS, &mddev->flags);
>  	set_bit(MD_CHANGE_PENDING, &mddev->flags);
> +	spin_unlock(&mddev->lock);
>  	printk(KERN_ALERT
>  	       "md/raid:%s: Disk failure on %s, disabling device.\n"
>  	       "md/raid:%s: Operation continuing on %d devices.\n",

The .error_handler is called by md_error, called by .bio_endio, which is called
in softirq. So the lock should be hold with irq disabled.

Thanks,
Shaohua

next prev parent reply	other threads:[~2016-04-25 17:32 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-21  5:58 [PATCH 00/13] The latest patches for md-cluster Guoqing Jiang
2016-04-21  5:58 ` [PATCH 01/13] md-cluster: change resync lock from asynchronous to synchronous Guoqing Jiang
2016-04-21  6:20   ` kbuild test robot
2016-04-21  6:55   ` [PATCH V2 " Guoqing Jiang
2016-04-21  5:58 ` [PATCH 02/13] md-cluser: make resync_finish only called after pers->sync_request Guoqing Jiang
2016-04-21  5:58 ` [PATCH 03/13] md-cluster: wake up thread to continue recovery Guoqing Jiang
2016-04-21  5:58 ` [PATCH 04/13] md-cluster: unregister thread if err happened Guoqing Jiang
2016-04-21  5:58 ` [PATCH 05/13] md-cluster: fix locking when node joins cluster during message broadcast Guoqing Jiang
2016-04-21  5:58 ` [PATCH 06/13] md-cluster: change array_sectors and update size are not supported Guoqing Jiang
2016-04-21  5:58 ` [PATCH 07/13] md-cluster: wakeup thread if activated a spare disk Guoqing Jiang
2016-04-21  5:58 ` [PATCH 08/13] md: set MD_CHANGE_PENDING in a spinlocked region Guoqing Jiang
2016-04-21  6:26   ` kbuild test robot
2016-04-21  6:58   ` [PATCH V2 " Guoqing Jiang
2016-04-25 17:32     ` Shaohua Li [this message]
2016-04-26  3:19       ` Guoqing Jiang
2016-04-27  1:56     ` [PATCH V3 " Guoqing Jiang
2016-04-27 15:27       ` Shaohua Li
2016-04-28  2:55         ` Guoqing Jiang
2016-04-28  3:58           ` Shaohua Li
2016-04-29  1:26             ` NeilBrown
2016-04-29 18:21               ` Shaohua Li
2016-04-21  5:58 ` [PATCH 09/13] md-cluster: always setup in-memory bitmap Guoqing Jiang
2016-04-21  7:00   ` kbuild test robot
2016-04-21  7:00   ` [PATCH] md-cluster: fix ifnullfree.cocci warnings kbuild test robot
2016-04-21  9:10     ` Guoqing Jiang
2016-04-25 17:45   ` [PATCH 09/13] md-cluster: always setup in-memory bitmap Shaohua Li
2016-04-26  3:22     ` Guoqing Jiang
2016-04-27 15:24       ` Shaohua Li
2016-04-28  2:59         ` Guoqing Jiang
2016-04-21  5:58 ` [PATCH 10/13] md-cluster: sync bitmap when node received RESYNCING msg Guoqing Jiang
2016-04-21  5:58 ` [PATCH 11/13] md-cluster/bitmap: fix wrong calcuation of offset Guoqing Jiang
2016-04-21  5:58 ` [PATCH 12/13] md-cluster/bitmap: fix wrong page num in bitmap_file_clear_bit and bitmap_file_set_bit Guoqing Jiang
2016-04-21  5:58 ` [PATCH 13/13] md-cluster/bitmap: unplug bitmap to sync dirty pages to disk Guoqing Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160425173201.GA11993@kernel.org \
    --to=shli@kernel.org \
    --cc=gqjiang@suse.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.