linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: Xiao Ni <xni@redhat.com>
Cc: linux-raid@vger.kernel.org
Subject: Re: [PATCH 0/4] RFC: attempt to remove md deadlocks with metadata without
Date: Fri, 13 Oct 2017 14:48:41 +1100	[thread overview]
Message-ID: <87376nn1wm.fsf@notabene.neil.brown.name> (raw)
In-Reply-To: <960568852.19225619.1507689864371.JavaMail.zimbra@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 5512 bytes --]

On Tue, Oct 10 2017, Xiao Ni wrote:
>
> I added the stack traces as an attachment. 
>

Thanks.  very helpful.
I think I can see the problem.  The following patch might
fix it.
Thanks for your ongoing testing!

NeilBrown

From: NeilBrown <neilb@suse.com>
Date: Fri, 13 Oct 2017 14:46:37 +1100
Subject: [PATCH] md: move suspend_hi/lo handling into core md code

responding to ->suspend_lo and ->suspend_hi is similar
to responding to ->suspended.  It is best to wait in
the common core code without incrementing ->active_io.
This allows mddev_suspend()/mddev_resume() to work while
requesting are waiting for suspend_lo/hi to change.
This is particularly important as the code to update these
values now uses mddev_suspend().

So move the code for testing suspend_lo/hi out of raid1.c
and raid5.c, and place it in md.c

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/md/md.c    | 29 +++++++++++++++++++++++------
 drivers/md/raid1.c | 12 ++++--------
 drivers/md/raid5.c | 22 ----------------------
 3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index ae531666f127..93ba3a526718 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
+{
+	if (mddev->suspended)
+		return true;
+	if (bio_data_dir(bio) != WRITE)
+		return false;
+	if (mddev->suspend_lo >= mddev->suspend_hi)
+		return false;
+	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+		return false;
+	if (bio_end_sector(bio) < mddev->suspend_lo)
+		return false;
+	return true;
+}
+
 void md_handle_request(struct mddev *mddev, struct bio *bio)
 {
 check_suspended:
 	rcu_read_lock();
-	if (mddev->suspended) {
+	if (is_suspended(mddev, bio)) {
 		DEFINE_WAIT(__wait);
 		for (;;) {
 			prepare_to_wait(&mddev->sb_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
-			if (!mddev->suspended)
+			if (!is_suspended(mddev, bio))
 				break;
 			rcu_read_unlock();
 			schedule();
@@ -4854,10 +4869,11 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 		goto unlock;
 	old = mddev->suspend_lo;
 	mddev->suspend_lo = new;
-	if (new >= old)
+	if (new >= old) {
 		/* Shrinking suspended region */
+		wake_up(&mddev->sb_wait);
 		mddev->pers->quiesce(mddev, 2);
-	else {
+	} else {
 		/* Expanding suspended region - need to wait */
 		mddev_suspend(mddev);
 		mddev_resume(mddev);
@@ -4897,10 +4913,11 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 		goto unlock;
 	old = mddev->suspend_hi;
 	mddev->suspend_hi = new;
-	if (new <= old)
+	if (new <= old) {
 		/* Shrinking suspended region */
+		wake_up(&mddev->sb_wait);
 		mddev->pers->quiesce(mddev, 2);
-	else {
+	} else {
 		/* Expanding suspended region - need to wait */
 		mddev_suspend(mddev);
 		mddev_resume(mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f3f3e40dc9d8..277a145b3ff5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1310,11 +1310,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	 */
 
 
-	if ((bio_end_sector(bio) > mddev->suspend_lo &&
-	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
-	    (mddev_is_clustered(mddev) &&
+	if (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
 
 		/*
 		 * As the suspend_* range is controlled by userspace, we want
@@ -1325,12 +1323,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			sigset_t full, old;
 			prepare_to_wait(&conf->wait_barrier,
 					&w, TASK_INTERRUPTIBLE);
-			if (bio_end_sector(bio) <= mddev->suspend_lo ||
-			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
-			    (mddev_is_clustered(mddev) &&
+			if (mddev_is_clustered(mddev) &&
 			     !md_cluster_ops->area_resyncing(mddev, WRITE,
 				     bio->bi_iter.bi_sector,
-				     bio_end_sector(bio))))
+				     bio_end_sector(bio)))
 				break;
 			sigfillset(&full);
 			sigprocmask(SIG_BLOCK, &full, &old);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 51c9dac6e744..1f9f2d80c004 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5685,28 +5685,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 				goto retry;
 			}
 
-			if (rw == WRITE &&
-			    logical_sector >= mddev->suspend_lo &&
-			    logical_sector < mddev->suspend_hi) {
-				raid5_release_stripe(sh);
-				/* As the suspend_* range is controlled by
-				 * userspace, we want an interruptible
-				 * wait.
-				 */
-				prepare_to_wait(&conf->wait_for_overlap,
-						&w, TASK_INTERRUPTIBLE);
-				if (logical_sector >= mddev->suspend_lo &&
-				    logical_sector < mddev->suspend_hi) {
-					sigset_t full, old;
-					sigfillset(&full);
-					sigprocmask(SIG_BLOCK, &full, &old);
-					schedule();
-					sigprocmask(SIG_SETMASK, &old, NULL);
-					do_prepare = true;
-				}
-				goto retry;
-			}
-
 			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
 			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
 				/* Stripe is busy expanding or
-- 
2.14.0.rc0.dirty


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

  parent reply	other threads:[~2017-10-13  3:48 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-12  1:49 [PATCH 0/4] RFC: attempt to remove md deadlocks with metadata without NeilBrown
2017-09-12  1:49 ` [PATCH 1/4] md: always hold reconfig_mutex when calling mddev_suspend() NeilBrown
2017-09-12  1:49 ` [PATCH 4/4] md: allow metadata update while suspending NeilBrown
2017-09-12  1:49 ` [PATCH 3/4] md: use mddev_suspend/resume instead of ->quiesce() NeilBrown
2017-09-12  1:49 ` [PATCH 2/4] md: don't call bitmap_create() while array is quiesced NeilBrown
2017-09-12  2:51 ` [PATCH 0/4] RFC: attempt to remove md deadlocks with metadata without Xiao Ni
2017-09-13  2:11 ` Xiao Ni
2017-09-13 15:09   ` Xiao Ni
2017-09-13 23:05     ` NeilBrown
2017-09-14  4:55       ` Xiao Ni
2017-09-14  5:32         ` NeilBrown
2017-09-14  7:57           ` Xiao Ni
2017-09-16 13:15             ` Xiao Ni
2017-10-05  5:17             ` NeilBrown
2017-10-06  3:53               ` Xiao Ni
2017-10-06  4:32                 ` NeilBrown
2017-10-09  1:21                   ` Xiao Ni
2017-10-09  4:57                     ` NeilBrown
2017-10-09  5:32                       ` Xiao Ni
2017-10-09  5:52                         ` NeilBrown
2017-10-10  6:05                           ` Xiao Ni
2017-10-10 21:20                             ` NeilBrown
     [not found]                               ` <960568852.19225619.1507689864371.JavaMail.zimbra@redhat.com>
2017-10-13  3:48                                 ` NeilBrown [this message]
2017-10-16  4:43                                   ` Xiao Ni
2017-09-30  9:46 ` Xiao Ni
2017-10-05  5:03   ` NeilBrown
2017-10-06  3:40     ` Xiao Ni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87376nn1wm.fsf@notabene.neil.brown.name \
    --to=neilb@suse.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=xni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).