linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Shaohua Li <shli@kernel.org>
To: linux-raid@vger.kernel.org
Cc: neilb@suse.de, axboe@kernel.dk, dan.j.williams@intel.com,
	shli@fusionio.com
Subject: [patch 4/8] raid5: reduce chance release_stripe() taking device_lock
Date: Mon, 04 Jun 2012 16:01:56 +0800	[thread overview]
Message-ID: <20120604080321.874560112@kernel.org> (raw)
In-Reply-To: 20120604080152.098975870@kernel.org

[-- Attachment #1: raid5-reduce-release_stripe-lock.patch --]
[-- Type: text/plain, Size: 4095 bytes --]

release_stripe() is a place where conf->device_lock is heavily contended. We
take the lock even when the stripe count isn't 1, which isn't required. On the
other hand, decrementing the count first and taking the lock only if the count
reaches 0 can expose races:
1. between decrementing the count and taking the lock, another thread hits the
stripe in cache and so increases the count. The stripe will be deleted from any
list. In this case the stripe count isn't 0.
2. between decrementing the count and taking the lock, another thread hits the
stripe in cache and then releases it. In this case the stripe is already on a
specific list. We do list_move to adjust its position.
So both cases look fixable to me.

Signed-off-by: Shaohua Li <shli@fusionio.com>
---
 drivers/md/raid5.c |   43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2012-06-01 13:50:56.336138112 +0800
+++ linux/drivers/md/raid5.c	2012-06-01 14:03:17.062826938 +0800
@@ -201,20 +201,39 @@ static int stripe_operations_active(stru
 	       test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
+	int locking)
 {
+	unsigned long uninitialized_var(flags);
+
 	if (atomic_dec_and_test(&sh->count)) {
-		BUG_ON(!list_empty(&sh->lru));
+		/*
+		 * Before we hold device_lock, other thread can hit this stripe
+		 * in cache. It could do:
+		 * 1. just get_active_stripe(). The stripe count isn't 0 then.
+		 * 2. do get_active_stripe() and follow release_stripe(). So the
+		 * stripe might be already released and already in specific
+		 * list. we do list_move to adjust its position in the list.
+		 */
+		if (locking) {
+			spin_lock_irqsave(&conf->device_lock, flags);
+			if (atomic_read(&sh->count) != 0) {
+				spin_unlock_irqrestore(&conf->device_lock,
+							flags);
+				return;
+			}
+		}
+
 		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
 			if (test_bit(STRIPE_DELAYED, &sh->state))
-				list_add_tail(&sh->lru, &conf->delayed_list);
+				list_move_tail(&sh->lru, &conf->delayed_list);
 			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0)
-				list_add_tail(&sh->lru, &conf->bitmap_list);
+				list_move_tail(&sh->lru, &conf->bitmap_list);
 			else {
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
-				list_add_tail(&sh->lru, &conf->handle_list);
+				list_move_tail(&sh->lru, &conf->handle_list);
 			}
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
@@ -225,23 +244,22 @@ static void __release_stripe(struct r5co
 					md_wakeup_thread(conf->mddev->thread);
 			atomic_dec(&conf->active_stripes);
 			if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-				list_add_tail(&sh->lru, &conf->inactive_list);
+				list_move_tail(&sh->lru, &conf->inactive_list);
 				wake_up(&conf->wait_for_stripe);
 				if (conf->retry_read_aligned)
 					md_wakeup_thread(conf->mddev->thread);
 			}
 		}
+		if (locking)
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 }
 
 static void release_stripe(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
-	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
-	__release_stripe(conf, sh);
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	__release_stripe(conf, sh, 1);
 }
 
 static inline void remove_hash(struct stripe_head *sh)
@@ -484,9 +502,6 @@ get_active_stripe(struct r5conf *conf, s
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
-				if (list_empty(&sh->lru) &&
-				    !test_bit(STRIPE_EXPANDING, &sh->state))
-					BUG();
 				list_del_init(&sh->lru);
 			}
 		}
@@ -3672,7 +3687,7 @@ static void activate_bit_delay(struct r5
 		struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
 		list_del_init(&sh->lru);
 		atomic_inc(&sh->count);
-		__release_stripe(conf, sh);
+		__release_stripe(conf, sh, 0);
 	}
 }
 


  parent reply	other threads:[~2012-06-04  8:01 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-04  8:01 [patch 0/8] raid5: improve write performance for fast storage Shaohua Li
2012-06-04  8:01 ` [patch 1/8] raid5: add a per-stripe lock Shaohua Li
2012-06-07  0:54   ` NeilBrown
2012-06-07  6:29     ` Shaohua Li
2012-06-07  6:35       ` NeilBrown
2012-06-07  6:52         ` Shaohua Li
2012-06-12 21:02           ` Dan Williams
2012-06-13  4:08             ` Dan Williams
2012-06-13  4:23               ` Shaohua Li
2012-06-12 21:10   ` Dan Williams
2012-06-04  8:01 ` [patch 2/8] raid5: lockless access raid5 overrided bi_phys_segments Shaohua Li
2012-06-07  1:06   ` NeilBrown
2012-06-12 20:41     ` Dan Williams
2012-06-04  8:01 ` [patch 3/8] raid5: remove some device_lock locking places Shaohua Li
2012-06-04  8:01 ` Shaohua Li [this message]
2012-06-07  0:50   ` [patch 4/8] raid5: reduce chance release_stripe() taking device_lock NeilBrown
2012-06-04  8:01 ` [patch 5/8] raid5: add batch stripe release Shaohua Li
2012-06-04  8:01 ` [patch 6/8] raid5: make_request use " Shaohua Li
2012-06-07  1:23   ` NeilBrown
2012-06-07  6:33     ` Shaohua Li
2012-06-07  7:33       ` NeilBrown
2012-06-07  7:58         ` Shaohua Li
2012-06-08  6:16           ` Shaohua Li
2012-06-08  6:42             ` NeilBrown
2012-06-04  8:01 ` [patch 7/8] raid5: raid5d handle stripe in batch way Shaohua Li
2012-06-07  1:32   ` NeilBrown
2012-06-07  6:35     ` Shaohua Li
2012-06-07  7:38       ` NeilBrown
2012-06-04  8:02 ` [patch 8/8] raid5: create multiple threads to handle stripes Shaohua Li
2012-06-07  1:39   ` NeilBrown
2012-06-07  6:45     ` Shaohua Li
2012-06-13  4:08       ` Dan Williams
2012-06-21 10:09         ` Shaohua Li
2012-07-02 20:43           ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120604080321.874560112@kernel.org \
    --to=shli@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=shli@fusionio.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).