From: NeilBrown <neilb@suse.de>
To: Shaohua Li <shli@kernel.org>
Cc: Dan Williams <djbw@fb.com>, linux-raid@vger.kernel.org
Subject: Re: [patch]raid5: make release_stripe lockless
Date: Thu, 28 Mar 2013 11:45:46 +1100 [thread overview]
Message-ID: <20130328114546.207e1d74@notabene.brown> (raw)
In-Reply-To: <20130322063617.GA22668@kernel.org>
[-- Attachment #1: Type: text/plain, Size: 6013 bytes --]
On Fri, 22 Mar 2013 14:36:17 +0800 Shaohua Li <shli@kernel.org> wrote:
>
> Subject: raid5: make release_stripe lockless
>
> release_stripe still has big lock contention. We just add the stripe to a llist
> without taking device_lock. We let the raid5d thread to do the real stripe
> release, which must hold device_lock anyway. In this way, release_stripe
> doesn't hold any locks.
>
> The side effect is the released stripes order is changed. But sounds not a big
> deal, stripes are never handled in order. And I thought block layer can already
> do nice request merge, which means order isn't that important.
>
> I kept the unplug release batch, which is unnecessary with this patch from lock
> contention avoid point of view, and actually if we delete it, the stripe_head
> release_list and lru can share storage. But the unplug release batch is also
> helpful for request merge. We probably can delay wakeup raid5d till unplug, but
> I'm still afraid of the case which raid5d is running.
Looks good, thanks.
One comment:
> +/* should hold conf->device_lock already */
> +static int release_stripe_list(struct r5conf *conf)
> +{
> + struct stripe_head *sh;
> + struct llist_node *node;
> + int count = 0;
> +
> + while (1) {
> + node = llist_del_first(&conf->released_stripes);
> + if (!node)
> + break;
Why not:
llist_for_each_entry(sh, llist_delete_all(&conf->released_stripes), release_list) {
clear_bit()
__release_stripe(conf, sh);
count++;
}
??
NeilBrown
> + sh = llist_entry(node, struct stripe_head, release_list);
> + /*
> + * llist_del_first() uses cmpxchg, so implies a memory fence.
> + * It's guaranteed the stripe isn't in released_stripes list
> + * now, clearing STRIPE_ON_RELEASE_LIST is safe.
> + */
> + clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
> + /*
> + * Don't worry the bit is set here, because if the bit is set
> + * again, the count is always > 1. This is true for
> + * STRIPE_ON_UNPLUG_LIST bit too.
> + */
> + __release_stripe(conf, sh);
> + count++;
> + }
> + return count;
> +}
> +
> static void release_stripe(struct stripe_head *sh)
> {
> struct r5conf *conf = sh->raid_conf;
> unsigned long flags;
> + bool wakeup;
>
> + if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
> + goto slow_path;
> + wakeup = llist_add(&sh->release_list, &conf->released_stripes);
> + if (wakeup)
> + md_wakeup_thread(conf->mddev->thread);
> + return;
> +slow_path:
> local_irq_save(flags);
> + /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
> if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
> do_release_stripe(conf, sh);
> spin_unlock(&conf->device_lock);
> @@ -515,7 +553,8 @@ get_active_stripe(struct r5conf *conf, s
> if (atomic_read(&sh->count)) {
> BUG_ON(!list_empty(&sh->lru)
> && !test_bit(STRIPE_EXPANDING, &sh->state)
> - && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
> + && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
> + && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
> } else {
> if (!test_bit(STRIPE_HANDLE, &sh->state))
> atomic_inc(&conf->active_stripes);
> @@ -4128,6 +4167,10 @@ static void raid5_unplug(struct blk_plug
> */
> smp_mb__before_clear_bit();
> clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
> + /*
> + * STRIPE_ON_RELEASE_LIST could be set here. In that
> + * case, the count is always > 1 here
> + */
> __release_stripe(conf, sh);
> cnt++;
> }
> @@ -4813,10 +4856,12 @@ static void raid5auxd(struct md_thread *
> handled = 0;
> spin_lock_irq(&conf->device_lock);
> while (1) {
> - int batch_size;
> + int batch_size, released;
> +
> + released = release_stripe_list(conf);
>
> batch_size = handle_active_stripes(conf, &auxth->work_mask);
> - if (!batch_size)
> + if (!batch_size && !released)
> break;
> handled += batch_size;
> }
> @@ -4851,7 +4896,9 @@ static void raid5d(struct md_thread *thr
> spin_lock_irq(&conf->device_lock);
> while (1) {
> struct bio *bio;
> - int batch_size;
> + int batch_size, released;
> +
> + released = release_stripe_list(conf);
>
> if (
> !list_empty(&conf->bitmap_list)) {
> @@ -4876,7 +4923,7 @@ static void raid5d(struct md_thread *thr
> }
>
> batch_size = handle_active_stripes(conf, &conf->work_mask);
> - if (!batch_size)
> + if (!batch_size && !released)
> break;
> handled += batch_size;
>
> @@ -5471,6 +5518,7 @@ static struct r5conf *setup_conf(struct
> INIT_LIST_HEAD(&conf->delayed_list);
> INIT_LIST_HEAD(&conf->bitmap_list);
> INIT_LIST_HEAD(&conf->inactive_list);
> + init_llist_head(&conf->released_stripes);
> atomic_set(&conf->active_stripes, 0);
> atomic_set(&conf->preread_active_stripes, 0);
> atomic_set(&conf->active_aligned_reads, 0);
> Index: linux/drivers/md/raid5.h
> ===================================================================
> --- linux.orig/drivers/md/raid5.h 2013-03-21 10:34:56.452256640 +0800
> +++ linux/drivers/md/raid5.h 2013-03-21 10:34:57.256246529 +0800
> @@ -197,6 +197,7 @@ enum reconstruct_states {
> struct stripe_head {
> struct hlist_node hash;
> struct list_head lru; /* inactive_list or handle_list */
> + struct llist_node release_list;
> struct r5conf *raid_conf;
> short generation; /* increments with every
> * reshape */
> @@ -324,6 +325,7 @@ enum {
> STRIPE_COMPUTE_RUN,
> STRIPE_OPS_REQ_PENDING,
> STRIPE_ON_UNPLUG_LIST,
> + STRIPE_ON_RELEASE_LIST,
> };
>
> /*
> @@ -462,6 +464,7 @@ struct r5conf {
> */
> atomic_t active_stripes;
> struct list_head inactive_list;
> + struct llist_head released_stripes;
> wait_queue_head_t wait_for_stripe;
> wait_queue_head_t wait_for_overlap;
> int inactive_blocked; /* release of inactive stripes blocked,
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]
next prev parent reply other threads:[~2013-03-28 0:45 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-18 4:31 [patch]raid5: make release_stripe lockless Shaohua Li
2013-03-19 21:53 ` Dan Williams
2013-03-20 0:55 ` Shaohua Li
2013-03-22 6:36 ` Shaohua Li
2013-03-28 0:45 ` NeilBrown [this message]
2013-03-28 2:00 ` Shaohua Li
2013-03-28 2:28 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130328114546.207e1d74@notabene.brown \
--to=neilb@suse.de \
--cc=djbw@fb.com \
--cc=linux-raid@vger.kernel.org \
--cc=shli@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).