linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Shaohua Li <shli@kernel.org>
Cc: Dan Williams <djbw@fb.com>, linux-raid@vger.kernel.org
Subject: Re: [patch]raid5: make release_stripe lockless
Date: Thu, 28 Mar 2013 11:45:46 +1100	[thread overview]
Message-ID: <20130328114546.207e1d74@notabene.brown> (raw)
In-Reply-To: <20130322063617.GA22668@kernel.org>

[-- Attachment #1: Type: text/plain, Size: 6013 bytes --]

On Fri, 22 Mar 2013 14:36:17 +0800 Shaohua Li <shli@kernel.org> wrote:

> 
> Subject: raid5: make release_stripe lockless
> 
> release_stripe still suffers from heavy lock contention. Instead of taking
> device_lock, we just add the stripe to an llist and let the raid5d thread do
> the real stripe release; raid5d must hold device_lock anyway. This way,
> release_stripe doesn't hold any locks.
> 
> The side effect is that the order in which stripes are released changes. That
> doesn't seem like a big deal, since stripes are never handled in order anyway.
> I also believe the block layer already does a good job of merging requests,
> which means the order isn't that important.
> 
> I kept the unplug release batch. With this patch it is no longer needed for
> avoiding lock contention, and if we deleted it, the stripe_head release_list
> and lru could share storage. But the unplug release batch also helps request
> merging. We could probably delay waking up raid5d until unplug, but I'm still
> worried about the case where raid5d is already running.

Looks good, thanks.

One comment:


> +/* should hold conf->device_lock already */
> +static int release_stripe_list(struct r5conf *conf)
> +{
> +	struct stripe_head *sh;
> +	struct llist_node *node;
> +	int count = 0;
> +
> +	while (1) {
> +		node = llist_del_first(&conf->released_stripes);
> +		if (!node)
> +			break;

Why not:
 llist_for_each_entry(sh, llist_del_all(&conf->released_stripes), release_list) {
       clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
       __release_stripe(conf, sh);
       count++;
 }

??

NeilBrown

> +		sh = llist_entry(node, struct stripe_head, release_list);
> +		/*
> +		 * llist_del_first() uses cmpxchg, so implies a memory fence.
> +		 * It's guaranteed the stripe isn't in released_stripes list
> +		 * now, clearing STRIPE_ON_RELEASE_LIST is safe.
> +		 */
> +		clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
> +		/*
> +		 * Don't worry if the bit gets set again here: in that case
> +		 * the count is always > 1. The same is true for the
> +		 * STRIPE_ON_UNPLUG_LIST bit.
> +		 */
> +		__release_stripe(conf, sh);
> +		count++;
> +	}
> +	return count;
> +}
> +
>  static void release_stripe(struct stripe_head *sh)
>  {
>  	struct r5conf *conf = sh->raid_conf;
>  	unsigned long flags;
> +	bool wakeup;
>  
> +	if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
> +		goto slow_path;
> +	wakeup = llist_add(&sh->release_list, &conf->released_stripes);
> +	if (wakeup)
> +		md_wakeup_thread(conf->mddev->thread);
> +	return;
> +slow_path:
>  	local_irq_save(flags);
> +	/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
>  	if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
>  		do_release_stripe(conf, sh);
>  		spin_unlock(&conf->device_lock);
> @@ -515,7 +553,8 @@ get_active_stripe(struct r5conf *conf, s
>  			if (atomic_read(&sh->count)) {
>  				BUG_ON(!list_empty(&sh->lru)
>  				    && !test_bit(STRIPE_EXPANDING, &sh->state)
> -				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
> +				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
> +				    && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
>  			} else {
>  				if (!test_bit(STRIPE_HANDLE, &sh->state))
>  					atomic_inc(&conf->active_stripes);
> @@ -4128,6 +4167,10 @@ static void raid5_unplug(struct blk_plug
>  			 */
>  			smp_mb__before_clear_bit();
>  			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
> +			/*
> +			 * STRIPE_ON_RELEASE_LIST could be set here. In that
> +			 * case, the count is always > 1 here
> +			 */
>  			__release_stripe(conf, sh);
>  			cnt++;
>  		}
> @@ -4813,10 +4856,12 @@ static void raid5auxd(struct md_thread *
>  	handled = 0;
>  	spin_lock_irq(&conf->device_lock);
>  	while (1) {
> -		int batch_size;
> +		int batch_size, released;
> +
> +		released = release_stripe_list(conf);
>  
>  		batch_size = handle_active_stripes(conf, &auxth->work_mask);
> -		if (!batch_size)
> +		if (!batch_size && !released)
>  			break;
>  		handled += batch_size;
>  	}
> @@ -4851,7 +4896,9 @@ static void raid5d(struct md_thread *thr
>  	spin_lock_irq(&conf->device_lock);
>  	while (1) {
>  		struct bio *bio;
> -		int batch_size;
> +		int batch_size, released;
> +
> +		released = release_stripe_list(conf);
>  
>  		if (
>  		    !list_empty(&conf->bitmap_list)) {
> @@ -4876,7 +4923,7 @@ static void raid5d(struct md_thread *thr
>  		}
>  
>  		batch_size = handle_active_stripes(conf, &conf->work_mask);
> -		if (!batch_size)
> +		if (!batch_size && !released)
>  			break;
>  		handled += batch_size;
>  
> @@ -5471,6 +5518,7 @@ static struct r5conf *setup_conf(struct
>  	INIT_LIST_HEAD(&conf->delayed_list);
>  	INIT_LIST_HEAD(&conf->bitmap_list);
>  	INIT_LIST_HEAD(&conf->inactive_list);
> +	init_llist_head(&conf->released_stripes);
>  	atomic_set(&conf->active_stripes, 0);
>  	atomic_set(&conf->preread_active_stripes, 0);
>  	atomic_set(&conf->active_aligned_reads, 0);
> Index: linux/drivers/md/raid5.h
> ===================================================================
> --- linux.orig/drivers/md/raid5.h	2013-03-21 10:34:56.452256640 +0800
> +++ linux/drivers/md/raid5.h	2013-03-21 10:34:57.256246529 +0800
> @@ -197,6 +197,7 @@ enum reconstruct_states {
>  struct stripe_head {
>  	struct hlist_node	hash;
>  	struct list_head	lru;	      /* inactive_list or handle_list */
> +	struct llist_node	release_list;
>  	struct r5conf		*raid_conf;
>  	short			generation;	/* increments with every
>  						 * reshape */
> @@ -324,6 +325,7 @@ enum {
>  	STRIPE_COMPUTE_RUN,
>  	STRIPE_OPS_REQ_PENDING,
>  	STRIPE_ON_UNPLUG_LIST,
> +	STRIPE_ON_RELEASE_LIST,
>  };
>  
>  /*
> @@ -462,6 +464,7 @@ struct r5conf {
>  	 */
>  	atomic_t		active_stripes;
>  	struct list_head	inactive_list;
> +	struct llist_head	released_stripes;
>  	wait_queue_head_t	wait_for_stripe;
>  	wait_queue_head_t	wait_for_overlap;
>  	int			inactive_blocked;	/* release of inactive stripes blocked,


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

  reply	other threads:[~2013-03-28  0:45 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-18  4:31 [patch]raid5: make release_stripe lockless Shaohua Li
2013-03-19 21:53 ` Dan Williams
2013-03-20  0:55   ` Shaohua Li
2013-03-22  6:36     ` Shaohua Li
2013-03-28  0:45       ` NeilBrown [this message]
2013-03-28  2:00         ` Shaohua Li
2013-03-28  2:28           ` NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130328114546.207e1d74@notabene.brown \
    --to=neilb@suse.de \
    --cc=djbw@fb.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=shli@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).