From: Guoqing Jiang <gqjiang@suse.com>
To: Xiao Ni <xni@redhat.com>, linux-raid@vger.kernel.org
Cc: shli@kernel.org, neilb@suse.com, ming.lei@redhat.com, ncroxon@redhat.com
Subject: Re: [RFC PATCH] MD: fix lock contention for flush bios
Date: Wed, 24 Jan 2018 17:02:57 +0800
Message-ID: <c3b38c2f-e139-85e2-b78b-17f20b6622b3@suse.com>
In-Reply-To: <1516761834-4701-1-git-send-email-xni@redhat.com>
On 01/24/2018 10:43 AM, Xiao Ni wrote:
> There is lock contention when many processes send flush bios to an md device,
> e.g. when creating many LVs on one raid device and running mkfs.xfs on each LV.
>
> Currently flush requests can only be handled sequentially: each one has to
> wait, under mddev->lock, for mddev->flush_bio to become NULL.
With the new approach, can we still keep the synchronization across all
devices?
I found the previous commit a2826aa92e2e ("md: support barrier requests on
all personalities") did want to keep the synchronization.
[snip]
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Xiao Ni <xni@redhat.com>
> ---
> drivers/md/md.c | 105 +++++++++++++++++++++++++-------------------------------
> drivers/md/md.h | 14 ++++----
> 2 files changed, 54 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4e4dee0..1e562f5 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -409,33 +409,61 @@ static int md_congested(void *data, int bits)
> return mddev_congested(mddev, bits);
> }
>
> -/*
> - * Generic flush handling for md
> - */
> +static void submit_flushes(struct work_struct *ws)
> +{
> + struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
> + struct mddev *mddev = fi->mddev;
> + struct bio *bio = fi->bio;
> +
> + bio->bi_opf &= ~REQ_PREFLUSH;
> + md_handle_request(mddev, bio);
>
> -static void md_end_flush(struct bio *bio)
> + kfree(fi);
> +}
> +
> +
An extra blank line above.
> +static void md_end_flush(struct bio *flush_bio)
> {
> - struct md_rdev *rdev = bio->bi_private;
> - struct mddev *mddev = rdev->mddev;
> + struct flush_info *fi = flush_bio->bi_private;
> + struct bio *bio = fi->bio;
> + struct mddev *mddev = fi->mddev;
> + struct md_rdev *rdev;
>
> - rdev_dec_pending(rdev, mddev);
> + rcu_read_lock();
> + rdev_for_each_rcu(rdev, mddev)
> + rdev_dec_pending(rdev, mddev);
Unnecessary indentation.
> + rcu_read_unlock();
>
> - if (atomic_dec_and_test(&mddev->flush_pending)) {
> - /* The pre-request flush has finished */
> - queue_work(md_wq, &mddev->flush_work);
> + if (bio->bi_iter.bi_size == 0)
> + /* an empty barrier - all done */
> + bio_endio(bio);
> + else {
> + INIT_WORK(&fi->flush_work, submit_flushes);
> + queue_work(md_wq, &fi->flush_work);
> }
> - bio_put(bio);
> }
>
> -static void md_submit_flush_data(struct work_struct *ws);
> -
> -static void submit_flushes(struct work_struct *ws)
> +void md_flush_request(struct mddev *mddev, struct bio *bio)
> {
> - struct mddev *mddev = container_of(ws, struct mddev, flush_work);
> struct md_rdev *rdev;
> + struct flush_info *fi;
> + struct bio *f_bio;
> +
> + fi = kmalloc(sizeof(*fi), GFP_NOIO);
> + if (fi == NULL) {
> + pr_err("md: %s failed to alloc memory for flush bio\n",
> + mdname(mddev));
> + bio->bi_status = BLK_STS_IOERR;
> + bio_endio(bio);
Maybe you missed "return" here.
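I.e. something along these lines on top of your patch (untested), so we don't
fall through and dereference fi after ending the bio:

	if (fi == NULL) {
		pr_err("md: %s failed to alloc memory for flush bio\n",
		       mdname(mddev));
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}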
Thanks,
Guoqing
> + }
> +
> + fi->bio = bio;
> + fi->mddev = mddev;
> + f_bio = &fi->flush_bio;
> + bio_init(f_bio, NULL, 0);
> + f_bio->bi_private = fi;
> + f_bio->bi_end_io = md_end_flush;
>
> - INIT_WORK(&mddev->flush_work, md_submit_flush_data);
> - atomic_set(&mddev->flush_pending, 1);
> rcu_read_lock();
> rdev_for_each_rcu(rdev, mddev)
> if (rdev->raid_disk >= 0 &&
> @@ -449,54 +477,16 @@ static void submit_flushes(struct work_struct *ws)
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
> - bi->bi_end_io = md_end_flush;
> - bi->bi_private = rdev;
> bio_set_dev(bi, rdev->bdev);
> bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
> - atomic_inc(&mddev->flush_pending);
> + bio_chain(bi, f_bio);
> submit_bio(bi);
> rcu_read_lock();
> rdev_dec_pending(rdev, mddev);
> }
> rcu_read_unlock();
> - if (atomic_dec_and_test(&mddev->flush_pending))
> - queue_work(md_wq, &mddev->flush_work);
> -}
> -
> -static void md_submit_flush_data(struct work_struct *ws)
> -{
> - struct mddev *mddev = container_of(ws, struct mddev, flush_work);
> - struct bio *bio = mddev->flush_bio;
> -
> - /*
> - * must reset flush_bio before calling into md_handle_request to avoid a
> - * deadlock, because other bios passed md_handle_request suspend check
> - * could wait for this and below md_handle_request could wait for those
> - * bios because of suspend check
> - */
> - mddev->flush_bio = NULL;
> - wake_up(&mddev->sb_wait);
> -
> - if (bio->bi_iter.bi_size == 0)
> - /* an empty barrier - all done */
> - bio_endio(bio);
> - else {
> - bio->bi_opf &= ~REQ_PREFLUSH;
> - md_handle_request(mddev, bio);
> - }
> -}
> -
> -void md_flush_request(struct mddev *mddev, struct bio *bio)
> -{
> - spin_lock_irq(&mddev->lock);
> - wait_event_lock_irq(mddev->sb_wait,
> - !mddev->flush_bio,
> - mddev->lock);
> - mddev->flush_bio = bio;
> - spin_unlock_irq(&mddev->lock);
>
> - INIT_WORK(&mddev->flush_work, submit_flushes);
> - queue_work(md_wq, &mddev->flush_work);
> + bio_endio(f_bio);
> }
> EXPORT_SYMBOL(md_flush_request);
>
> @@ -555,7 +545,6 @@ void mddev_init(struct mddev *mddev)
> atomic_set(&mddev->openers, 0);
> atomic_set(&mddev->active_io, 0);
> spin_lock_init(&mddev->lock);
> - atomic_set(&mddev->flush_pending, 0);
> init_waitqueue_head(&mddev->sb_wait);
> init_waitqueue_head(&mddev->recovery_wait);
> mddev->reshape_position = MaxSector;
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 7d6bcf0..16e7f03 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -252,6 +252,13 @@ enum mddev_sb_flags {
> MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
> };
>
> +struct flush_info {
> + struct bio *bio;
> + struct bio flush_bio;
> + struct mddev *mddev;
> + struct work_struct flush_work;
> +};
> +
> struct mddev {
> void *private;
> struct md_personality *pers;
> @@ -457,13 +464,6 @@ struct mddev {
> * metadata and bitmap writes
> */
>
> - /* Generic flush handling.
> - * The last to finish preflush schedules a worker to submit
> - * the rest of the request (without the REQ_PREFLUSH flag).
> - */
> - struct bio *flush_bio;
> - atomic_t flush_pending;
> - struct work_struct flush_work;
> struct work_struct event_work; /* used by dm to report failure event */
> void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
> struct md_cluster_info *cluster_info;