From: Guoqing Jiang <gqjiang@suse.com>
To: Xiao Ni <xni@redhat.com>, linux-raid@vger.kernel.org
Cc: shli@kernel.org, neilb@suse.com, ming.lei@redhat.com, ncroxon@redhat.com
Subject: Re: [RFC PATCH] MD: fix lock contention for flush bios
Date: Wed, 24 Jan 2018 17:02:57 +0800
Message-ID: <c3b38c2f-e139-85e2-b78b-17f20b6622b3@suse.com>
In-Reply-To: <1516761834-4701-1-git-send-email-xni@redhat.com>
On 01/24/2018 10:43 AM, Xiao Ni wrote:
> There is lock contention when many processes send flush bios to an md device,
> e.g. when creating many LVs on one raid device and running mkfs.xfs on each LV.
>
> Currently flush requests can only be handled sequentially: each one has to
> wait, under mddev->lock, for mddev->flush_bio to become NULL.
With the new approach, can we still keep the synchronization across all
devices?
I found the previous commit a2826aa92e2e ("md: support barrier requests on
all personalities") did want to keep the synchronization.
[snip]
> Suggested-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Xiao Ni <xni@redhat.com>
> ---
> drivers/md/md.c | 105 +++++++++++++++++++++++++-------------------------------
> drivers/md/md.h | 14 ++++----
> 2 files changed, 54 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4e4dee0..1e562f5 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -409,33 +409,61 @@ static int md_congested(void *data, int bits)
> return mddev_congested(mddev, bits);
> }
>
> -/*
> - * Generic flush handling for md
> - */
> +static void submit_flushes(struct work_struct *ws)
> +{
> + struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
> + struct mddev *mddev = fi->mddev;
> + struct bio *bio = fi->bio;
> +
> + bio->bi_opf &= ~REQ_PREFLUSH;
> + md_handle_request(mddev, bio);
>
> -static void md_end_flush(struct bio *bio)
> + kfree(fi);
> +}
> +
> +
An extra blank line above.
> +static void md_end_flush(struct bio *flush_bio)
> {
> - struct md_rdev *rdev = bio->bi_private;
> - struct mddev *mddev = rdev->mddev;
> + struct flush_info *fi = flush_bio->bi_private;
> + struct bio *bio = fi->bio;
> + struct mddev *mddev = fi->mddev;
> + struct md_rdev *rdev;
>
> - rdev_dec_pending(rdev, mddev);
> + rcu_read_lock();
> + rdev_for_each_rcu(rdev, mddev)
> + rdev_dec_pending(rdev, mddev);
Unnecessary indentation.
> + rcu_read_unlock();
>
> - if (atomic_dec_and_test(&mddev->flush_pending)) {
> - /* The pre-request flush has finished */
> - queue_work(md_wq, &mddev->flush_work);
> + if (bio->bi_iter.bi_size == 0)
> + /* an empty barrier - all done */
> + bio_endio(bio);
> + else {
> + INIT_WORK(&fi->flush_work, submit_flushes);
> + queue_work(md_wq, &fi->flush_work);
> }
> - bio_put(bio);
> }
>
> -static void md_submit_flush_data(struct work_struct *ws);
> -
> -static void submit_flushes(struct work_struct *ws)
> +void md_flush_request(struct mddev *mddev, struct bio *bio)
> {
> - struct mddev *mddev = container_of(ws, struct mddev, flush_work);
> struct md_rdev *rdev;
> + struct flush_info *fi;
> + struct bio *f_bio;
> +
> + fi = kmalloc(sizeof(*fi), GFP_NOIO);
> + if (fi == NULL) {
> + pr_err("md: %s failed to alloc memory for flush bio\n",
> + mdname(mddev));
> + bio->bi_status = BLK_STS_IOERR;
> + bio_endio(bio);
Maybe you missed "return" here.
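I.e. something along these lines on top of your patch (untested), so we don't
fall through and dereference fi after ending the bio:

	if (fi == NULL) {
		pr_err("md: %s failed to alloc memory for flush bio\n",
		       mdname(mddev));
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}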
Thanks,
Guoqing
> + }
> +
> + fi->bio = bio;
> + fi->mddev = mddev;
> + f_bio = &fi->flush_bio;
> + bio_init(f_bio, NULL, 0);
> + f_bio->bi_private = fi;
> + f_bio->bi_end_io = md_end_flush;
>
> - INIT_WORK(&mddev->flush_work, md_submit_flush_data);
> - atomic_set(&mddev->flush_pending, 1);
> rcu_read_lock();
> rdev_for_each_rcu(rdev, mddev)
> if (rdev->raid_disk >= 0 &&
> @@ -449,54 +477,16 @@ static void submit_flushes(struct work_struct *ws)
> atomic_inc(&rdev->nr_pending);
> rcu_read_unlock();
> bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
> - bi->bi_end_io = md_end_flush;
> - bi->bi_private = rdev;
> bio_set_dev(bi, rdev->bdev);
> bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
> - atomic_inc(&mddev->flush_pending);
> + bio_chain(bi, f_bio);
> submit_bio(bi);
> rcu_read_lock();
> rdev_dec_pending(rdev, mddev);
> }
> rcu_read_unlock();
> - if (atomic_dec_and_test(&mddev->flush_pending))
> - queue_work(md_wq, &mddev->flush_work);
> -}
> -
> -static void md_submit_flush_data(struct work_struct *ws)
> -{
> - struct mddev *mddev = container_of(ws, struct mddev, flush_work);
> - struct bio *bio = mddev->flush_bio;
> -
> - /*
> - * must reset flush_bio before calling into md_handle_request to avoid a
> - * deadlock, because other bios passed md_handle_request suspend check
> - * could wait for this and below md_handle_request could wait for those
> - * bios because of suspend check
> - */
> - mddev->flush_bio = NULL;
> - wake_up(&mddev->sb_wait);
> -
> - if (bio->bi_iter.bi_size == 0)
> - /* an empty barrier - all done */
> - bio_endio(bio);
> - else {
> - bio->bi_opf &= ~REQ_PREFLUSH;
> - md_handle_request(mddev, bio);
> - }
> -}
> -
> -void md_flush_request(struct mddev *mddev, struct bio *bio)
> -{
> - spin_lock_irq(&mddev->lock);
> - wait_event_lock_irq(mddev->sb_wait,
> - !mddev->flush_bio,
> - mddev->lock);
> - mddev->flush_bio = bio;
> - spin_unlock_irq(&mddev->lock);
>
> - INIT_WORK(&mddev->flush_work, submit_flushes);
> - queue_work(md_wq, &mddev->flush_work);
> + bio_endio(f_bio);
> }
> EXPORT_SYMBOL(md_flush_request);
>
> @@ -555,7 +545,6 @@ void mddev_init(struct mddev *mddev)
> atomic_set(&mddev->openers, 0);
> atomic_set(&mddev->active_io, 0);
> spin_lock_init(&mddev->lock);
> - atomic_set(&mddev->flush_pending, 0);
> init_waitqueue_head(&mddev->sb_wait);
> init_waitqueue_head(&mddev->recovery_wait);
> mddev->reshape_position = MaxSector;
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 7d6bcf0..16e7f03 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -252,6 +252,13 @@ enum mddev_sb_flags {
> MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
> };
>
> +struct flush_info {
> + struct bio *bio;
> + struct bio flush_bio;
> + struct mddev *mddev;
> + struct work_struct flush_work;
> +};
> +
> struct mddev {
> void *private;
> struct md_personality *pers;
> @@ -457,13 +464,6 @@ struct mddev {
> * metadata and bitmap writes
> */
>
> - /* Generic flush handling.
> - * The last to finish preflush schedules a worker to submit
> - * the rest of the request (without the REQ_PREFLUSH flag).
> - */
> - struct bio *flush_bio;
> - atomic_t flush_pending;
> - struct work_struct flush_work;
> struct work_struct event_work; /* used by dm to report failure event */
> void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
> struct md_cluster_info *cluster_info;