From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization
Date: Thu, 12 Nov 2015 13:44:22 +0000 [thread overview]
Message-ID: <564497B6.7060105@redhat.com> (raw)
In-Reply-To: <1446404579-5211-3-git-send-email-agruenba@redhat.com>
Hi,
On 01/11/15 19:02, Andreas Gruenbacher wrote:
> Instead of submitting separate bio for the inode and its extended
> attributes, submit a single bio for both when possible. The entire
> request becomes a normal read, not a readahead.
>
> To keep track of the buffer heads that make up the bio, we allocate
> temporary buffer head arrays: in the endio handler, it would also be
> possible to compute the buffer head block numbers from the bio and to
> grab the buffer heads with gfs2_getbuf, but the code would become even
> messier.
> ---
> fs/gfs2/meta_io.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 76 insertions(+), 18 deletions(-)
>
> diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
> index 0f24828..e650127 100644
> --- a/fs/gfs2/meta_io.c
> +++ b/fs/gfs2/meta_io.c
> @@ -187,19 +187,63 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
> return bh;
> }
>
> -static void gfs2_meta_readahead(struct gfs2_glock *gl, u64 blkno)
> +struct gfs2_meta_read {
> + int num;
> + struct buffer_head *bhs[0];
> +};
> +
> +static void gfs2_meta_read_endio(struct bio *bio) {
> + struct gfs2_meta_read *r = bio->bi_private;
> + int i;
> +
> + for (i = 0; i < r->num; i++) {
> + struct buffer_head *bh = r->bhs[i];
> +
> + if (unlikely(bio_flagged(bio, BIO_QUIET)))
> + set_bit(BH_Quiet, &bh->b_state);
> +
> + bh->b_end_io(bh, !bio->bi_error);
> + }
> + bio_put(bio);
> + kfree(r);
> +}
> +
> +/*
> + * (See submit_bh_wbc.)
> + */
> +static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
> {
> - struct buffer_head *bh;
> + struct gfs2_meta_read *r;
> + struct buffer_head *bh = bhs[0];
> + struct bio *bio;
> + int i;
>
> - bh = gfs2_getbuf(gl, blkno, 1);
> - lock_buffer(bh);
> - if (buffer_uptodate(bh)) {
> - unlock_buffer(bh);
> - brelse(bh);
> + if (!num)
> + return;
> +
> + if (num == 1) {
> + bh->b_end_io = end_buffer_read_sync;
> + submit_bh(rw, bh);
> return;
> }
> - bh->b_end_io = end_buffer_read_sync;
> - submit_bh(READA | REQ_META | REQ_PRIO, bh);
> +
> + r = kmalloc(sizeof(*r) + num * sizeof(r->bhs[0]),
> + GFP_NOIO | __GFP_NOFAIL);
Can we avoid doing this I wonder? I would like to avoid adding
GFP_NOFAIL allocations, and I think it should be possible to figure out
where the buffers are using the bio itself (i.e. iterating over it,
similar to gfs2_end_log_write() in lops.c) so that we don't need to
allocate this additional memory. Otherwise this looks like the right
approach I think,
Steve.
> + r->num = num;
> + for (i = 0; i < num; i++)
> + r->bhs[i] = bhs[i];
> +
> + bio = bio_alloc(GFP_NOIO, num);
> + bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
> + bio->bi_bdev = bh->b_bdev;
> + for (i = 0; i < num; i++) {
> + bh = bhs[i];
> + bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> + }
> + bio->bi_end_io = gfs2_meta_read_endio;
> + bio->bi_private = r;
> +
> + submit_bio(rw, bio);
> }
>
> /**
> @@ -216,7 +260,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> int rahead, struct buffer_head **bhp)
> {
> struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> - struct buffer_head *bh;
> + struct buffer_head *bh, *bhs[2];
> + int num = 0;
>
> if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
> *bhp = NULL;
> @@ -228,18 +273,31 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
> lock_buffer(bh);
> if (buffer_uptodate(bh)) {
> unlock_buffer(bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> - return 0;
> + flags &= ~DIO_WAIT;
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + get_bh(bh);
> + bhs[num++] = bh;
> }
> - bh->b_end_io = end_buffer_read_sync;
> - get_bh(bh);
> - submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
> - if (rahead)
> - gfs2_meta_readahead(gl, blkno + 1);
> +
> + if (rahead) {
> + bh = gfs2_getbuf(gl, blkno + 1, CREATE);
> +
> + lock_buffer(bh);
> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + brelse(bh);
> + } else {
> + bh->b_end_io = end_buffer_read_sync;
> + bhs[num++] = bh;
> + }
> + }
> +
> + gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
> if (!(flags & DIO_WAIT))
> return 0;
>
> + bh = *bhp;
> wait_on_buffer(bh);
> if (unlikely(!buffer_uptodate(bh))) {
> struct gfs2_trans *tr = current->journal_info;
next prev parent reply other threads:[~2015-11-12 13:44 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-01 19:02 [Cluster-devel] [PATCH 0/2] Extended attribute readahead Andreas Gruenbacher
2015-11-01 19:02 ` [Cluster-devel] [PATCH 1/2] gfs2: " Andreas Gruenbacher
2015-11-03 17:29 ` Bob Peterson
2015-11-03 19:02 ` Steven Whitehouse
2015-11-03 20:18 ` Andreas Gruenbacher
2015-11-01 19:02 ` [Cluster-devel] [PATCH 2/2] gfs2: Extended attribute readahead optimization Andreas Gruenbacher
2015-11-12 13:44 ` Steven Whitehouse [this message]
2015-11-12 15:33 ` Andreas Gruenbacher
2015-11-12 20:15 ` Andreas Gruenbacher
2015-11-12 20:33 ` Steven Whitehouse
2015-11-13 22:24 ` Andreas Gruenbacher
2015-11-16 18:14 ` Bob Peterson
2015-11-13 13:48 ` Bob Peterson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=564497B6.7060105@redhat.com \
--to=swhiteho@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.