From mboxrd@z Thu Jan 1 00:00:00 1970 From: Abhijith Das Date: Fri, 7 Sep 2018 08:51:53 -0400 (EDT) Subject: [Cluster-devel] [GFS2 PATCH 4/4] gfs2: read journal in large chunks to locate the head In-Reply-To: References: <1536253357-8967-1-git-send-email-adas@redhat.com> <1536253357-8967-5-git-send-email-adas@redhat.com> Message-ID: <1885267593.63541784.1536324713959.JavaMail.zimbra@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit ----- Original Message ----- > From: "Andreas Gruenbacher" > To: "Abhi Das" > Cc: "cluster-devel" > Sent: Friday, September 7, 2018 7:14:29 AM > Subject: Re: [Cluster-devel] [GFS2 PATCH 4/4] gfs2: read journal in large chunks to locate the head > > Abhi, > > On 6 September 2018 at 19:02, Abhi Das wrote: > > Use bio(s) to read in the journal sequentially in large chunks and > > locate the head of the journal. > > This is faster in most cases when compared to the existing bisect > > method which operates one block at a time. > > > > Signed-off-by: Abhi Das > > --- > > fs/gfs2/incore.h | 8 +++- > > fs/gfs2/lops.c | 122 > > +++++++++++++++++++++++++++++++++++++++++++++------ > > fs/gfs2/lops.h | 1 + > > fs/gfs2/ops_fstype.c | 1 + > > fs/gfs2/recovery.c | 115 > > +++++------------------------------------------- > > 5 files changed, 129 insertions(+), 118 deletions(-) > > > > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > > index b96d39c..b24c105 100644 > > --- a/fs/gfs2/incore.h > > +++ b/fs/gfs2/incore.h > > @@ -529,6 +529,11 @@ struct gfs2_journal_extent { > > u64 blocks; > > }; > > > > +enum { > > + JDF_RECOVERY = 1, > > + JDF_JHEAD = 2, > > +}; > > + > > struct gfs2_jdesc { > > struct list_head jd_list; > > struct list_head extent_list; > > @@ -536,12 +541,13 @@ struct gfs2_jdesc { > > struct work_struct jd_work; > > struct inode *jd_inode; > > unsigned long jd_flags; > > -#define JDF_RECOVERY 1 > > unsigned int jd_jid; > > unsigned int jd_blocks; > > int jd_recover_error; > > /* Replay stuff */ > > > > + struct gfs2_log_header_host jd_jhead; > > + struct bio *jd_rd_bio; /* bio used for reading this journal */ > > unsigned int jd_found_blocks; > > unsigned int jd_found_revokes; > > unsigned int jd_replayed_blocks; > > diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c > > index 4cc19af..21979b2 100644 > > --- a/fs/gfs2/lops.c > > +++ b/fs/gfs2/lops.c > > @@ -18,6 +18,7 @@ > > #include > > #include > > > > +#include "bmap.h" > > #include "dir.h" > > #include "gfs2.h" > > #include "incore.h" > > @@ -227,6 +228,50 @@ static void gfs2_end_log_write(struct bio *bio) > > wake_up(&sdp->sd_log_flush_wait); > > } > > > > +static void gfs2_end_log_read(struct bio *bio) > > +{ > > + struct gfs2_jdesc *jd = bio->bi_private; > > + struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > > + struct page *page; > > + struct bio_vec *bvec; > > + int i, last; > > + > > + if (bio->bi_status) { > > + fs_err(sdp, "Error %d reading from journal, jid=%u\n", > > + bio->bi_status, jd->jd_jid); > > + } > > + > > + bio_for_each_segment_all(bvec, bio, i) { > > + struct gfs2_log_header_host uninitialized_var(lh); > > + void *ptr; > > + > > + page = bvec->bv_page; > > + ptr = page_address(page); > > + last = page_private(page); > > + > > + if (!test_bit(JDF_JHEAD, &jd->jd_flags)) { > > + mempool_free(page, gfs2_page_pool); > > + continue; > > + } > > + > > + if (!__get_log_header(sdp, ptr, 0, &lh)) { > > + if (lh.lh_sequence > jd->jd_jhead.lh_sequence) > > + jd->jd_jhead = lh; > > + else > > + goto found; > > + } > > + > > + if (last) { > > + found: > > + clear_bit(JDF_JHEAD, &jd->jd_flags); > > + wake_up_bit(&jd->jd_flags, JDF_JHEAD); > > + } > > + mempool_free(page, gfs2_page_pool); > > + } > > + > > + bio_put(bio); > > +} > > + > > /** > > * gfs2_log_flush_bio - Submit any pending log bio > > * @biop: Address of the bio pointer > > @@ -241,8 +286,10 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int > > op_flags) > > { > > struct bio *bio = *biop; > > if (bio) { > > - struct gfs2_sbd *sdp = bio->bi_private; > > - atomic_inc(&sdp->sd_log_in_flight); > > + if (op != REQ_OP_READ) { > > + struct gfs2_sbd *sdp = bio->bi_private; > > + atomic_inc(&sdp->sd_log_in_flight); > > + } > > bio_set_op_attrs(bio, op, op_flags); > > submit_bio(bio); > > *biop = NULL; > > @@ -253,6 +300,7 @@ void gfs2_log_flush_bio(struct bio **biop, int op, int > > op_flags) > > * gfs2_log_alloc_bio - Allocate a new bio for log writing > > * @jd: The journal descriptor > > * @blkno: The next device block number we want to write to > > + * @op: REQ_OP > > * > > * This should never be called when there is a cached bio in the > > * super block. When it returns, there will be a cached bio in the > > @@ -262,21 +310,24 @@ void gfs2_log_flush_bio(struct bio **biop, int op, > > int op_flags) > > * Returns: Newly allocated bio > > */ > > > > -static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno) > > +static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc *jd, u64 blkno, > > int op) > > { > > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > > struct super_block *sb = sdp->sd_vfs; > > struct bio *bio; > > > > - BUG_ON(sdp->sd_log_bio); > > + BUG_ON((op == REQ_OP_READ ? jd->jd_rd_bio : sdp->sd_log_bio)); > > > > bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); > > bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); > > bio_set_dev(bio, sb->s_bdev); > > - bio->bi_end_io = gfs2_end_log_write; > > - bio->bi_private = sdp; > > + bio->bi_end_io = op == REQ_OP_READ ? gfs2_end_log_read : > > gfs2_end_log_write; > > + bio->bi_private = op == REQ_OP_READ ? (void*)jd : (void*)sdp; > > > > - sdp->sd_log_bio = bio; > > + if (op == REQ_OP_READ) > > + jd->jd_rd_bio = bio; > > + else > > + sdp->sd_log_bio = bio; > > > > return bio; > > } > > @@ -285,6 +336,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_jdesc > > *jd, u64 blkno) > > * gfs2_log_get_bio - Get cached log bio, or allocate a new one > > * @jd: The journal descriptor > > * @blkno: The device block number we want to write to > > + * @op: REQ_OP > > * > > * If there is a cached bio, then if the next block number is sequential > > * with the previous one, return it, otherwise flush the bio to the > > @@ -294,10 +346,10 @@ static struct bio *gfs2_log_alloc_bio(struct > > gfs2_jdesc *jd, u64 blkno) > > * Returns: The bio to use for log writes > > */ > > > > -static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno) > > +static struct bio *gfs2_log_get_bio(struct gfs2_jdesc *jd, u64 blkno, int > > op) > > { > > struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); > > - struct bio *bio = sdp->sd_log_bio; > > + struct bio *bio = op == REQ_OP_READ ? jd->jd_rd_bio : > > sdp->sd_log_bio; > > u64 nblk; > > > > if (bio) { > > @@ -305,10 +357,12 @@ static struct bio *gfs2_log_get_bio(struct gfs2_jdesc > > *jd, u64 blkno) > > nblk >>= sdp->sd_fsb2bb_shift; > > if (blkno == nblk) > > return bio; > > - gfs2_log_flush_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0); > > + gfs2_log_flush_bio(op == REQ_OP_READ ? &jd->jd_rd_bio > > + : &sdp->sd_log_bio, REQ_OP_WRITE, 0); > > Shouldn't it be "op" here instead of "REQ_OP_WRITE"? > Yep, you're right! Good catch. I'll repost this patch along with Bob's suggestions as well. Cheers! --Abhi