From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Price Date: Fri, 22 Feb 2013 13:34:17 +0000 Subject: [Cluster-devel] fsck: Clean up pass1 inode iteration code In-Reply-To: <1361534883.2720.25.camel@menhir> References: <1361534883.2720.25.camel@menhir> Message-ID: <512773D9.2090400@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi Steve, On 22/02/13 12:08, Steven Whitehouse wrote: > > The original version of this patch, including readahead for the > inodes only appears to deliver a performance improvement in some > particular cases, and slows down other cases. So this patch is > fairly similar in that it includes the clean ups from the previous > attempt, however the readahead has been left out for the time > being until the problem is better understood, and can be fixed. > > This clean up still has the advantage of better code structure > around the main loop, and in addition it also reduced the number > of read requests which are generated too. Looks good to me. > Signed-off-by: Steven Whitehouse > > diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c > index 540f2a9..9a34e97 100644 > --- a/gfs2/fsck/pass1.c > +++ b/gfs2/fsck/pass1.c > @@ -14,6 +14,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -1547,6 +1548,128 @@ static int check_system_inodes(struct gfs2_sbd *sdp) > return 0; > } > > +static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n) > +{ > + struct gfs2_buffer_head *bh; > + unsigned i; > + uint64_t block; > + > + for (i = 0; i < n; i++) { > + block = ibuf[i]; > + > + /* skip gfs1 rindex indirect blocks */ > + if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) { > + log_debug(_("Skipping rindex indir block " > + "%lld (0x%llx)\n"), > + (unsigned long long)block, > + (unsigned long long)block); > + continue; > + } > + warm_fuzzy_stuff(block); > + > + if (fsck_abort) { /* if asked to abort */ > + gfs2_special_free(&gfs1_rindex_blks); > + return FSCK_OK; > + } > + if (skip_this_pass) { > + printf( _("Skipping pass 1 is not a good idea.\n")); > + skip_this_pass = FALSE; > + fflush(stdout); > + } > + if (fsck_system_inode(sdp, block)) { > + log_debug(_("Already processed system inode " > + "%lld (0x%llx)\n"), > + (unsigned long long)block, > + (unsigned long long)block); > + continue; > + } > + bh = bread(sdp, block); > + > + if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) { > + /* In gfs2, a bitmap mark of 2 means an inode, > + but in gfs1 it means any metadata. So if > + this is gfs1 and not an inode, it may be > + okay. If it's non-dinode metadata, it will > + be referenced by an inode, so we need to > + skip it here and it will be sorted out > + when the referencing inode is checked. */ > + if (sdp->gfs1) { > + uint32_t check_magic; > + > + check_magic = ((struct gfs2_meta_header *) > + (bh->b_data))->mh_magic; > + if (be32_to_cpu(check_magic) == GFS2_MAGIC) { > + log_debug( _("Deferring GFS1 " > + "metadata block #" > + "%" PRIu64" (0x%" > + PRIx64 ")\n"), > + block, block); > + brelse(bh); > + continue; > + } > + } > + log_err( _("Found invalid inode at block #" > + "%llu (0x%llx)\n"), > + (unsigned long long)block, > + (unsigned long long)block); > + if (gfs2_blockmap_set(bl, block, gfs2_block_free)) { > + stack; > + brelse(bh); > + gfs2_special_free(&gfs1_rindex_blks); > + return FSCK_ERROR; > + } > + check_n_fix_bitmap(sdp, block, gfs2_block_free); > + } else if (handle_di(sdp, bh) < 0) { > + stack; > + brelse(bh); > + gfs2_special_free(&gfs1_rindex_blks); > + return FSCK_ERROR; > + } > + /* Ignore everything else - they should be hit by the > + handle_di step. Don't check NONE either, because > + check_meta passes everything if GFS2_METATYPE_NONE > + is specified. Hopefully, other metadata types such > + as indirect blocks will be handled when the inode > + itself is processed, and if it's not, it should be > + caught in pass5. */ > + brelse(bh); > + } > + > + return 0; > +} > + > +static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd) > +{ > + unsigned k, n, i; > + uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t)); Coverity will complain about ibuf not getting checked here. Andy > + int ret; > + > + for (k = 0; k < rgd->ri.ri_length; k++) { > + n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE); > + > + if (n) { > + ret = pass1_process_bitmap(sdp, rgd, ibuf, n); > + if (ret) > + return ret; > + } > + > + /* > + For GFS1, we have to count the "free meta" blocks in the > + resource group and mark them specially so we can count them > + properly in pass5. > + */ > + if (!sdp->gfs1) > + continue; > + > + n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED); > + for (i = 0; i < n; i++) > + gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta); > + } > + > + free(ibuf); > + return 0; > +} > + > /** > * pass1 - walk through inodes and check inode state > * > @@ -1563,12 +1686,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp) > int pass1(struct gfs2_sbd *sdp) > { > struct osi_node *n, *next = NULL; > - struct gfs2_buffer_head *bh; > - uint64_t block = 0; > struct rgrp_tree *rgd; > - int first; > uint64_t i; > uint64_t rg_count = 0; > + int ret; > > osi_list_init(&gfs1_rindex_blks.list); > > @@ -1611,115 +1732,10 @@ int pass1(struct gfs2_sbd *sdp) > gfs2_meta_rgrp);*/ > } > > - first = 1; > + ret = pass1_process_rgrp(sdp, rgd); > + if (ret) > + return ret; > > - while (1) { > - /* "block" is relative to the entire file system */ > - /* Get the next dinode in the file system, according > - to the bitmap. This should ONLY be dinodes unless > - it's GFS1, in which case it can be any metadata. */ > - if (gfs2_next_rg_meta(rgd, &block, first)) > - break; > - /* skip gfs1 rindex indirect blocks */ > - if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) { > - log_debug(_("Skipping rindex indir block " > - "%lld (0x%llx)\n"), > - (unsigned long long)block, > - (unsigned long long)block); > - first = 0; > - continue; > - } > - warm_fuzzy_stuff(block); > - > - if (fsck_abort) { /* if asked to abort */ > - gfs2_special_free(&gfs1_rindex_blks); > - return FSCK_OK; > - } > - if (skip_this_pass) { > - printf( _("Skipping pass 1 is not a good idea.\n")); > - skip_this_pass = FALSE; > - fflush(stdout); > - } > - if (fsck_system_inode(sdp, block)) { > - log_debug(_("Already processed system inode " > - "%lld (0x%llx)\n"), > - (unsigned long long)block, > - (unsigned long long)block); > - first = 0; > - continue; > - } > - bh = bread(sdp, block); > - > - /*log_debug( _("Checking metadata block #%" PRIu64 > - " (0x%" PRIx64 ")\n"), block, block);*/ > - > - if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) { > - /* In gfs2, a bitmap mark of 2 means an inode, > - but in gfs1 it means any metadata. So if > - this is gfs1 and not an inode, it may be > - okay. If it's non-dinode metadata, it will > - be referenced by an inode, so we need to > - skip it here and it will be sorted out > - when the referencing inode is checked. */ > - if (sdp->gfs1) { > - uint32_t check_magic; > - > - check_magic = ((struct > - gfs2_meta_header *) > - (bh->b_data))->mh_magic; > - if (be32_to_cpu(check_magic) == > - GFS2_MAGIC) { > - log_debug( _("Deferring GFS1 " > - "metadata block #" > - "%" PRIu64" (0x%" > - PRIx64 ")\n"), > - block, block); > - brelse(bh); > - first = 0; > - continue; > - } > - } > - log_err( _("Found invalid inode at block #" > - "%llu (0x%llx)\n"), > - (unsigned long long)block, > - (unsigned long long)block); > - if (gfs2_blockmap_set(bl, block, > - gfs2_block_free)) { > - stack; > - brelse(bh); > - gfs2_special_free(&gfs1_rindex_blks); > - return FSCK_ERROR; > - } > - check_n_fix_bitmap(sdp, block, > - gfs2_block_free); > - } else if (handle_di(sdp, bh) < 0) { > - stack; > - brelse(bh); > - gfs2_special_free(&gfs1_rindex_blks); > - return FSCK_ERROR; > - } > - /* Ignore everything else - they should be hit by the > - handle_di step. Don't check NONE either, because > - check_meta passes everything if GFS2_METATYPE_NONE > - is specified. Hopefully, other metadata types such > - as indirect blocks will be handled when the inode > - itself is processed, and if it's not, it should be > - caught in pass5. */ > - brelse(bh); > - first = 0; > - } > - /* > - For GFS1, we have to count the "free meta" blocks in the > - resource group and mark them specially so we can count them > - properly in pass5. > - */ > - if (!sdp->gfs1) > - continue; > - first = 1; > - while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) { > - gfs2_blockmap_set(bl, block, gfs2_freemeta); > - first = 0; > - } > } > gfs2_special_free(&gfs1_rindex_blks); > return FSCK_OK; > diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h > index 46d4d67..db31a6c 100644 > --- a/gfs2/libgfs2/libgfs2.h > +++ b/gfs2/libgfs2/libgfs2.h > @@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp); > extern int do_init_inum(struct gfs2_sbd *sdp); > extern int do_init_statfs(struct gfs2_sbd *sdp); > extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type); > +extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, > + uint64_t *buf, uint8_t state); > extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, > int first); > extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, > uint64_t *block, uint32_t type, int first); > -extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, > - int first); > > /* super.c */ > extern int check_sb(struct gfs2_sb *sb); > diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c > index 645c45a..f9231ca 100644 > --- a/gfs2/libgfs2/structures.c > +++ b/gfs2/libgfs2/structures.c > @@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type) > return 0; > } > > +unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state) > +{ > + struct gfs2_bitmap *bi = &rgd->bits[idx]; > + unsigned n = 0; > + uint32_t blk = 0; > + > + while(blk < (bi->bi_len * GFS2_NBBY)) { > + blk = gfs2_bitfit((const unsigned char *)rgd->bh[idx]->b_data + bi->bi_offset, > + bi->bi_len, blk, state); > + if (blk == BFITNOENT) > + break; > + buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0; > + blk++; > + } > + > + return n; > +} > + > /** > * gfs2_next_rg_meta > * @rgd: > @@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first) > return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE); > } > > -int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first) > -{ > - return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED); > -} > - > /** > * next_rg_metatype > * @rgd: > >