From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Price Date: Wed, 21 Jan 2015 19:50:31 +0000 Subject: [Cluster-devel] [fsck.gfs2 PATCH] fsck.gfs2: Check the integrity of the journal index In-Reply-To: <306243605.11832356.1421692449387.JavaMail.zimbra@redhat.com> References: <306243605.11832356.1421692449387.JavaMail.zimbra@redhat.com> Message-ID: <54C00307.4050103@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi Bob, Just one minor comment below. Other than that, the 3 patches look fine to me. On 19/01/15 18:34, Bob Peterson wrote: > Hi, > > This patch checks the jindex system directory to make sure the entries > all start with "journal" and so forth. If not, the jindex is deleted > and rebuilt. As part of this patch, I moved where we read in the rindex > file and rgrps to an earlier point in time, before the journals are > replayed. This allows us to remove a dummied-up rgrp kludge in the code. > However, if the replayed journal block is part of an rgrp, we need to > refresh the rgrp based on the values rewritten from the journal. 
> > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson > --- > diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c > index 095d118..4eaba1e 100644 > --- a/gfs2/fsck/fs_recovery.c > +++ b/gfs2/fsck/fs_recovery.c > @@ -96,6 +96,30 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp) > } > } > > +static void refresh_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, > + struct gfs2_buffer_head *bh, uint64_t blkno) > +{ > + int i; > + > + log_debug(_("Block is part of rgrp 0x%llx; refreshing the rgrp.\n"), > + (unsigned long long)rgd->ri.ri_addr); > + for (i = 0; i < rgd->ri.ri_length; i++) { > + if (rgd->bits[i].bi_bh->b_blocknr != blkno) > + continue; > + > + memcpy(rgd->bits[i].bi_bh->b_data, bh->b_data, sdp->bsize); > + bmodified(rgd->bits[i].bi_bh); > + if (i == 0) { /* this is the rgrp itself */ > + if (sdp->gfs1) > + gfs_rgrp_in((struct gfs_rgrp *)&rgd->rg, > + rgd->bits[0].bi_bh); > + else > + gfs2_rgrp_in(&rgd->rg, rgd->bits[0].bi_bh); > + } > + break; > + } > +} > + > static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, > struct gfs2_log_descriptor *ld, __be64 *ptr, > int pass) > @@ -105,6 +129,7 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, > struct gfs2_buffer_head *bh_log, *bh_ip; > uint64_t blkno; > int error = 0; > + struct rgrp_tree *rgd; > > if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) > return 0; > @@ -147,6 +172,9 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start, > error = -EIO; > } else { > bmodified(bh_ip); > + rgd = gfs2_blk2rgrpd(sdp, blkno); > + if (rgd && blkno < rgd->ri.ri_data0) > + refresh_rgrp(sdp, rgd, bh_ip, blkno); > } > > brelse(bh_log); > @@ -676,28 +704,8 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check, > > for(i = 0; i < sdp->md.journals; i++) { > if (sdp->md.journal[i]) { > - struct rgrp_tree rgd; > - struct gfs2_bitmap bits; > - > - /* The real rgrp tree hasn't been 
built at this point, > - * so we need to dummy one up that covers the whole > - * file system so basic functions in check_metatree > - * don't segfault. */ > - rgd.start = sdp->sb_addr + 1; > - rgd.length = 1; > - bits.bi_bh = NULL; > - bits.bi_start = 0; > - bits.bi_len = sdp->fssize / GFS2_NBBY; > - rgd.bits = &bits; > - rgd.ri.ri_addr = sdp->sb_addr + 1; > - rgd.ri.ri_length = 1; > - rgd.ri.ri_data0 = sdp->sb_addr + 2; > - rgd.ri.ri_data = sdp->fssize - (sdp->sb_addr + 2); > - > - sdp->rgtree.osi_node = (struct osi_node *)&rgd; > error = check_metatree(sdp->md.journal[i], > &rangecheck_journal); > - sdp->rgtree.osi_node = NULL; > if (error) > /* Don't use fsck_inode_put here because it's a > system file and we need to dismantle it. */ > @@ -707,8 +715,7 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check, > if (!sdp->md.journal[i]) { > log_err(_("File system journal \"journal%d\" is " > "missing or corrupt: pass1 will try to " > - "recreate it.\n"), > - i); > + "recreate it.\n"), i); > continue; > } > if (!error) { > diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c > index 4e52262..043917c 100644 > --- a/gfs2/fsck/initialize.c > +++ b/gfs2/fsck/initialize.c > @@ -142,7 +142,7 @@ static int set_block_ranges(struct gfs2_sbd *sdp) > uint64_t rmin = 0; > int error; > > - log_info( _("Setting block ranges...\n")); > + log_info( _("Setting block ranges...")); > > for (n = osi_first(&sdp->rgtree); n; n = next) { > next = osi_next(n); > @@ -184,9 +184,12 @@ static int set_block_ranges(struct gfs2_sbd *sdp) > goto fail; > } > > + log_info(_("0x%llx to 0x%llx\n"), (unsigned long long)first_data_block, > + (unsigned long long)last_data_block); > return 0; > > fail: > + log_info( _("Error\n")); > return -1; > } > > @@ -685,10 +688,6 @@ static int init_system_inodes(struct gfs2_sbd *sdp) > if (sdp->md.rooti == NULL) > return -1; > > - err = fetch_rgrps(sdp); > - if (err) > - return err; > - > 
/******************************************************************* > ***************** Initialize more system inodes ***************** > *******************************************************************/ > @@ -1513,6 +1512,63 @@ static int init_rindex(struct gfs2_sbd *sdp) > } > > /** > + * check_jindex_dent - check the jindex directory entries > + * > + * This function makes sure the directory entries of the jindex are valid. > + * If they're not '.' or '..' they better have the form journalXXX. > + */ > +static int check_jindex_dent(struct gfs2_inode *ip, struct gfs2_dirent *dent, > + struct gfs2_dirent *prev_de, > + struct gfs2_buffer_head *bh, char *filename, > + uint32_t *count, int *lindex, void *priv) > +{ > + struct gfs2_dirent dentry, *de; > + int i; > + char tmp_name[PATH_MAX]; > + > + memset(&dentry, 0, sizeof(struct gfs2_dirent)); Instead of using these memset()s, it might be faster to use zero initialisers (= {0}) in the declarations. Or maybe drop this memset() and make sure gfs2_dirent_in() sets the entire struct. It should be possible to avoid using tmp_name altogether in this function, too, I think. Cheers, Andy > + gfs2_dirent_in(&dentry, (char *)dent); > + de = &dentry; > + > + if (de->de_name_len == 1 && filename[0] == '.') > + goto dirent_good; > + if (de->de_name_len == 2 && filename[0] == '.' && filename[1] == '.') > + goto dirent_good; > + > + memset(tmp_name, 0, sizeof(tmp_name)); > + if (de->de_name_len < sizeof(tmp_name)) > + strncpy(tmp_name, filename, de->de_name_len); > + else > + strncpy(tmp_name, filename, sizeof(tmp_name) - 1); > + > + if ((de->de_name_len >= 11) || /* "journal9999" */ > + (de->de_name_len <= 7) || > + (strncmp(filename, "journal", 7))) { > + log_debug(_("Journal index entry '%s' has an invalid filename." 
> + "\n"), tmp_name); > + return -1; > + } > + for (i = 7; i < de->de_name_len; i++) { > + if (filename[i] < '0' || filename[i] > '9') { > + log_debug(_("Journal '%s' has an invalid filename.\n"), > + tmp_name); > + return -4; > + } > + } > + > +dirent_good: > + /* Return the number of leaf entries so metawalk doesn't flag this > + leaf as having none. */ > + *count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries); > + return 0; > +} > + > +struct metawalk_fxns jindex_check_fxns = { > + .private = NULL, > + .check_dentry = check_jindex_dent, > +}; > + > +/** > * init_jindex - read in the rindex file > */ > static int init_jindex(struct gfs2_sbd *sdp) > @@ -1521,6 +1577,7 @@ static int init_jindex(struct gfs2_sbd *sdp) > ****************** Fill in journal information ****************** > *******************************************************************/ > > + log_debug("Validating the journal index.\n"); > /* rgrepair requires the journals be read in in order to distinguish > "real" rgrps from rgrps that are just copies left in journals. */ > if (sdp->gfs1) > @@ -1537,24 +1594,52 @@ static int init_jindex(struct gfs2_sbd *sdp) > "jindex file.\n")); > return -1; > } > - /* In order to rebuild jindex, we need some valid > - rgrps in memory. Temporarily read those in. */ > - err = fetch_rgrps(sdp); > - if (err) > - return err; > > err = build_jindex(sdp); > - /* Free rgrps read in earlier (re-read them later) */ > - gfs2_rgrp_free(&sdp->rgtree); > if (err) { > log_crit(_("Error %d rebuilding jindex\n"), err); > return err; > } > + gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode); > + } > + > + /* check for irrelevant entries in jindex. Can't use check_dir because > + that creates and destroys the inode, which we don't want. 
*/ > + if (!sdp->gfs1) { > + int error; > + > + log_debug("Checking the integrity of the journal index.\n"); > + if (sdp->md.jiinode->i_di.di_flags & GFS2_DIF_EXHASH) > + error = check_leaf_blks(sdp->md.jiinode, > + &jindex_check_fxns); > + else > + error = check_linear_dir(sdp->md.jiinode, > + sdp->md.jiinode->i_bh, > + &jindex_check_fxns); > + if (error) { > + log_err(_("The system journal index is damaged.\n")); > + if (!query( _("Okay to rebuild it? (y/n) "))) { > + log_crit(_("Error: cannot proceed without a " > + "valid jindex file.\n")); > + return -1; > + } > + inode_put(&sdp->md.jiinode); > + gfs2_dirent_del(sdp->master_dir, "jindex", 6); > + log_err(_("Corrupt journal index was removed.\n")); > + error = build_jindex(sdp); > + if (error) { > + log_err(_("Error rebuilding journal " > + "index: Cannot continue.\n")); > + return error; > + } > + gfs2_lookupi(sdp->master_dir, "jindex", 6, > + &sdp->md.jiinode); > + } > } > > /* read in the ji data */ > if (ji_update(sdp)){ > - log_err( _("Unable to read in jindex inode.\n")); > + log_err( _("Unable to read jindex inode.\n")); > return -1; > } > return 0; > @@ -1655,31 +1740,34 @@ int initialize(struct gfs2_sbd *sdp, int force_check, int preen, > if (init_rindex(sdp)) > return FSCK_ERROR; > > - /* We need to read in jindex in order to replay the journals */ > - if (init_jindex(sdp)) > + if (fetch_rgrps(sdp)) > return FSCK_ERROR; > > - /* If GFS, rebuild the journals. If GFS2, replay them. We don't have > - the smarts to replay GFS1 journals (neither did gfs_fsck). */ > - > - if (sdp->gfs1) { > - if (reconstruct_journals(sdp)) > + /* We need to read in jindex in order to replay the journals. If > + there's an error, we may proceed and let init_system_inodes > + try to rebuild it. */ > + if (init_jindex(sdp) == 0) { > + /* If GFS, rebuild the journals. If GFS2, replay them. We don't > + have the smarts to replay GFS1 journals (neither did > + gfs_fsck). 
*/ > + if (sdp->gfs1) { > + if (reconstruct_journals(sdp)) > + return FSCK_ERROR; > + } else if (replay_journals(sdp, preen, force_check, > + &clean_journals)) { > + if (!opts.no && preen_is_safe(sdp, preen, force_check)) > + block_mounters(sdp, 0); > + stack; > return FSCK_ERROR; > - } else if (replay_journals(sdp, preen, force_check, &clean_journals)) { > - if (!opts.no && preen_is_safe(sdp, preen, force_check)) > - block_mounters(sdp, 0); > - stack; > - return FSCK_ERROR; > - } > - if (sdp->md.journals == clean_journals) > - *all_clean = 1; > - else { > - if (force_check || !preen) > + } > + if (sdp->md.journals == clean_journals) > + *all_clean = 1; > + else if (force_check || !preen) > log_notice( _("\nJournal recovery complete.\n")); > - } > > - if (!force_check && *all_clean && preen) > - return FSCK_OK; > + if (!force_check && *all_clean && preen) > + return FSCK_OK; > + } > > if (init_system_inodes(sdp)) > return FSCK_ERROR; > diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c > index 5f432d6..b771b9e 100644 > --- a/gfs2/fsck/metawalk.c > +++ b/gfs2/fsck/metawalk.c > @@ -368,14 +368,11 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, > > if (type == DIR_LINEAR) { > dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode)); > - } else if (type == DIR_EXHASH) { > + } else { > dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf)); > log_debug( _("Checking leaf %llu (0x%llx)\n"), > (unsigned long long)bh->b_blocknr, > (unsigned long long)bh->b_blocknr); > - } else { > - log_err( _("Invalid directory type %d specified\n"), type); > - return -1; > } > > prev = NULL; > @@ -465,7 +462,7 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, > pass->private); > if (error < 0) { > stack; > - return -1; > + return error; > } > } > } > @@ -529,7 +526,7 @@ int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass, > if (pass->check_leaf_depth) > error = 
pass->check_leaf_depth(ip, *leaf_no, *ref_count, lbh); > > - if (pass->check_leaf) { > + if (error >= 0 && pass->check_leaf) { > error = pass->check_leaf(ip, *leaf_no, pass->private); > if (error == -EEXIST) { > log_info(_("Previous reference to leaf %lld (0x%llx) " > @@ -616,6 +613,8 @@ out: > (*ref_count) <<= (ip->i_di.di_depth - di_depth); > } > brelse(lbh); > + if (error < 0) > + return error; > return 0; > > bad_leaf: > @@ -674,7 +673,7 @@ static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize) > } > > /* Checks exhash directory entries */ > -static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) > +int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) > { > int error = 0; > unsigned hsize = (1 << ip->i_di.di_depth); > @@ -801,6 +800,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass) > orig_ref_count, ref_count); > tbl_valid = 0; > } > + if (error < 0) { > + free(tbl); > + return error; > + } > if (!leaf.lf_next || error) > break; > leaf_no = leaf.lf_next; > diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h > index aae9121..06345c3 100644 > --- a/gfs2/fsck/metawalk.h > +++ b/gfs2/fsck/metawalk.h > @@ -11,6 +11,7 @@ struct metawalk_fxns; > extern int check_inode_eattr(struct gfs2_inode *ip, > struct metawalk_fxns *pass); > extern int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass); > +extern int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass); > extern int check_dir(struct gfs2_sbd *sdp, uint64_t block, > struct metawalk_fxns *pass); > extern int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh, > diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c > index 4348683..b952619 100644 > --- a/gfs2/fsck/pass1.c > +++ b/gfs2/fsck/pass1.c > @@ -238,7 +238,7 @@ static int p1check_leaf(struct gfs2_inode *ip, uint64_t block, void *private) > if (q == gfs2_leaf_blk) /* If the previous reference also saw > this as a 
leaf, it was already > checked, so don't check again. */ > - return -EEXIST; > + return EEXIST; /* non-fatal */ > } > fsck_blockmap_set(ip, block, _("directory leaf"), gfs2_leaf_blk); > return 0; >