* [Cluster-devel] [fsck.gfs2 PATCH] fsck.gfs2: Check the integrity of the journal index
2015-01-19 18:34 ` [Cluster-devel] [fsck.gfs2 PATCH] fsck.gfs2: Check the integrity of the journal index Bob Peterson
@ 2015-01-21 19:50 ` Andrew Price
2015-01-22 20:35 ` Bob Peterson
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Price @ 2015-01-21 19:50 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi Bob,
Just one minor comment below. Other than that, the 3 patches look fine
to me.
On 19/01/15 18:34, Bob Peterson wrote:
> Hi,
>
> This patch checks the jindex system directory to make sure the entries
> all start with "journal" and so forth. If not, the jindex is deleted
> and rebuilt. As part of this patch, I moved where we read in the rindex
> file and rgrps to an earlier point in time, before the journals are
> replayed. This allows us to remove a dummied-up rgrp kludge in the code.
> However, if the replayed journal block is part of an rgrp, we need to
> refresh the rgrp based on the values rewritten from the journal.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c
> index 095d118..4eaba1e 100644
> --- a/gfs2/fsck/fs_recovery.c
> +++ b/gfs2/fsck/fs_recovery.c
> @@ -96,6 +96,30 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
> }
> }
>
> +static void refresh_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
> + struct gfs2_buffer_head *bh, uint64_t blkno)
> +{
> + int i;
> +
> + log_debug(_("Block is part of rgrp 0x%llx; refreshing the rgrp.\n"),
> + (unsigned long long)rgd->ri.ri_addr);
> + for (i = 0; i < rgd->ri.ri_length; i++) {
> + if (rgd->bits[i].bi_bh->b_blocknr != blkno)
> + continue;
> +
> + memcpy(rgd->bits[i].bi_bh->b_data, bh->b_data, sdp->bsize);
> + bmodified(rgd->bits[i].bi_bh);
> + if (i == 0) { /* this is the rgrp itself */
> + if (sdp->gfs1)
> + gfs_rgrp_in((struct gfs_rgrp *)&rgd->rg,
> + rgd->bits[0].bi_bh);
> + else
> + gfs2_rgrp_in(&rgd->rg, rgd->bits[0].bi_bh);
> + }
> + break;
> + }
> +}
> +
> static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
> struct gfs2_log_descriptor *ld, __be64 *ptr,
> int pass)
> @@ -105,6 +129,7 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
> struct gfs2_buffer_head *bh_log, *bh_ip;
> uint64_t blkno;
> int error = 0;
> + struct rgrp_tree *rgd;
>
> if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
> return 0;
> @@ -147,6 +172,9 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
> error = -EIO;
> } else {
> bmodified(bh_ip);
> + rgd = gfs2_blk2rgrpd(sdp, blkno);
> + if (rgd && blkno < rgd->ri.ri_data0)
> + refresh_rgrp(sdp, rgd, bh_ip, blkno);
> }
>
> brelse(bh_log);
> @@ -676,28 +704,8 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
>
> for(i = 0; i < sdp->md.journals; i++) {
> if (sdp->md.journal[i]) {
> - struct rgrp_tree rgd;
> - struct gfs2_bitmap bits;
> -
> - /* The real rgrp tree hasn't been built at this point,
> - * so we need to dummy one up that covers the whole
> - * file system so basic functions in check_metatree
> - * don't segfault. */
> - rgd.start = sdp->sb_addr + 1;
> - rgd.length = 1;
> - bits.bi_bh = NULL;
> - bits.bi_start = 0;
> - bits.bi_len = sdp->fssize / GFS2_NBBY;
> - rgd.bits = &bits;
> - rgd.ri.ri_addr = sdp->sb_addr + 1;
> - rgd.ri.ri_length = 1;
> - rgd.ri.ri_data0 = sdp->sb_addr + 2;
> - rgd.ri.ri_data = sdp->fssize - (sdp->sb_addr + 2);
> -
> - sdp->rgtree.osi_node = (struct osi_node *)&rgd;
> error = check_metatree(sdp->md.journal[i],
> &rangecheck_journal);
> - sdp->rgtree.osi_node = NULL;
> if (error)
> /* Don't use fsck_inode_put here because it's a
> system file and we need to dismantle it. */
> @@ -707,8 +715,7 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
> if (!sdp->md.journal[i]) {
> log_err(_("File system journal \"journal%d\" is "
> "missing or corrupt: pass1 will try to "
> - "recreate it.\n"),
> - i);
> + "recreate it.\n"), i);
> continue;
> }
> if (!error) {
> diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
> index 4e52262..043917c 100644
> --- a/gfs2/fsck/initialize.c
> +++ b/gfs2/fsck/initialize.c
> @@ -142,7 +142,7 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
> uint64_t rmin = 0;
> int error;
>
> - log_info( _("Setting block ranges...\n"));
> + log_info( _("Setting block ranges..."));
>
> for (n = osi_first(&sdp->rgtree); n; n = next) {
> next = osi_next(n);
> @@ -184,9 +184,12 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
> goto fail;
> }
>
> + log_info(_("0x%llx to 0x%llx\n"), (unsigned long long)first_data_block,
> + (unsigned long long)last_data_block);
> return 0;
>
> fail:
> + log_info( _("Error\n"));
> return -1;
> }
>
> @@ -685,10 +688,6 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
> if (sdp->md.rooti == NULL)
> return -1;
>
> - err = fetch_rgrps(sdp);
> - if (err)
> - return err;
> -
> /*******************************************************************
> ***************** Initialize more system inodes *****************
> *******************************************************************/
> @@ -1513,6 +1512,63 @@ static int init_rindex(struct gfs2_sbd *sdp)
> }
>
> /**
> + * check_jindex_dent - check the jindex directory entries
> + *
> + * This function makes sure the directory entries of the jindex are valid.
> + * If they're not '.' or '..' they better have the form journalXXX.
> + */
> +static int check_jindex_dent(struct gfs2_inode *ip, struct gfs2_dirent *dent,
> + struct gfs2_dirent *prev_de,
> + struct gfs2_buffer_head *bh, char *filename,
> + uint32_t *count, int *lindex, void *priv)
> +{
> + struct gfs2_dirent dentry, *de;
> + int i;
> + char tmp_name[PATH_MAX];
> +
> + memset(&dentry, 0, sizeof(struct gfs2_dirent));
Instead of using these memset()s, it might be faster to use zero
initialisers ( = {0}) in the declarations. Or maybe drop this
memset() and make sure gfs2_dirent_in() sets the entire struct. It
should be possible to avoid using tmp_name altogether in this function,
too, I think.
Cheers,
Andy
> + gfs2_dirent_in(&dentry, (char *)dent);
> + de = &dentry;
> +
> + if (de->de_name_len == 1 && filename[0] == '.')
> + goto dirent_good;
> + if (de->de_name_len == 2 && filename[0] == '.' && filename[1] == '.')
> + goto dirent_good;
> +
> + memset(tmp_name, 0, sizeof(tmp_name));
> + if (de->de_name_len < sizeof(tmp_name))
> + strncpy(tmp_name, filename, de->de_name_len);
> + else
> + strncpy(tmp_name, filename, sizeof(tmp_name) - 1);
> +
> + if ((de->de_name_len >= 11) || /* "journal9999" */
> + (de->de_name_len <= 7) ||
> + (strncmp(filename, "journal", 7))) {
> + log_debug(_("Journal index entry '%s' has an invalid filename."
> + "\n"), tmp_name);
> + return -1;
> + }
> + for (i = 7; i < de->de_name_len; i++) {
> + if (filename[i] < '0' || filename[i] > '9') {
> + log_debug(_("Journal '%s' has an invalid filename.\n"),
> + tmp_name);
> + return -4;
> + }
> + }
> +
> +dirent_good:
> + /* Return the number of leaf entries so metawalk doesn't flag this
> + leaf as having none. */
> + *count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
> + return 0;
> +}
> +
> +struct metawalk_fxns jindex_check_fxns = {
> + .private = NULL,
> + .check_dentry = check_jindex_dent,
> +};
> +
> +/**
> * init_jindex - read in the rindex file
> */
> static int init_jindex(struct gfs2_sbd *sdp)
> @@ -1521,6 +1577,7 @@ static int init_jindex(struct gfs2_sbd *sdp)
> ****************** Fill in journal information ******************
> *******************************************************************/
>
> + log_debug("Validating the journal index.\n");
> /* rgrepair requires the journals be read in in order to distinguish
> "real" rgrps from rgrps that are just copies left in journals. */
> if (sdp->gfs1)
> @@ -1537,24 +1594,52 @@ static int init_jindex(struct gfs2_sbd *sdp)
> "jindex file.\n"));
> return -1;
> }
> - /* In order to rebuild jindex, we need some valid
> - rgrps in memory. Temporarily read those in. */
> - err = fetch_rgrps(sdp);
> - if (err)
> - return err;
>
> err = build_jindex(sdp);
> - /* Free rgrps read in earlier (re-read them later) */
> - gfs2_rgrp_free(&sdp->rgtree);
> if (err) {
> log_crit(_("Error %d rebuilding jindex\n"), err);
> return err;
> }
> + gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
> + }
> +
> + /* check for irrelevant entries in jindex. Can't use check_dir because
> + that creates and destroys the inode, which we don't want. */
> + if (!sdp->gfs1) {
> + int error;
> +
> + log_debug("Checking the integrity of the journal index.\n");
> + if (sdp->md.jiinode->i_di.di_flags & GFS2_DIF_EXHASH)
> + error = check_leaf_blks(sdp->md.jiinode,
> + &jindex_check_fxns);
> + else
> + error = check_linear_dir(sdp->md.jiinode,
> + sdp->md.jiinode->i_bh,
> + &jindex_check_fxns);
> + if (error) {
> + log_err(_("The system journal index is damaged.\n"));
> + if (!query( _("Okay to rebuild it? (y/n) "))) {
> + log_crit(_("Error: cannot proceed without a "
> + "valid jindex file.\n"));
> + return -1;
> + }
> + inode_put(&sdp->md.jiinode);
> + gfs2_dirent_del(sdp->master_dir, "jindex", 6);
> + log_err(_("Corrupt journal index was removed.\n"));
> + error = build_jindex(sdp);
> + if (error) {
> + log_err(_("Error rebuilding journal "
> + "index: Cannot continue.\n"));
> + return error;
> + }
> + gfs2_lookupi(sdp->master_dir, "jindex", 6,
> + &sdp->md.jiinode);
> + }
> }
>
> /* read in the ji data */
> if (ji_update(sdp)){
> - log_err( _("Unable to read in jindex inode.\n"));
> + log_err( _("Unable to read jindex inode.\n"));
> return -1;
> }
> return 0;
> @@ -1655,31 +1740,34 @@ int initialize(struct gfs2_sbd *sdp, int force_check, int preen,
> if (init_rindex(sdp))
> return FSCK_ERROR;
>
> - /* We need to read in jindex in order to replay the journals */
> - if (init_jindex(sdp))
> + if (fetch_rgrps(sdp))
> return FSCK_ERROR;
>
> - /* If GFS, rebuild the journals. If GFS2, replay them. We don't have
> - the smarts to replay GFS1 journals (neither did gfs_fsck). */
> -
> - if (sdp->gfs1) {
> - if (reconstruct_journals(sdp))
> + /* We need to read in jindex in order to replay the journals. If
> + there's an error, we may proceed and let init_system_inodes
> + try to rebuild it. */
> + if (init_jindex(sdp) == 0) {
> + /* If GFS, rebuild the journals. If GFS2, replay them. We don't
> + have the smarts to replay GFS1 journals (neither did
> + gfs_fsck). */
> + if (sdp->gfs1) {
> + if (reconstruct_journals(sdp))
> + return FSCK_ERROR;
> + } else if (replay_journals(sdp, preen, force_check,
> + &clean_journals)) {
> + if (!opts.no && preen_is_safe(sdp, preen, force_check))
> + block_mounters(sdp, 0);
> + stack;
> return FSCK_ERROR;
> - } else if (replay_journals(sdp, preen, force_check, &clean_journals)) {
> - if (!opts.no && preen_is_safe(sdp, preen, force_check))
> - block_mounters(sdp, 0);
> - stack;
> - return FSCK_ERROR;
> - }
> - if (sdp->md.journals == clean_journals)
> - *all_clean = 1;
> - else {
> - if (force_check || !preen)
> + }
> + if (sdp->md.journals == clean_journals)
> + *all_clean = 1;
> + else if (force_check || !preen)
> log_notice( _("\nJournal recovery complete.\n"));
> - }
>
> - if (!force_check && *all_clean && preen)
> - return FSCK_OK;
> + if (!force_check && *all_clean && preen)
> + return FSCK_OK;
> + }
>
> if (init_system_inodes(sdp))
> return FSCK_ERROR;
> diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
> index 5f432d6..b771b9e 100644
> --- a/gfs2/fsck/metawalk.c
> +++ b/gfs2/fsck/metawalk.c
> @@ -368,14 +368,11 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
>
> if (type == DIR_LINEAR) {
> dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode));
> - } else if (type == DIR_EXHASH) {
> + } else {
> dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf));
> log_debug( _("Checking leaf %llu (0x%llx)\n"),
> (unsigned long long)bh->b_blocknr,
> (unsigned long long)bh->b_blocknr);
> - } else {
> - log_err( _("Invalid directory type %d specified\n"), type);
> - return -1;
> }
>
> prev = NULL;
> @@ -465,7 +462,7 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
> pass->private);
> if (error < 0) {
> stack;
> - return -1;
> + return error;
> }
> }
> }
> @@ -529,7 +526,7 @@ int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
> if (pass->check_leaf_depth)
> error = pass->check_leaf_depth(ip, *leaf_no, *ref_count, lbh);
>
> - if (pass->check_leaf) {
> + if (error >= 0 && pass->check_leaf) {
> error = pass->check_leaf(ip, *leaf_no, pass->private);
> if (error == -EEXIST) {
> log_info(_("Previous reference to leaf %lld (0x%llx) "
> @@ -616,6 +613,8 @@ out:
> (*ref_count) <<= (ip->i_di.di_depth - di_depth);
> }
> brelse(lbh);
> + if (error < 0)
> + return error;
> return 0;
>
> bad_leaf:
> @@ -674,7 +673,7 @@ static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize)
> }
>
> /* Checks exhash directory entries */
> -static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
> +int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
> {
> int error = 0;
> unsigned hsize = (1 << ip->i_di.di_depth);
> @@ -801,6 +800,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
> orig_ref_count, ref_count);
> tbl_valid = 0;
> }
> + if (error < 0) {
> + free(tbl);
> + return error;
> + }
> if (!leaf.lf_next || error)
> break;
> leaf_no = leaf.lf_next;
> diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
> index aae9121..06345c3 100644
> --- a/gfs2/fsck/metawalk.h
> +++ b/gfs2/fsck/metawalk.h
> @@ -11,6 +11,7 @@ struct metawalk_fxns;
> extern int check_inode_eattr(struct gfs2_inode *ip,
> struct metawalk_fxns *pass);
> extern int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass);
> +extern int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass);
> extern int check_dir(struct gfs2_sbd *sdp, uint64_t block,
> struct metawalk_fxns *pass);
> extern int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
> diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
> index 4348683..b952619 100644
> --- a/gfs2/fsck/pass1.c
> +++ b/gfs2/fsck/pass1.c
> @@ -238,7 +238,7 @@ static int p1check_leaf(struct gfs2_inode *ip, uint64_t block, void *private)
> if (q == gfs2_leaf_blk) /* If the previous reference also saw
> this as a leaf, it was already
> checked, so don't check again. */
> - return -EEXIST;
> + return EEXIST; /* non-fatal */
> }
> fsck_blockmap_set(ip, block, _("directory leaf"), gfs2_leaf_blk);
> return 0;
>
^ permalink raw reply [flat|nested] 3+ messages in thread