cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Andrew Price <anprice@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [fsck.gfs2 PATCH] fsck.gfs2: Check the integrity of the journal index
Date: Wed, 21 Jan 2015 19:50:31 +0000	[thread overview]
Message-ID: <54C00307.4050103@redhat.com> (raw)
In-Reply-To: <306243605.11832356.1421692449387.JavaMail.zimbra@redhat.com>

Hi Bob,

Just one minor comment below. Other than that, the 3 patches look fine 
to me.

On 19/01/15 18:34, Bob Peterson wrote:
> Hi,
>
> This patch checks the jindex system directory to make sure the entries
> all start with "journal" and so forth. If not, the jindex is deleted
> and rebuilt. As part of this patch, I moved where we read in the rindex
> file and rgrps to an earlier point in time, before the journals are
> replayed. This allows us to remove a dummied-up rgrp kludge in the code.
> However, if the replayed journal block is part of an rgrp, we need to
> refresh the rgrp based on the values rewritten from the journal.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c
> index 095d118..4eaba1e 100644
> --- a/gfs2/fsck/fs_recovery.c
> +++ b/gfs2/fsck/fs_recovery.c
> @@ -96,6 +96,30 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
>   	}
>   }
>
> +static void refresh_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
> +			 struct gfs2_buffer_head *bh, uint64_t blkno)
> +{
> +	int i;
> +
> +	log_debug(_("Block is part of rgrp 0x%llx; refreshing the rgrp.\n"),
> +		  (unsigned long long)rgd->ri.ri_addr);
> +	for (i = 0; i < rgd->ri.ri_length; i++) {
> +		if (rgd->bits[i].bi_bh->b_blocknr != blkno)
> +			continue;
> +
> +		memcpy(rgd->bits[i].bi_bh->b_data, bh->b_data, sdp->bsize);
> +		bmodified(rgd->bits[i].bi_bh);
> +		if (i == 0) { /* this is the rgrp itself */
> +			if (sdp->gfs1)
> +				gfs_rgrp_in((struct gfs_rgrp *)&rgd->rg,
> +					    rgd->bits[0].bi_bh);
> +			else
> +				gfs2_rgrp_in(&rgd->rg, rgd->bits[0].bi_bh);
> +		}
> +		break;
> +	}
> +}
> +
>   static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
>   				struct gfs2_log_descriptor *ld, __be64 *ptr,
>   				int pass)
> @@ -105,6 +129,7 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
>   	struct gfs2_buffer_head *bh_log, *bh_ip;
>   	uint64_t blkno;
>   	int error = 0;
> +	struct rgrp_tree *rgd;
>
>   	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
>   		return 0;
> @@ -147,6 +172,9 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
>   			error = -EIO;
>   		} else {
>   			bmodified(bh_ip);
> +			rgd = gfs2_blk2rgrpd(sdp, blkno);
> +			if (rgd && blkno < rgd->ri.ri_data0)
> +				refresh_rgrp(sdp, rgd, bh_ip, blkno);
>   		}
>
>   		brelse(bh_log);
> @@ -676,28 +704,8 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
>
>   	for(i = 0; i < sdp->md.journals; i++) {
>   		if (sdp->md.journal[i]) {
> -			struct rgrp_tree rgd;
> -			struct gfs2_bitmap bits;
> -
> -			/* The real rgrp tree hasn't been built at this point,
> -			 * so we need to dummy one up that covers the whole
> -			 * file system so basic functions in check_metatree
> -			 * don't segfault. */
> -			rgd.start = sdp->sb_addr + 1;
> -			rgd.length = 1;
> -			bits.bi_bh = NULL;
> -			bits.bi_start = 0;
> -			bits.bi_len = sdp->fssize / GFS2_NBBY;
> -			rgd.bits = &bits;
> -			rgd.ri.ri_addr = sdp->sb_addr + 1;
> -			rgd.ri.ri_length = 1;
> -			rgd.ri.ri_data0 = sdp->sb_addr + 2;
> -			rgd.ri.ri_data = sdp->fssize - (sdp->sb_addr + 2);
> -
> -			sdp->rgtree.osi_node = (struct osi_node *)&rgd;
>   			error = check_metatree(sdp->md.journal[i],
>   					       &rangecheck_journal);
> -			sdp->rgtree.osi_node = NULL;
>   			if (error)
>   				/* Don't use fsck_inode_put here because it's a
>   				   system file and we need to dismantle it. */
> @@ -707,8 +715,7 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
>   		if (!sdp->md.journal[i]) {
>   			log_err(_("File system journal \"journal%d\" is "
>   				  "missing or corrupt: pass1 will try to "
> -				  "recreate it.\n"),
> -				i);
> +				  "recreate it.\n"), i);
>   			continue;
>   		}
>   		if (!error) {
> diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
> index 4e52262..043917c 100644
> --- a/gfs2/fsck/initialize.c
> +++ b/gfs2/fsck/initialize.c
> @@ -142,7 +142,7 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
>   	uint64_t rmin = 0;
>   	int error;
>
> -	log_info( _("Setting block ranges...\n"));
> +	log_info( _("Setting block ranges..."));
>
>   	for (n = osi_first(&sdp->rgtree); n; n = next) {
>   		next = osi_next(n);
> @@ -184,9 +184,12 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
>   		goto fail;
>   	}
>
> +	log_info(_("0x%llx to 0x%llx\n"), (unsigned long long)first_data_block,
> +		 (unsigned long long)last_data_block);
>   	return 0;
>
>    fail:
> +	log_info( _("Error\n"));
>   	return -1;
>   }
>
> @@ -685,10 +688,6 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
>   	if (sdp->md.rooti == NULL)
>   		return -1;
>
> -	err = fetch_rgrps(sdp);
> -	if (err)
> -		return err;
> -
>   	/*******************************************************************
>   	 *****************  Initialize more system inodes  *****************
>   	 *******************************************************************/
> @@ -1513,6 +1512,63 @@ static int init_rindex(struct gfs2_sbd *sdp)
>   }
>
>   /**
> + * check_jindex_dent - check the jindex directory entries
> + *
> + * This function makes sure the directory entries of the jindex are valid.
> + * If they're not '.' or '..' they better have the form journalXXX.
> + */
> +static int check_jindex_dent(struct gfs2_inode *ip, struct gfs2_dirent *dent,
> +			     struct gfs2_dirent *prev_de,
> +			     struct gfs2_buffer_head *bh, char *filename,
> +			     uint32_t *count, int *lindex, void *priv)
> +{
> +	struct gfs2_dirent dentry, *de;
> +	int i;
> +	char tmp_name[PATH_MAX];
> +
> +	memset(&dentry, 0, sizeof(struct gfs2_dirent));

Instead of using these memset()s it might be faster to use zero 
initialisers (= {0}) in the declarations. Or maybe drop this 
memset() and make sure gfs2_dirent_in() sets the entire struct. It 
should be possible to avoid using tmp_name altogether in this function, 
too, I think.

Cheers,
Andy

> +	gfs2_dirent_in(&dentry, (char *)dent);
> +	de = &dentry;
> +
> +	if (de->de_name_len == 1 && filename[0] == '.')
> +		goto dirent_good;
> +	if (de->de_name_len == 2 && filename[0] == '.' && filename[1] == '.')
> +		goto dirent_good;
> +
> +	memset(tmp_name, 0, sizeof(tmp_name));
> +	if (de->de_name_len < sizeof(tmp_name))
> +		strncpy(tmp_name, filename, de->de_name_len);
> +	else
> +		strncpy(tmp_name, filename, sizeof(tmp_name) - 1);
> +
> +	if ((de->de_name_len >= 11) || /* "journal9999" */
> +	    (de->de_name_len <= 7) ||
> +	    (strncmp(filename, "journal", 7))) {
> +		log_debug(_("Journal index entry '%s' has an invalid filename."
> +			    "\n"), tmp_name);
> +		return -1;
> +	}
> +	for (i = 7; i < de->de_name_len; i++) {
> +		if (filename[i] < '0' || filename[i] > '9') {
> +			log_debug(_("Journal '%s' has an invalid filename.\n"),
> +				  tmp_name);
> +			return -4;
> +		}
> +	}
> +
> +dirent_good:
> +	/* Return the number of leaf entries so metawalk doesn't flag this
> +	   leaf as having none. */
> +	*count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
> +	return 0;
> +}
> +
> +struct metawalk_fxns jindex_check_fxns = {
> +	.private = NULL,
> +	.check_dentry = check_jindex_dent,
> +};
> +
> +/**
>    * init_jindex - read in the rindex file
>    */
>   static int init_jindex(struct gfs2_sbd *sdp)
> @@ -1521,6 +1577,7 @@ static int init_jindex(struct gfs2_sbd *sdp)
>   	 ******************  Fill in journal information  ******************
>   	 *******************************************************************/
>
> +	log_debug("Validating the journal index.\n");
>   	/* rgrepair requires the journals be read in in order to distinguish
>   	   "real" rgrps from rgrps that are just copies left in journals. */
>   	if (sdp->gfs1)
> @@ -1537,24 +1594,52 @@ static int init_jindex(struct gfs2_sbd *sdp)
>   				   "jindex file.\n"));
>   			return -1;
>   		}
> -		/* In order to rebuild jindex, we need some valid
> -		   rgrps in memory.  Temporarily read those in. */
> -		err = fetch_rgrps(sdp);
> -		if (err)
> -			return err;
>
>   		err = build_jindex(sdp);
> -		/* Free rgrps read in earlier (re-read them later) */
> -		gfs2_rgrp_free(&sdp->rgtree);
>   		if (err) {
>   			log_crit(_("Error %d rebuilding jindex\n"), err);
>   			return err;
>   		}
> +		gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
> +	}
> +
> +	/* check for irrelevant entries in jindex. Can't use check_dir because
> +	   that creates and destroys the inode, which we don't want. */
> +	if (!sdp->gfs1) {
> +		int error;
> +
> +		log_debug("Checking the integrity of the journal index.\n");
> +		if (sdp->md.jiinode->i_di.di_flags & GFS2_DIF_EXHASH)
> +			error = check_leaf_blks(sdp->md.jiinode,
> +						&jindex_check_fxns);
> +		else
> +			error = check_linear_dir(sdp->md.jiinode,
> +						 sdp->md.jiinode->i_bh,
> +						 &jindex_check_fxns);
> +		if (error) {
> +			log_err(_("The system journal index is damaged.\n"));
> +			if (!query( _("Okay to rebuild it? (y/n) "))) {
> +				log_crit(_("Error: cannot proceed without a "
> +					   "valid jindex file.\n"));
> +				return -1;
> +			}
> +			inode_put(&sdp->md.jiinode);
> +			gfs2_dirent_del(sdp->master_dir, "jindex", 6);
> +			log_err(_("Corrupt journal index was removed.\n"));
> +			error = build_jindex(sdp);
> +			if (error) {
> +				log_err(_("Error rebuilding journal "
> +					  "index: Cannot continue.\n"));
> +				return error;
> +			}
> +			gfs2_lookupi(sdp->master_dir, "jindex", 6,
> +				     &sdp->md.jiinode);
> +		}
>   	}
>
>   	/* read in the ji data */
>   	if (ji_update(sdp)){
> -		log_err( _("Unable to read in jindex inode.\n"));
> +		log_err( _("Unable to read jindex inode.\n"));
>   		return -1;
>   	}
>   	return 0;
> @@ -1655,31 +1740,34 @@ int initialize(struct gfs2_sbd *sdp, int force_check, int preen,
>   	if (init_rindex(sdp))
>   		return FSCK_ERROR;
>
> -	/* We need to read in jindex in order to replay the journals */
> -	if (init_jindex(sdp))
> +	if (fetch_rgrps(sdp))
>   		return FSCK_ERROR;
>
> -	/* If GFS, rebuild the journals.  If GFS2, replay them.  We don't have
> -	   the smarts to replay GFS1 journals (neither did gfs_fsck). */
> -
> -	if (sdp->gfs1) {
> -		if (reconstruct_journals(sdp))
> +	/* We need to read in jindex in order to replay the journals. If
> +	   there's an error, we may proceed and let init_system_inodes
> +	   try to rebuild it. */
> +	if (init_jindex(sdp) == 0) {
> +		/* If GFS, rebuild the journals. If GFS2, replay them. We don't
> +		   have the smarts to replay GFS1 journals (neither did
> +		   gfs_fsck). */
> +		if (sdp->gfs1) {
> +			if (reconstruct_journals(sdp))
> +				return FSCK_ERROR;
> +		} else if (replay_journals(sdp, preen, force_check,
> +					   &clean_journals)) {
> +			if (!opts.no && preen_is_safe(sdp, preen, force_check))
> +				block_mounters(sdp, 0);
> +			stack;
>   			return FSCK_ERROR;
> -	} else if (replay_journals(sdp, preen, force_check, &clean_journals)) {
> -		if (!opts.no && preen_is_safe(sdp, preen, force_check))
> -			block_mounters(sdp, 0);
> -		stack;
> -		return FSCK_ERROR;
> -	}
> -	if (sdp->md.journals == clean_journals)
> -		*all_clean = 1;
> -	else {
> -		if (force_check || !preen)
> +		}
> +		if (sdp->md.journals == clean_journals)
> +			*all_clean = 1;
> +		else if (force_check || !preen)
>   			log_notice( _("\nJournal recovery complete.\n"));
> -	}
>
> -	if (!force_check && *all_clean && preen)
> -		return FSCK_OK;
> +		if (!force_check && *all_clean && preen)
> +			return FSCK_OK;
> +	}
>
>   	if (init_system_inodes(sdp))
>   		return FSCK_ERROR;
> diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
> index 5f432d6..b771b9e 100644
> --- a/gfs2/fsck/metawalk.c
> +++ b/gfs2/fsck/metawalk.c
> @@ -368,14 +368,11 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
>
>   	if (type == DIR_LINEAR) {
>   		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode));
> -	} else if (type == DIR_EXHASH) {
> +	} else {
>   		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf));
>   		log_debug( _("Checking leaf %llu (0x%llx)\n"),
>   			  (unsigned long long)bh->b_blocknr,
>   			  (unsigned long long)bh->b_blocknr);
> -	} else {
> -		log_err( _("Invalid directory type %d specified\n"), type);
> -		return -1;
>   	}
>
>   	prev = NULL;
> @@ -465,7 +462,7 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
>   							   pass->private);
>   				if (error < 0) {
>   					stack;
> -					return -1;
> +					return error;
>   				}
>   			}
>   		}
> @@ -529,7 +526,7 @@ int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
>   	if (pass->check_leaf_depth)
>   		error = pass->check_leaf_depth(ip, *leaf_no, *ref_count, lbh);
>
> -	if (pass->check_leaf) {
> +	if (error >= 0 && pass->check_leaf) {
>   		error = pass->check_leaf(ip, *leaf_no, pass->private);
>   		if (error == -EEXIST) {
>   			log_info(_("Previous reference to leaf %lld (0x%llx) "
> @@ -616,6 +613,8 @@ out:
>   		(*ref_count) <<= (ip->i_di.di_depth - di_depth);
>   	}
>   	brelse(lbh);
> +	if (error < 0)
> +		return error;
>   	return 0;
>
>   bad_leaf:
> @@ -674,7 +673,7 @@ static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize)
>   }
>
>   /* Checks exhash directory entries */
> -static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
> +int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
>   {
>   	int error = 0;
>   	unsigned hsize = (1 << ip->i_di.di_depth);
> @@ -801,6 +800,10 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
>   					  orig_ref_count, ref_count);
>   				tbl_valid = 0;
>   			}
> +			if (error < 0) {
> +				free(tbl);
> +				return error;
> +			}
>   			if (!leaf.lf_next || error)
>   				break;
>   			leaf_no = leaf.lf_next;
> diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
> index aae9121..06345c3 100644
> --- a/gfs2/fsck/metawalk.h
> +++ b/gfs2/fsck/metawalk.h
> @@ -11,6 +11,7 @@ struct metawalk_fxns;
>   extern int check_inode_eattr(struct gfs2_inode *ip,
>   			     struct metawalk_fxns *pass);
>   extern int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass);
> +extern int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass);
>   extern int check_dir(struct gfs2_sbd *sdp, uint64_t block,
>   		     struct metawalk_fxns *pass);
>   extern int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
> diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
> index 4348683..b952619 100644
> --- a/gfs2/fsck/pass1.c
> +++ b/gfs2/fsck/pass1.c
> @@ -238,7 +238,7 @@ static int p1check_leaf(struct gfs2_inode *ip, uint64_t block, void *private)
>   		if (q == gfs2_leaf_blk) /* If the previous reference also saw
>   					   this as a leaf, it was already
>   					   checked, so don't check again. */
> -			return -EEXIST;
> +			return EEXIST; /* non-fatal */
>   	}
>   	fsck_blockmap_set(ip, block, _("directory leaf"), gfs2_leaf_blk);
>   	return 0;
>



  reply	other threads:[~2015-01-21 19:50 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1301302656.11831907.1421692358657.JavaMail.zimbra@redhat.com>
2015-01-19 18:34 ` [Cluster-devel] [fsck.gfs2 PATCH] fsck.gfs2: Check the integrity of the journal index Bob Peterson
2015-01-21 19:50   ` Andrew Price [this message]
2015-01-22 20:35     ` Bob Peterson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54C00307.4050103@redhat.com \
    --to=anprice@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).