From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bob Peterson Date: Wed, 1 Aug 2012 20:11:31 -0400 (EDT) Subject: [Cluster-devel] [gfs2-utils RHEL59 patch] gfs_fsck: Back-port some fixes from fsck.gfs2 Message-ID: <1375817359.8059948.1343866291017.JavaMail.root@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, This is kinda big, so I'm posting it here before I push it to the git repo. This patch back-ports some patches from fsck.gfs2 to gfs_fsck to allow gfs_fsck to better detect and fix file systems that have invalid indirect metadata blocks. Regards, Bob Peterson --- diff --git a/gfs/gfs_fsck/bio.c b/gfs/gfs_fsck/bio.c index cd6833c..bf743e7 100644 --- a/gfs/gfs_fsck/bio.c +++ b/gfs/gfs_fsck/bio.c @@ -134,9 +134,6 @@ int write_buf(struct fsck_sb *sdp, osi_buf_t *bh, int flags){ return -1; } - log_debug("Writing to %"PRIu64" - %"PRIu64" %u\n", - (uint64)(BH_BLKNO(bh) * BH_SIZE(bh)), - BH_BLKNO(bh), BH_SIZE(bh)); if(do_write(disk_fd, BH_DATA(bh), BH_SIZE(bh))) { log_err("Unable to write %u bytes to position %"PRIu64"\n", BH_SIZE(bh), (uint64)(BH_BLKNO(bh) * BH_SIZE(bh))); diff --git a/gfs/gfs_fsck/metawalk.c b/gfs/gfs_fsck/metawalk.c index e14d4dc..e485428 100644 --- a/gfs/gfs_fsck/metawalk.c +++ b/gfs/gfs_fsck/metawalk.c @@ -664,7 +664,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp, osi_list_t *prev_list, *cur_list, *tmp; int i, head_size; uint64 *ptr, block; - int err; + int error = 0, err; if(get_and_read_buf(ip->i_sbd, ip->i_di.di_num.no_addr, &metabh)) { stack; @@ -708,13 +708,22 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp, pass->private); if(err < 0) { stack; + error = err; goto fail; } if(err > 0) { + if (!error) + error = err; log_debug("Skipping block %"PRIu64 "\n", block); continue; } + if (check_range(ip->i_sbd, block)) { + log_debug("Skipping invalid block " + "%lld\n", + (unsigned long long)block); + continue; + } if(!nbh) { if(get_and_read_buf(ip->i_sbd, block, &nbh)) { @@ -726,7 +735,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp, } } } - return 0; + return error; fail: for (i = 0; i < GFS_MAX_META_HEIGHT; i++) { @@ -739,9 +748,8 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp, relse_buf(ip->i_sbd, bh); } } - /* This is an error path, so we need to release the buffer here: */ - relse_buf(ip->i_sbd, metabh); - return -1; + /* No need to relse_buf on metabh because it's in the list */ + return error; } /** @@ -769,9 +777,10 @@ int check_metatree(struct fsck_inode *ip, struct metawalk_fxns *pass) osi_list_init(&metalist[i]); /* create metalist for each level */ - if(build_metalist(ip, &metalist[0], pass)){ + error = build_metalist(ip, &metalist[0], pass); + if (error) { stack; - return -1; + return error; } /* We don't need to record directory blocks - they will be @@ -1072,9 +1081,6 @@ int delete_blocks(struct fsck_inode *ip, uint64_t block, osi_buf_t **bh, if (block_check(ip->i_sbd->bl, block, &q)) return 0; if (!q.dup_block) { - log_info("Deleting %s block %lld as part " - "of inode %lld \n", - btype, block, ip->i_di.di_num.no_addr); block_set(ip->i_sbd->bl, block, block_free); free_block(ip->i_sbd, block); } diff --git a/gfs/gfs_fsck/pass1.c b/gfs/gfs_fsck/pass1.c index 4b9de02..a5cac7b 100644 --- a/gfs/gfs_fsck/pass1.c +++ b/gfs/gfs_fsck/pass1.c @@ -35,6 +35,8 @@ #include "link.h" #include "metawalk.h" +#define BAD_POINTER_TOLERANCE 10 + struct block_count { uint64_t indir_count; uint64_t data_count; @@ -101,6 +103,8 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block, *bh = NULL; + if (block == 135116188) + printf("here."); if (check_range(ip->i_sbd, block)){ /* blk outside of FS */ block_set(sdp->bl, ip->i_di.di_num.no_addr, bad_block); log_debug("Bad indirect block pointer (out of range).\n"); @@ -120,8 +124,11 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block, get_and_read_buf(ip->i_sbd, block, &nbh); if (check_meta(nbh, GFS_METATYPE_IN)){ - log_debug("Bad indirect block pointer (points to " - "something that is not an indirect block).\n"); + log_err("Inode %lld has a bad indirect block pointer %lld " + "(points to something that is not an " + "indirect block).\n", + (unsigned long long)ip->i_di.di_num.no_addr, + (unsigned long long)block); if(!found_dup) { block_set(sdp->bl, block, meta_inval); relse_buf(ip->i_sbd, nbh); @@ -129,14 +136,19 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block, } relse_buf(ip->i_sbd, nbh); + nbh = NULL; } else /* blk check ok */ *bh = nbh; - if (!found_dup) { + bc->indir_count++; + if (found_dup) { + if (nbh) + relse_buf(ip->i_sbd, nbh); + return 1; /* don't process the metadata again */ + } else { log_debug("Setting %" PRIu64 " to indirect block.\n", block); block_set(sdp->bl, block, indir_blk); } - bc->indir_count++; return 0; } @@ -734,6 +746,93 @@ int add_to_dir_list(struct fsck_sb *sbp, uint64_t block) return 0; } +/** + * Check for massive amounts of pointer corruption. If the block has + * lots of out-of-range pointers, we can't trust any of the pointers. + * For example, a stray pointer with a value of 0x1d might be + * corruption/nonsense, and if so, we don't want to delete an + * important file (like master or the root directory) because of it. + * We need to check for a large number of bad pointers BEFORE we start + * messing with them because we don't want to mark a block as a + * duplicate (for example) until we know if the pointers in general can + * be trusted. Thus it needs to be in a separate loop. + */ +static int rangecheck_block(struct fsck_inode *ip, uint64_t block, + osi_buf_t **bh, const char *btype, void *private) +{ + long *bad_pointers = (long *)private; + struct block_query q = {0}; + + if (check_range(ip->i_sbd, block) != 0) { + (*bad_pointers)++; + log_debug("Bad %s block pointer (out of range #%ld) " + "found in inode %lld.\n", btype, *bad_pointers, + (unsigned long long)ip->i_di.di_num.no_addr); + if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) + return ENOENT; + else + return -ENOENT; /* Exits check_metatree quicker */ + } + /* See how many duplicate blocks it has */ + if(block_check(ip->i_sbd->bl, block, &q)) { + stack; + return -1; + } + if (q.block_type != block_free) { + (*bad_pointers)++; + log_debug("Duplicated %s block pointer (violation #%ld) " + "found in inode %lld.\n", btype, *bad_pointers, + (unsigned long long)ip->i_di.di_num.no_addr); + if ((*bad_pointers) <= BAD_POINTER_TOLERANCE) + return ENOENT; + else + return -ENOENT; /* Exits check_metatree quicker */ + } + return 0; +} + +static int rangecheck_metadata(struct fsck_inode *ip, uint64_t block, + osi_buf_t **bh, void *private) +{ + return rangecheck_block(ip, block, bh, "metadata", private); +} + +static int rangecheck_leaf(struct fsck_inode *ip, uint64_t block, + osi_buf_t *bh, void *private) +{ + return rangecheck_block(ip, block, &bh, "leaf", private); +} + +static int rangecheck_data(struct fsck_inode *ip, uint64_t block, + struct fsck_rgrp **rgd, void *private) +{ + return rangecheck_block(ip, block, NULL, "data", private); +} + +static int rangecheck_eattr_indir(struct fsck_inode *ip, uint64_t block, + uint64_t parent, osi_buf_t **bh, + void *private) +{ + return rangecheck_block(ip, block, NULL, "indirect extended attribute", + private); +} + +static int rangecheck_eattr_leaf(struct fsck_inode *ip, uint64_t block, + uint64_t parent, osi_buf_t **bh, + void *private) +{ + return rangecheck_block(ip, block, NULL, "extended attribute", + private); +} + +struct metawalk_fxns rangecheck_fxns = { + .private = NULL, + .check_metalist = rangecheck_metadata, + .check_data = rangecheck_data, + .check_leaf = rangecheck_leaf, + .check_eattr_indir = rangecheck_eattr_indir, + .check_eattr_leaf = rangecheck_eattr_leaf, +}; static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfree) { @@ -741,6 +840,7 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr struct fsck_inode *ip; int error; struct block_count bc = {0}; + long bad_pointers = 0L; struct metawalk_fxns invalidate_metatree = {0}; invalidate_metatree.check_metalist = clear_metalist; invalidate_metatree.check_data = clear_data; @@ -823,6 +923,25 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr goto success; } + + /* First, check the metadata for massive amounts of pointer corruption. + Such corruption can only lead us to ruin trying to clean it up, + so it's better to check it up front and delete the inode if + there is corruption. */ + rangecheck_fxns.private = &bad_pointers; + error = check_metatree(ip, &rangecheck_fxns); + if (bad_pointers > BAD_POINTER_TOLERANCE) { + log_err("Error: inode %llu has more than %d bad pointers.\n", + (unsigned long long)ip->i_di.di_num.no_addr, + BAD_POINTER_TOLERANCE); + if (block_set(sdp->bl, block, meta_inval)) { + stack; + goto fail; + } + fs_set_bitmap(sdp, block, GFS_BLKST_FREE); + goto success; + } + switch(ip->i_di.di_type) { case GFS_FILE_DIR: @@ -930,8 +1049,9 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr return 0; } if(error > 0) { - log_warn("Marking inode %lld invalid\n", - ip->i_di.di_num.no_addr); + log_err("Error: inode %llu has unrecoverable errors; " + "invalidating.\n", + (unsigned long long)ip->i_di.di_num.no_addr); /* FIXME: Must set all leaves invalid as well */ check_metatree(ip, &invalidate_metatree); block_set(ip->i_sbd->bl, ip->i_di.di_num.no_addr, meta_inval); diff --git a/gfs/gfs_fsck/util.c b/gfs/gfs_fsck/util.c index bb53d8c..6d2a9a2 100644 --- a/gfs/gfs_fsck/util.c +++ b/gfs/gfs_fsck/util.c @@ -107,11 +107,8 @@ int check_meta(osi_buf_t *bh, int type){ check_magic = gfs32_to_cpu(check_magic); check_type = gfs32_to_cpu(check_type); - if((check_magic != GFS_MAGIC) || (type && (check_type != type))){ - log_debug("For %"PRIu64" Expected %X:%X - got %X:%X\n", BH_BLKNO(bh), GFS_MAGIC, type, - check_magic, check_type); - return -1; - } + if((check_magic != GFS_MAGIC) || (type && (check_type != type))) + return -1; return 0; }