From mboxrd@z Thu Jan 1 00:00:00 1970 From: rpeterso@sourceware.org Date: 20 Jun 2006 14:43:01 -0000 Subject: [Cluster-devel] cluster/gfs/gfs_fsck fs_recovery.c fsck.h init ... Message-ID: <20060620144301.24472.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: rpeterso at sourceware.org 2006-06-20 14:42:56 Modified files: gfs/gfs_fsck : fs_recovery.c fsck.h initialize.c log.c main.c pass1.c pass2.c pass3.c pass4.c pass5.c super.c util.c util.h Log message: This addresses bugzilla bug #156009 - gfs fsck needs a good review of logging. I improved logging and added some "warm fuzzy" feel-good messages to let the users know it's not hung. Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fs_recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.2&r2=1.2.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.3&r2=1.2.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.4&r2=1.3.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/super.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.6&r2=1.2.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3 --- cluster/gfs/gfs_fsck/fs_recovery.c 2005/06/15 16:10:19 1.1.2.2 +++ cluster/gfs/gfs_fsck/fs_recovery.c 2006/06/20 14:42:56 1.1.2.3 @@ -75,15 +75,15 @@ * Returns: 0 on success, -1 on failure */ int reconstruct_journals(struct fsck_sb *sdp){ - int i; + int i; - log_warn("Clearing journals (this may take a while)\n"); - for(i=0; i < sdp->journals; i++) { - if((i % 10) == 0) - log_at_notice("."); - if(reconstruct_single_journal(sdp, i)) - return -1; - } - log_notice("Cleared journals\n"); - return 0; + log_notice("Clearing journals (this may take a while)"); + for(i=0; i < sdp->journals; i++) { + if((i % 2) == 0) + log_at_notice("."); + if(reconstruct_single_journal(sdp, i)) + return -1; + } + log_notice("\nJournals cleared.\n"); + return 0; } --- cluster/gfs/gfs_fsck/fsck.h 2005/06/17 16:18:33 1.2.2.2 +++ cluster/gfs/gfs_fsck/fsck.h 2006/06/20 14:42:56 1.2.2.3 @@ -26,7 +26,7 @@ int no:1; }; - +extern uint64_t last_fs_block; int initialize(struct fsck_sb *sbp); void destroy(struct fsck_sb *sbp); --- cluster/gfs/gfs_fsck/initialize.c 2005/12/15 21:36:42 1.1.2.7 +++ cluster/gfs/gfs_fsck/initialize.c 2006/06/20 14:42:56 1.1.2.8 @@ -98,6 +98,7 @@ { uint32_t i; + log_info("Freeing buffers.\n"); if(sdp->riinode){ free(sdp->riinode); sdp->riinode = NULL; @@ -108,7 +109,7 @@ } if(sdp->rooti){ free(sdp->rooti); - sdp->rooti=NULL; + sdp->rooti = NULL; } if(sdp->jindex){ @@ -196,6 +197,7 @@ rmin = ri->ri_data1; } + last_fs_block = rmax; for (i = 0; i < sdp->journals; i++) { @@ -406,6 +408,7 @@ log_warn("Unable to unblock other mounters - manual intevention required\n"); log_warn("Use 'gfs_tool sb proto' to fix\n"); } + log_info("Syncing the device.\n"); fsync(sbp->diskfd); } empty_super_block(sbp); --- cluster/gfs/gfs_fsck/log.c 2005/06/15 16:10:19 1.2.2.3 +++ cluster/gfs/gfs_fsck/log.c 2006/06/20 14:42:56 1.2.2.4 @@ -45,11 +45,13 @@ case MSG_DEBUG: printf("(%s:%d)\t", file, line); vprintf(format, args); + fflush(stdout); break; case MSG_INFO: case MSG_NOTICE: case MSG_WARN: vprintf(format, args); + fflush(stdout); break; case MSG_ERROR: case MSG_CRITICAL: --- cluster/gfs/gfs_fsck/main.c 2005/02/22 22:01:06 1.1.2.2 +++ cluster/gfs/gfs_fsck/main.c 2006/06/20 14:42:56 1.1.2.3 @@ -20,6 +20,8 @@ #include "fsck.h" #include "log.h" +uint64_t last_fs_block; + void print_map(struct block_list *il, int count) { int i, j; @@ -128,40 +130,41 @@ log_notice("Starting pass1\n"); if (pass1(sbp)) return 1; - log_notice("Pass1 complete\n"); + log_notice("Pass1 complete \n"); log_notice("Starting pass1b\n"); if(pass1b(sbp)) return 1; - log_notice("Pass1b complete\n"); + log_notice("Pass1b complete \n"); log_notice("Starting pass1c\n"); if(pass1c(sbp)) return 1; - log_notice("Pass1c complete\n"); + log_notice("Pass1c complete \n"); log_notice("Starting pass2\n"); if (pass2(sbp, &opts)) return 1; - log_notice("Pass2 complete\n"); + log_notice("Pass2 complete \n"); log_notice("Starting pass3\n"); if (pass3(sbp, &opts)) return 1; - log_notice("Pass3 complete\n"); + log_notice("Pass3 complete \n"); log_notice("Starting pass4\n"); if (pass4(sbp, &opts)) return 1; - log_notice("Pass4 complete\n"); + log_notice("Pass4 complete \n"); log_notice("Starting pass5\n"); if (pass5(sbp, &opts)) return 1; - log_notice("Pass5 complete\n"); + log_notice("Pass5 complete \n"); /* print_map(sbp->bl, sbp->last_fs_block); */ + log_notice("Writing changes to disk\n"); destroy(sbp); return 0; --- cluster/gfs/gfs_fsck/pass1.c 2006/04/20 20:43:41 1.1.2.8 +++ cluster/gfs/gfs_fsck/pass1.c 2006/06/20 14:42:56 1.1.2.9 @@ -883,7 +883,8 @@ stack; return -1; } - log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr, rgd->rd_ri.ri_length); + log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr, + rgd->rd_ri.ri_length); for (i = 0; i < rgd->rd_ri.ri_length; i++) { if(block_set(sbp->bl, rgd->rd_ri.ri_addr + i, meta_other)){ @@ -903,7 +904,7 @@ if(next_rg_meta_free(rgd, &block, first, &mfree)) break; - + warm_fuzzy_stuff(block); if(get_and_read_buf(sbp, block, &bh, 0)){ stack; log_crit("Unable to retrieve block %"PRIu64 --- cluster/gfs/gfs_fsck/pass2.c 2005/03/07 22:06:01 1.3.2.4 +++ cluster/gfs/gfs_fsck/pass2.c 2006/06/20 14:42:56 1.3.2.5 @@ -808,6 +808,7 @@ } int error = 0; + log_info("Checking directory inodes.\n"); /* Grab each directory inode, and run checks on it */ for(i = 0; i < sbp->last_fs_block; i++) { @@ -824,7 +825,7 @@ if(q.block_type != inode_dir) continue; - log_info("Checking directory inode at block %"PRIu64"\n", i); + log_debug("Checking directory inode at block %"PRIu64"\n", i); memset(&ds, 0, sizeof(ds)); --- cluster/gfs/gfs_fsck/pass3.c 2005/03/03 19:57:03 1.1.2.3 +++ cluster/gfs/gfs_fsck/pass3.c 2006/06/20 14:42:56 1.1.2.4 @@ -205,6 +205,7 @@ * until we find one that's been checked already. If we don't * find a parent, put in lost+found. */ + log_info("Checking directory linkage.\n"); for(i = 0; i < FSCK_HASH_SIZE; i++) { osi_list_foreach(tmp, &sbp->dir_hash[i]) { di = osi_list_entry(tmp, struct dir_info, list); @@ -270,8 +271,8 @@ break; } else { - log_info("Directory at block %"PRIu64 - " connected\n", di->dinode); + log_debug("Directory at block %" PRIu64 " connected\n", + di->dinode); } di = tdi; } --- cluster/gfs/gfs_fsck/pass4.c 2005/03/07 22:06:01 1.1.2.5 +++ cluster/gfs/gfs_fsck/pass4.c 2006/06/20 14:42:56 1.1.2.6 @@ -59,8 +59,8 @@ (ii->inode == sbp->sb.sb_quota_di.no_addr) || (ii->inode == sbp->sb.sb_license_di.no_addr)) continue; - log_info("Checking reference count on inode at block %"PRIu64 - "\n", ii->inode); + log_debug("Checking reference count on inode at block %"PRIu64 + "\n", ii->inode); if(ii->counted_links == 0) { log_err("Found unlinked inode at %"PRIu64"\n", ii->inode); @@ -174,6 +174,7 @@ if(sbp->lf_dip) log_debug("At beginning of pass4, l+f entries is %u\n", sbp->lf_dip->i_di.di_entries); + log_info("Checking inode reference counts.\n"); for (i = 0; i < FSCK_HASH_SIZE; i++) { list = &sbp->inode_hash[i]; if(scan_inode_list(sbp, list)) { --- cluster/gfs/gfs_fsck/pass5.c 2006/04/20 20:43:41 1.1.2.6 +++ cluster/gfs/gfs_fsck/pass5.c 2006/06/20 14:42:56 1.1.2.7 @@ -17,6 +17,7 @@ #include "ondisk.h" #include "fs_bits.h" #include "bio.h" +#include "util.h" #ifdef DEBUG int rgrp_countbits(unsigned char *buffer, unsigned int buflen, @@ -191,6 +192,8 @@ while(byte < end) { rg_status = ((*byte >> bit) & GFS_BIT_MASK); block = rg_data + *rg_block; + log_debug("Checking block %" PRIu64 "\n", block); + warm_fuzzy_stuff(block); block_check(sbp->bl, block, &q); block_status = convert_mark(q.block_type, count); --- cluster/gfs/gfs_fsck/super.c 2006/04/20 20:43:41 1.2.2.6 +++ cluster/gfs/gfs_fsck/super.c 2006/06/20 14:42:56 1.2.2.7 @@ -1081,9 +1081,9 @@ gfs_grow or something. Count the RGs by hand. */ } trust_lvl; - log_warn("Validating Resource Group index.\n"); + log_info("Validating Resource Group index.\n"); for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) { - log_warn("Level %d check.\n", trust_lvl + 1); + log_info("Level %d check.\n", trust_lvl + 1); count1 = count2 = 0; /* ---------------------------------------------------------------- */ /* Step 1 - Calculate or figure out our own RG index */ @@ -1097,7 +1097,7 @@ error = gfs_rgindex_calculate(sdp, &expected_rglist, &calc_rg_count); if (error) { /* If calculated RGs don't reasonably match the fs */ - log_warn("(failed--trying again at level 3)\n"); + log_info("(failed--trying again@level 3)\n"); ri_cleanup(&sdp->rglist); continue; /* Try again, this time counting them manually */ } @@ -1106,7 +1106,7 @@ error = gfs_rgindex_rebuild(sdp, &expected_rglist, &calc_rg_count); /* count the RGs. */ if (error) { /* If calculated RGs don't reasonably match the fs */ - log_err("(failed--giving up)\n"); + log_info("(failed--giving up)\n"); goto fail; /* try again, this time counting them manually */ } } @@ -1177,7 +1177,7 @@ count1++; } /* for all RGs in the index */ if (!error) { - log_warn("%u resource groups found.\n", rg); + log_info("%u resource groups found.\n", rg); if (trust_lvl != blind_faith && rg != calc_rg_count) log_warn("Resource group count discrepancy. Index says %d. " \ "Should be %d.\n", rg, calc_rg_count); @@ -1205,15 +1205,15 @@ sdp->rgcount = count1; } if (!error) { /* if no problems encountered with the rgs */ - log_warn("(passed)\n"); + log_info("(passed)\n"); break; /* no reason to distrust what we saw. Otherwise, we reiterate and become a little less trusting. */ } else { if (trust_lvl < distrust) - log_warn("(failed--trying again at level 2)\n"); + log_info("(failed--trying again@level 2)\n"); else - log_err("(failed--recovery impossible)\n"); + log_info("(failed--recovery impossible)\n"); } ri_cleanup(&sdp->rglist); } /* for trust_lvl */ --- cluster/gfs/gfs_fsck/util.c 2005/03/03 19:57:03 1.1.2.3 +++ cluster/gfs/gfs_fsck/util.c 2006/06/20 14:42:56 1.1.2.4 @@ -313,3 +313,24 @@ return NULL; } #endif + +/* Put out a warm, fuzzy message every second so the user */ +/* doesn't think we hung. (This may take a long time). */ +void warm_fuzzy_stuff(uint64_t block) +{ + static struct timeval tv; + static uint32_t seconds = 0; + + gettimeofday(&tv, NULL); + if (!seconds) + seconds = tv.tv_sec; + if (tv.tv_sec - seconds) { + uint64_t percent; + + seconds = tv.tv_sec; + if (last_fs_block) { + percent = (block * 100) / last_fs_block; + log_notice("\r%" PRIu64 " percent complete.\r", percent); + } + } +} --- cluster/gfs/gfs_fsck/util.h 2005/03/03 19:57:03 1.1.2.2 +++ cluster/gfs/gfs_fsck/util.h 2006/06/20 14:42:56 1.1.2.3 @@ -35,5 +35,6 @@ int next_rg_meta_free(struct fsck_rgrp *rgd, uint64 *block, int first, int *free); int next_rg_metatype(struct fsck_rgrp *rgd, uint64 *block, uint32 type, int first); struct di_info *search_list(osi_list_t *list, uint64 addr); +void warm_fuzzy_stuff(uint64_t block); #endif /* __UTIL_H__ */