From: rpeterso@sourceware.org <rpeterso@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/gfs/gfs_fsck fsck.h initialize.c log.c ...
Date: 20 Nov 2006 20:59:38 -0000 [thread overview]
Message-ID: <20061120205938.14950.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL50
Changes by: rpeterso at sourceware.org 2006-11-20 20:59:36
Modified files:
gfs/gfs_fsck : fsck.h initialize.c log.c main.c pass1.c
pass1b.c pass1c.c pass2.c pass3.c pass4.c
pass5.c util.c
Log message:
Resolves: bz208836 / bz216203 - fatal: invalid metadata block
1. Fix a memory leak in pass1b.
2. Improve performance of pass1b by combining loops through fs.
3. Give an error message and abort if the file system is > 16TB and the
node architecture is 32-bit.
4. Give users a choice of "Abort", "Continue" or "Skip" if they interrupt
with Ctrl-C. Also, report progress for that pass on interrupt.
5. Added more "percent complete" messages for other passes.
See bz comments for more details.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.13&r2=1.13.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.5&r2=1.5.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.14&r2=1.14.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1b.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1c.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.4&r2=1.4.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.12&r2=1.12.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.9&r2=1.9.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.12&r2=1.12.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.10&r2=1.10.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=RHEL50&r1=1.7&r2=1.7.4.1
--- cluster/gfs/gfs_fsck/fsck.h 2006/06/14 19:12:07 1.4
+++ cluster/gfs/gfs_fsck/fsck.h 2006/11/20 20:59:36 1.4.4.1
@@ -26,7 +26,8 @@
int no:1;
};
-extern uint64_t last_fs_block;
+extern uint64_t last_fs_block, last_reported_block;
+extern int skip_this_pass, fsck_abort, fsck_query;
int initialize(struct fsck_sb *sbp);
void destroy(struct fsck_sb *sbp);
--- cluster/gfs/gfs_fsck/initialize.c 2006/09/20 16:36:57 1.13
+++ cluster/gfs/gfs_fsck/initialize.c 2006/11/20 20:59:36 1.13.4.1
@@ -210,6 +210,12 @@
}
sdp->last_fs_block = (jmax > rmax) ? jmax : rmax;
+ if (sdp->last_fs_block > 0xffffffff && sizeof(unsigned long) <= 4) {
+ log_crit("This file system is too big for this computer to handle.\n");
+ log_crit("Last fs block = 0x%llx, but sizeof(unsigned long) is %d bytes.\n",
+ sdp->last_fs_block, sizeof(unsigned long));
+ goto fail;
+ }
sdp->last_data_block = rmax;
sdp->first_data_block = rmin;
--- cluster/gfs/gfs_fsck/log.c 2006/06/14 19:12:07 1.5
+++ cluster/gfs/gfs_fsck/log.c 2006/11/20 20:59:36 1.5.4.1
@@ -102,7 +102,8 @@
if(sbp->opts->no)
return 0;
- /* Watch stdin (fd 0) to see when it has input. */
+ fsck_query = TRUE;
+ /* Watch stdin (fd 0) to see when it has input. */
FD_ZERO(&rfds);
FD_SET(STDIN_FILENO, &rfds);
@@ -149,5 +150,6 @@
read(STDIN_FILENO, &response, sizeof(char));
}
+ fsck_query = FALSE;
return ret;
}
--- cluster/gfs/gfs_fsck/main.c 2006/06/14 19:12:07 1.4
+++ cluster/gfs/gfs_fsck/main.c 2006/11/20 20:59:36 1.4.4.1
@@ -14,13 +14,17 @@
#include <stdio.h>
#include <stdint.h>
#include <libgen.h>
+#include <ctype.h>
+#include <signal.h>
#include "copyright.cf"
#include "fsck_incore.h"
#include "fsck.h"
#include "log.h"
-uint64_t last_fs_block;
+uint64_t last_fs_block, last_reported_block = -1;
+int skip_this_pass = FALSE, fsck_abort = FALSE, fsck_query = FALSE;
+const char *pass = "";
void print_map(struct block_list *il, int count)
{
@@ -110,6 +114,61 @@
return 0;
}
+void interrupt(int sig)
+{
+ fd_set rfds;
+ struct timeval tv;
+ char response;
+ int err;
+
+ if (fsck_query) /* if we're asking them a question */
+ return; /* ignore the interrupt signal */
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ /* Make sure there isn't extraneous input before asking the
+ * user the question */
+ while((err = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv))) {
+ if(err < 0) {
+ log_debug("Error in select() on stdin\n");
+ break;
+ }
+ read(STDIN_FILENO, &response, sizeof(char));
+ }
+ while (TRUE) {
+ printf("\ngfs_fsck interrupted in %s: ", pass);
+ if (!last_reported_block || last_reported_block == last_fs_block)
+ printf("progress unknown.\n");
+ else
+ printf("processing block %" PRIu64 " out of %" PRIu64 "\n",
+ last_reported_block, last_fs_block);
+ printf("Do you want to abort gfs_fsck, skip the rest of %s or continue (a/s/c)?", pass);
+
+ /* Make sure query is printed out */
+ fflush(stdout);
+ read(STDIN_FILENO, &response, sizeof(char));
+
+ if(tolower(response) == 's') {
+ skip_this_pass = TRUE;
+ return;
+ }
+ else if (tolower(response) == 'a') {
+ fsck_abort = TRUE;
+ return;
+ }
+ else if (tolower(response) == 'c')
+ return;
+ else {
+ while(response != '\n')
+ read(STDIN_FILENO, &response, sizeof(char));
+ printf("Bad response, please type 'c', 'a' or 's'.\n");
+ continue;
+ }
+ }
+}
+
int main(int argc, char **argv)
{
struct fsck_sb sb;
@@ -127,44 +186,98 @@
if (initialize(sbp))
return 1;
+ signal(SIGINT, interrupt);
log_notice("Starting pass1\n");
+ pass = "pass 1";
+ last_reported_block = 0;
if (pass1(sbp))
return 1;
- log_notice("Pass1 complete \n");
-
- log_notice("Starting pass1b\n");
- if(pass1b(sbp))
- return 1;
- log_notice("Pass1b complete \n");
-
- log_notice("Starting pass1c\n");
- if(pass1c(sbp))
- return 1;
- log_notice("Pass1c complete \n");
-
- log_notice("Starting pass2\n");
- if (pass2(sbp, &opts))
- return 1;
- log_notice("Pass2 complete \n");
-
- log_notice("Starting pass3\n");
- if (pass3(sbp, &opts))
- return 1;
- log_notice("Pass3 complete \n");
-
- log_notice("Starting pass4\n");
- if (pass4(sbp, &opts))
- return 1;
- log_notice("Pass4 complete \n");
-
- log_notice("Starting pass5\n");
- if (pass5(sbp, &opts))
- return 1;
- log_notice("Pass5 complete \n");
-
-/* print_map(sbp->bl, sbp->last_fs_block); */
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1 interrupted \n");
+ }
+ else
+ log_notice("Pass1 complete \n");
- log_notice("Writing changes to disk\n");
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 1b";
+ log_notice("Starting pass1b\n");
+ if(pass1b(sbp))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1b interrupted \n");
+ }
+ else
+ log_notice("Pass1b complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 1c";
+ log_notice("Starting pass1c\n");
+ if(pass1c(sbp))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1c interrupted \n");
+ }
+ else
+ log_notice("Pass1c complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 2";
+ log_notice("Starting pass2\n");
+ if (pass2(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass2 interrupted \n");
+ }
+ else
+ log_notice("Pass2 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 3";
+ log_notice("Starting pass3\n");
+ if (pass3(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass3 interrupted \n");
+ }
+ else
+ log_notice("Pass3 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 4";
+ log_notice("Starting pass4\n");
+ if (pass4(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass4 interrupted \n");
+ }
+ else
+ log_notice("Pass4 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 5";
+ log_notice("Starting pass5\n");
+ if (pass5(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass5 interrupted \n");
+ }
+ else
+ log_notice("Pass5 complete \n");
+ log_notice("Writing changes to disk\n");
+ }
destroy(sbp);
return 0;
--- cluster/gfs/gfs_fsck/pass1.c 2006/06/14 19:12:07 1.14
+++ cluster/gfs/gfs_fsck/pass1.c 2006/11/20 20:59:36 1.14.4.1
@@ -905,6 +905,13 @@
break;
warm_fuzzy_stuff(block);
+ if (fsck_abort) /* if asked to abort */
+ return 0;
+ if (skip_this_pass) {
+ printf("Skipping pass 1 is not a good idea.\n");
+ skip_this_pass = FALSE;
+ fflush(stdout);
+ }
if(get_and_read_buf(sbp, block, &bh, 0)){
stack;
log_crit("Unable to retrieve block %"PRIu64
--- cluster/gfs/gfs_fsck/pass1b.c 2005/08/01 16:20:48 1.4
+++ cluster/gfs/gfs_fsck/pass1b.c 2006/11/20 20:59:36 1.4.4.1
@@ -470,6 +470,7 @@
osi_list_t *tmp;
struct metawalk_fxns find_dirents = {0};
find_dirents.check_dentry = &find_dentry;
+ int rc = 0;
osi_list_init(&sbp->dup_list);
/* Shove all blocks marked as duplicated into a list */
@@ -487,10 +488,14 @@
log_info("Scanning filesystem for inodes containing duplicate blocks...\n");
log_debug("Filesystem has %"PRIu64" blocks total\n", sbp->last_fs_block);
for(i = 0; i < sbp->last_fs_block; i += 1) {
+ warm_fuzzy_stuff(i);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ goto out;
log_debug("Scanning block %"PRIu64" for inodes\n", i);
if(block_check(sbp->bl, i, &q)) {
stack;
- return -1;
+ rc = -1;
+ goto out;
}
if((q.block_type == inode_dir) ||
(q.block_type == inode_file) ||
@@ -503,33 +508,27 @@
b = osi_list_entry(tmp, struct blocks, list);
if(find_block_ref(sbp, i, b)) {
stack;
- return -1;
+ rc = -1;
+ goto out;
}
}
}
- }
-
- /* Rescan the fs looking for directory entries to the inodes
- * with duplicate blocks - might need this to deal with the
- * inode correctly */
- log_info("Looking through directory entries for inodes with duplicate blocks...\n");
- for(i = 0; i < sbp->last_fs_block; i++) {
- if(block_check(sbp->bl, i, &q)) {
- stack;
- return 0;
- }
if(q.block_type == inode_dir) {
check_dir(sbp, i, &find_dirents);
}
}
-
/* Fix dups here - it's going to slow things down a lot to fix
* it later */
log_info("Handling duplicate blocks\n");
- osi_list_foreach(tmp, &sbp->dup_list) {
- b = osi_list_entry(tmp, struct blocks, list);
- handle_dup_blk(sbp, b);
+out:
+ /*osi_list_foreach(tmp, &sbp->dup_list) {*/
+ while (!osi_list_empty(&sbp->dup_list)) {
+ b = osi_list_entry(sbp->dup_list.next, struct blocks, list);
+ if (!skip_this_pass && !rc) /* no error & not asked to skip the rest */
+ handle_dup_blk(sbp, b);
+ osi_list_del(&b->list);
+ free(b);
}
- return 0;
+ return rc;
}
--- cluster/gfs/gfs_fsck/pass1c.c 2006/02/02 01:14:56 1.4
+++ cluster/gfs/gfs_fsck/pass1c.c 2006/11/20 20:59:36 1.4.4.1
@@ -252,6 +252,8 @@
log_info("Looking for inodes containing ea blocks...\n");
while (!find_next_block_type(sbp->bl, eattr_block, &block_no)) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
log_info("EA in inode %"PRIu64"\n", block_no);
if(get_and_read_buf(sbp, block_no, &bh, 0)) {
stack;
--- cluster/gfs/gfs_fsck/pass2.c 2006/06/14 19:12:07 1.12
+++ cluster/gfs/gfs_fsck/pass2.c 2006/11/20 20:59:36 1.12.4.1
@@ -812,6 +812,10 @@
/* Grab each directory inode, and run checks on it */
for(i = 0; i < sbp->last_fs_block; i++) {
+ warm_fuzzy_stuff(i);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
+
/* Skip the root inode - it's checked above */
if(i == sbp->sb.sb_root_di.no_addr)
continue;
--- cluster/gfs/gfs_fsck/pass3.c 2006/06/14 19:12:07 1.9
+++ cluster/gfs/gfs_fsck/pass3.c 2006/11/20 20:59:36 1.9.4.1
@@ -213,6 +213,8 @@
/* FIXME: Change this so it returns success or
* failure and put the parent inode in a
* param */
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
tdi = mark_and_return_parent(sbp, di);
/* FIXME: Factor this ? */
--- cluster/gfs/gfs_fsck/pass4.c 2006/06/14 19:12:07 1.12
+++ cluster/gfs/gfs_fsck/pass4.c 2006/11/20 20:59:36 1.12.4.1
@@ -49,6 +49,8 @@
/* FIXME: should probably factor this out into a generic
* scanning fxn */
osi_list_foreach(tmp, list) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
if(!(ii = osi_list_entry(tmp, struct inode_info, list))) {
log_crit("osi_list_foreach broken in scan_info_list!!\n");
exit(1);
@@ -176,6 +178,8 @@
sbp->lf_dip->i_di.di_entries);
log_info("Checking inode reference counts.\n");
for (i = 0; i < FSCK_HASH_SIZE; i++) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
list = &sbp->inode_hash[i];
if(scan_inode_list(sbp, list)) {
stack;
--- cluster/gfs/gfs_fsck/pass5.c 2006/06/14 19:12:07 1.10
+++ cluster/gfs/gfs_fsck/pass5.c 2006/11/20 20:59:36 1.10.4.1
@@ -194,6 +194,8 @@
block = rg_data + *rg_block;
log_debug("Checking block %" PRIu64 "\n", block);
warm_fuzzy_stuff(block);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
block_check(sbp->bl, block, &q);
block_status = convert_mark(q.block_type, count);
@@ -266,6 +268,8 @@
BH_DATA(rgp->rd_bh[i]) + bits->bi_offset,
bits->bi_len, &rg_block,
rgp->rd_ri.ri_data1, count);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
}
/* Compare the rgrps counters with what we found */
@@ -351,6 +355,8 @@
/* Reconcile RG bitmaps with fsck bitmap */
for(tmp = sbp->rglist.next; tmp != &sbp->rglist; tmp = tmp->next){
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
log_info("Updating Resource Group %"PRIu64"\n", rg_count);
memset(count, 0, sizeof(*count) * 5);
rgp = osi_list_entry(tmp, struct fsck_rgrp, rd_list);
--- cluster/gfs/gfs_fsck/util.c 2006/06/14 19:12:07 1.7
+++ cluster/gfs/gfs_fsck/util.c 2006/11/20 20:59:36 1.7.4.1
@@ -321,6 +321,7 @@
static struct timeval tv;
static uint32_t seconds = 0;
+ last_reported_block = block;
gettimeofday(&tv, NULL);
if (!seconds)
seconds = tv.tv_sec;
next reply other threads:[~2006-11-20 20:59 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-20 20:59 rpeterso [this message]
-- strict thread matches above, loose matches on Subject: below --
2006-11-17 17:00 [Cluster-devel] cluster/gfs/gfs_fsck fsck.h initialize.c log.c rpeterso
2006-11-17 16:43 rpeterso
2006-11-17 16:32 rpeterso
2006-11-17 16:15 rpeterso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061120205938.14950.qmail@sourceware.org \
--to=rpeterso@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.