From: rpeterso@redhat.com <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 16/42] fsck.gfs2: fix leaf blocks, don't try to patch the hash table
Date: Mon, 8 Apr 2013 07:40:48 -0700 [thread overview]
Message-ID: <1365432074-17615-17-git-send-email-rpeterso@redhat.com> (raw)
In-Reply-To: <1365432074-17615-1-git-send-email-rpeterso@redhat.com>
From: Bob Peterson <rpeterso@redhat.com>
Before this patch, when we detected a bad leaf block, fsck.gfs2 would
try to patch the hash table. That's very wrong, because the hash table
needs to be on nice power-of-two boundaries. This patch changes the
code so that the hash table is actually repaired.
rhbz#902920
---
gfs2/fsck/metawalk.c | 135 ++++++++++++++++++---------------------------------
gfs2/fsck/metawalk.h | 3 +-
gfs2/fsck/pass1.c | 14 ++++++
gfs2/fsck/pass2.c | 9 +++-
4 files changed, 72 insertions(+), 89 deletions(-)
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index 73bdba0..4cd712e 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -480,52 +480,14 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
return 0;
}
-/* warn_and_patch - Warn the user of an error and ask permission to fix it
- * Process a bad leaf pointer and ask to repair the first time.
- * The repair process involves extending the previous leaf's entries
- * so that they replace the bad ones. We have to hack up the old
- * leaf a bit, but it's better than deleting the whole directory,
- * which is what used to happen before. */
-static int warn_and_patch(struct gfs2_inode *ip, uint64_t *leaf_no,
- uint64_t *bad_leaf, uint64_t old_leaf,
- uint64_t first_ok_leaf, int pindex, const char *msg)
-{
- int okay_to_fix = 0;
-
- if (*bad_leaf != *leaf_no) {
- log_err( _("Directory Inode %llu (0x%llx) points to leaf %llu"
- " (0x%llx) %s.\n"),
- (unsigned long long)ip->i_di.di_num.no_addr,
- (unsigned long long)ip->i_di.di_num.no_addr,
- (unsigned long long)*leaf_no,
- (unsigned long long)*leaf_no, msg);
- }
- if (*leaf_no == *bad_leaf ||
- (okay_to_fix = query( _("Attempt to patch around it? (y/n) ")))) {
- if (valid_block(ip->i_sbd, old_leaf))
- gfs2_put_leaf_nr(ip, pindex, old_leaf);
- else
- gfs2_put_leaf_nr(ip, pindex, first_ok_leaf);
- log_err( _("Directory Inode %llu (0x%llx) repaired.\n"),
- (unsigned long long)ip->i_di.di_num.no_addr,
- (unsigned long long)ip->i_di.di_num.no_addr);
- } else
- log_err( _("Bad leaf left in place.\n"));
- *bad_leaf = *leaf_no;
- *leaf_no = old_leaf;
- return okay_to_fix;
-}
-
/**
* check_leaf - check a leaf block for errors
* Reads in the leaf block
* Leaves the buffer around for further analysis (caller must brelse)
*/
static int check_leaf(struct gfs2_inode *ip, int lindex,
- struct metawalk_fxns *pass, int *ref_count,
- uint64_t *leaf_no, uint64_t old_leaf, uint64_t *bad_leaf,
- uint64_t first_ok_leaf, struct gfs2_leaf *leaf,
- struct gfs2_leaf *oldleaf)
+ struct metawalk_fxns *pass,
+ uint64_t *leaf_no, struct gfs2_leaf *leaf, int *ref_count)
{
int error = 0, fix;
struct gfs2_buffer_head *lbh = NULL;
@@ -533,7 +495,6 @@ static int check_leaf(struct gfs2_inode *ip, int lindex,
struct gfs2_sbd *sdp = ip->i_sbd;
const char *msg;
- *ref_count = 1;
/* Make sure the block number is in range. */
if (!valid_block(ip->i_sbd, *leaf_no)) {
log_err( _("Leaf block #%llu (0x%llx) is out of range for "
@@ -543,7 +504,7 @@ static int check_leaf(struct gfs2_inode *ip, int lindex,
(unsigned long long)ip->i_di.di_num.no_addr,
(unsigned long long)ip->i_di.di_num.no_addr);
msg = _("that is out of range");
- goto out_copy_old_leaf;
+ goto bad_leaf;
}
/* Try to read in the leaf block. */
@@ -551,7 +512,7 @@ static int check_leaf(struct gfs2_inode *ip, int lindex,
/* Make sure it's really a valid leaf block. */
if (gfs2_check_meta(lbh, GFS2_METATYPE_LF)) {
msg = _("that is not really a leaf");
- goto out_copy_old_leaf;
+ goto bad_leaf;
}
if (pass->check_leaf) {
error = pass->check_leaf(ip, *leaf_no, pass->private);
@@ -587,7 +548,7 @@ static int check_leaf(struct gfs2_inode *ip, int lindex,
(unsigned long long)*leaf_no,
(unsigned long long)*leaf_no);
msg = _("that is not a leaf");
- goto out_copy_old_leaf;
+ goto bad_leaf;
}
if (pass->check_dentry && is_dir(&ip->i_di, sdp->gfs1)) {
@@ -599,16 +560,18 @@ static int check_leaf(struct gfs2_inode *ip, int lindex,
if (error < 0) {
stack;
- goto out;
+ goto out; /* This seems wrong: needs investigation */
}
- if (count != leaf->lf_entries) {
- /* release and re-read the leaf in case check_entries
- changed it. */
- brelse(lbh);
- lbh = bread(sdp, *leaf_no);
- gfs2_leaf_in(leaf, lbh);
+ if (count == leaf->lf_entries)
+ goto out;
+ /* release and re-read the leaf in case check_entries
+ changed it. */
+ brelse(lbh);
+ lbh = bread(sdp, *leaf_no);
+ gfs2_leaf_in(leaf, lbh);
+ if (count != leaf->lf_entries) {
log_err( _("Leaf %llu (0x%llx) entry count in "
"directory %llu (0x%llx) does not match "
"number of entries found - is %u, found %u\n"),
@@ -630,17 +593,16 @@ out:
brelse(lbh);
return 0;
-out_copy_old_leaf:
- /* The leaf we read in is bad. So we'll copy the old leaf into the
- * new one. However, that will make us shift our ref count. */
- fix = warn_and_patch(ip, leaf_no, bad_leaf, old_leaf,
- first_ok_leaf, lindex, msg);
- (*ref_count)++;
- memcpy(leaf, oldleaf, sizeof(struct gfs2_leaf));
- if (lbh) {
- if (fix)
- bmodified(lbh);
+bad_leaf:
+ if (lbh)
brelse(lbh);
+ if (pass->repair_leaf) {
+ /* The leaf we read in is bad so we need to repair it. */
+ fix = pass->repair_leaf(ip, leaf_no, lindex, *ref_count, msg,
+ pass->private);
+ if (fix < 0)
+ return fix;
+
}
return 1;
}
@@ -679,17 +641,17 @@ static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize)
/* Checks exhash directory entries */
static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
{
- int error;
- struct gfs2_leaf leaf, oldleaf;
+ int error = 0;
+ struct gfs2_leaf leaf;
unsigned hsize = (1 << ip->i_di.di_depth);
- uint64_t leaf_no, old_leaf, bad_leaf = -1;
+ uint64_t leaf_no, leaf_next;
uint64_t first_ok_leaf, orig_di_blocks;
struct gfs2_buffer_head *lbh;
int lindex;
struct gfs2_sbd *sdp = ip->i_sbd;
- int ref_count = 0, orig_ref_count, orig_di_depth, orig_di_height, old_was_dup;
+ int ref_count, orig_ref_count, orig_di_depth, orig_di_height;
uint64_t *tbl;
- int tbl_valid;
+ int chained_leaf, tbl_valid;
tbl = get_dir_hash(ip);
if (tbl == NULL) {
@@ -751,10 +713,11 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 1;
}
- old_leaf = -1;
- memset(&oldleaf, 0, sizeof(oldleaf));
- old_was_dup = 0;
- for (lindex = 0; lindex < hsize; lindex++) {
+ lindex = 0;
+ leaf_next = -1;
+ while (lindex < hsize) {
+ int l;
+
if (fsck_abort)
break;
@@ -774,38 +737,36 @@ static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
}
leaf_no = be64_to_cpu(tbl[lindex]);
- /* GFS has multiple indirect pointers to the same leaf
- * until those extra pointers are needed, so skip the dups */
- if (leaf_no == bad_leaf) {
- tbl[lindex] = cpu_to_be64(old_leaf);
- gfs2_put_leaf_nr(ip, lindex, old_leaf);
- ref_count++;
- continue;
- } else if (old_leaf == leaf_no) {
+ /* count the number of block pointers to this leaf. We don't
+ need to count the current lindex, because we already know
+ it's a reference */
+ ref_count = 1;
+
+ for (l = lindex + 1; l < hsize; l++) {
+ leaf_next = be64_to_cpu(tbl[l]);
+ if (leaf_next != leaf_no)
+ break;
ref_count++;
- continue;
}
orig_ref_count = ref_count;
+ chained_leaf = 0;
do {
if (fsck_abort) {
free(tbl);
posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 0;
}
- error = check_leaf(ip, lindex, pass, &ref_count,
- &leaf_no, old_leaf, &bad_leaf,
- first_ok_leaf, &leaf, &oldleaf);
+ error = check_leaf(ip, lindex, pass, &leaf_no, &leaf,
+ &ref_count);
if (ref_count != orig_ref_count)
tbl_valid = 0;
- old_was_dup = (error == -EEXIST);
- old_leaf = leaf_no;
- memcpy(&oldleaf, &leaf, sizeof(oldleaf));
if (!leaf.lf_next || error)
break;
leaf_no = leaf.lf_next;
- log_debug( _("Leaf chain (0x%llx) detected.\n"),
- (unsigned long long)leaf_no);
+ chained_leaf++;
+ log_debug( _("Leaf chain #%d (0x%llx) detected.\n"),
+ chained_leaf, (unsigned long long)leaf_no);
} while (1); /* while we have chained leaf blocks */
if (orig_di_depth != ip->i_di.di_depth) {
log_debug(_("Depth of 0x%llx changed from %d to %d\n"),
diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
index bef99ae..e11b5e0 100644
--- a/gfs2/fsck/metawalk.h
+++ b/gfs2/fsck/metawalk.h
@@ -104,7 +104,8 @@ struct metawalk_fxns {
int (*check_hash_tbl) (struct gfs2_inode *ip, uint64_t *tbl,
unsigned hsize, void *private);
int (*repair_leaf) (struct gfs2_inode *ip, uint64_t *leaf_no,
- int lindex, int ref_count, const char *msg);
+ int lindex, int ref_count, const char *msg,
+ void *private);
};
#endif /* _METAWALK_H */
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index dd6b958..e827a55 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -78,6 +78,19 @@ static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
void *private);
static int handle_ip(struct gfs2_sbd *sdp, struct gfs2_inode *ip);
+static int pass1_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no,
+ int lindex, int ref_count, const char *msg,
+ void *private)
+{
+ struct block_count *bc = (struct block_count *)private;
+ int new_leaf_blks;
+
+ new_leaf_blks = repair_leaf(ip, leaf_no, lindex, ref_count, msg);
+ bc->indir_count += new_leaf_blks;
+
+ return new_leaf_blks;
+}
+
struct metawalk_fxns pass1_fxns = {
.private = NULL,
.check_leaf = check_leaf,
@@ -90,6 +103,7 @@ struct metawalk_fxns pass1_fxns = {
.check_eattr_extentry = check_extended_leaf_eattr,
.finish_eattr_indir = finish_eattr_indir,
.big_file_msg = big_file_comfort,
+ .repair_leaf = pass1_repair_leaf,
};
struct metawalk_fxns undo_fxns = {
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index 527071c..e0b1350 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -1422,6 +1422,13 @@ static int check_hash_tbl(struct gfs2_inode *ip, uint64_t *tbl,
return error;
}
+static int pass2_repair_leaf(struct gfs2_inode *ip, uint64_t *leaf_no,
+ int lindex, int ref_count, const char *msg,
+ void *private)
+{
+ return repair_leaf(ip, leaf_no, lindex, ref_count, msg);
+}
+
struct metawalk_fxns pass2_fxns = {
.private = NULL,
.check_leaf = NULL,
@@ -1432,7 +1439,7 @@ struct metawalk_fxns pass2_fxns = {
.check_dentry = check_dentry,
.check_eattr_entry = NULL,
.check_hash_tbl = check_hash_tbl,
- .repair_leaf = repair_leaf,
+ .repair_leaf = pass2_repair_leaf,
};
/* Check system directory inode */
--
1.7.11.7
next prev parent reply other threads:[~2013-04-08 14:40 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-08 14:40 [Cluster-devel] [PATCH 00/42] fsck.gfs2 fixes and improvements rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 01/42] libgfs2: externalize dir_split_leaf rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 02/42] libgfs2: allow dir_split_leaf to receive a leaf buffer rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 03/42] libgfs2: let dir_split_leaf receive a "broken" lindex rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 04/42] fsck.gfs2: Move function find_free_blk to util.c rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 05/42] fsck.gfs2: Split out function to make sure lost+found exists rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 06/42] fsck.gfs2: Check for formal inode mismatch when adding to lost+found rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 07/42] fsck.gfs2: shorten some debug messages in lost+found rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 08/42] fsck.gfs2: Move basic directory entry checks to separate function rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 09/42] fsck.gfs2: Add formal inode check to basic dirent checks rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 10/42] fsck.gfs2: Add new function to check dir hash tables rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 11/42] fsck.gfs2: Special case '..' when processing bad formal inode number rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 12/42] fsck.gfs2: Move function to read directory hash table to util.c rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 13/42] fsck.gfs2: Misc cleanups rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 14/42] fsck.gfs2: Verify dirent hash values correspond to proper leaf block rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 15/42] fsck.gfs2: re-read hash table if directory height or depth changes rpeterso
2013-04-08 14:40 ` rpeterso [this message]
2013-04-08 14:40 ` [Cluster-devel] [PATCH 17/42] fsck.gfs2: check leaf depth when validating leaf blocks rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 18/42] fsck.gfs2: small cleanups rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 19/42] fsck.gfs2: reprocess inodes when blocks are added rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 20/42] fsck.gfs2: Remove redundant leaf depth check rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 21/42] fsck.gfs2: link dinodes that only have extended attribute problems rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 22/42] fsck.gfs2: Add clarifying message to duplicate processing rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 23/42] fsck.gfs2: separate function to calculate metadata block header size rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 24/42] fsck.gfs2: Rework the "undo" functions rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 25/42] fsck.gfs2: Check for interrupt when resolving duplicates rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 26/42] fsck.gfs2: Consistent naming of struct duptree variables rpeterso
2013-04-08 14:40 ` [Cluster-devel] [PATCH 27/42] fsck.gfs2: Keep proper counts when duplicates are found rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 28/42] fsck.gfs2: print metadata block reference on data errors rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 29/42] fsck.gfs2: print block count values when fixing them rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 30/42] fsck.gfs2: Do not invalidate metablocks of dinodes with invalid mode rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 31/42] fsck.gfs2: Log when unrecoverable data block errors are encountered rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 32/42] fsck.gfs2: don't remove buffers from the list when errors are found rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 33/42] fsck.gfs2: Don't flag GFS1 non-dinode blocks as duplicates rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 34/42] fsck.gfs2: externalize check_leaf rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 35/42] fsck.gfs2 pass2: check leaf blocks when fixing hash table rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 36/42] fsck.gfs2: standardize check_metatree return codes rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 37/42] fsck.gfs2: don't invalidate files with duplicate data block refs rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 38/42] fsck.gfs2: check for duplicate first references rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 39/42] fsck.gfs2: When flagging a duplicate reference, show valid or invalid rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 40/42] fsck.gfs2: major duplicate reference reform rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 41/42] fsck.gfs2: Remove all bad eattr blocks rpeterso
2013-04-08 14:41 ` [Cluster-devel] [PATCH 42/42] fsck.gfs2: Remove unused variable rpeterso
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1365432074-17615-17-git-send-email-rpeterso@redhat.com \
--to=rpeterso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).