* Re: [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems
[not found] <bug-10882-10286@http.bugzilla.kernel.org/>
@ 2008-06-07 19:19 ` Andrew Morton
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
` (2 more replies)
0 siblings, 3 replies; 22+ messages in thread
From: Andrew Morton @ 2008-06-07 19:19 UTC (permalink / raw)
To: linux-ext4
On Sat, 7 Jun 2008 09:19:33 -0700 (PDT) bugme-daemon@bugzilla.kernel.org wrote:
>
> http://bugzilla.kernel.org/show_bug.cgi?id=10882
Various ext3 crashes when mounting corrupted images.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH] ext3: validate directory entry data before use
2008-06-07 19:19 ` [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems Andrew Morton
@ 2008-06-21 1:54 ` Duane Griffin
2008-06-21 15:54 ` [PATCH, v2] " Duane Griffin
2008-06-24 6:36 ` [PATCH] " Andreas Dilger
2008-06-23 21:56 ` [PATCH] ext3: handle corrupted orphan list at mount Duane Griffin
2008-06-24 13:47 ` [PATCH] ext3: handle deleting corrupted indirect blocks Duane Griffin
2 siblings, 2 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-21 1:54 UTC (permalink / raw)
To: linux-ext4; +Cc: linux-kernel, akpm, sct, adilger, Sami Liedes, Duane Griffin
Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.
Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.
Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.
This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Signed-off-by: Duane Griffin <duaneg@dghda.com>
--
Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).
Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.
---
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..f092208 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}
/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;
printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
@@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry("ext3_htree_fill_tree",
+ dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ dir, de, bh, offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1196,21 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;
+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (char *) de + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry("do_split", dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1397,8 +1433,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ while (1) {
+ de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
+ if (de2 == NULL || de2 >= top) {
+ break;
+ }
de = de2;
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1655,7 +1698,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1835,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1868,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1889,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry("empty_dir", dir, de, bh, offset);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1909,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1890,7 +1939,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2117,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;
retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;
retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2293,7 @@ retry:
}
#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2346,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH, v2] ext3: validate directory entry data before use
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
@ 2008-06-21 15:54 ` Duane Griffin
2008-06-21 16:13 ` Jochen Voß
2008-06-25 10:08 ` Jan Kara
2008-06-24 6:36 ` [PATCH] " Andreas Dilger
1 sibling, 2 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-21 15:54 UTC (permalink / raw)
To: linux-ext4; +Cc: linux-kernel, akpm, sct, adilger, Sami Liedes, Duane Griffin
Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.
Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.
Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.
This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Signed-off-by: Duane Griffin <duaneg@dghda.com>
--
This is version 2. The original patch was an earlier version causing
warnings that I sent by mistake. This version just fixes those.
Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).
Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.
---
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..ea0236b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}
/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;
printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
@@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry("ext3_htree_fill_tree",
+ dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ dir, de, bh, offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1196,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;
+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (struct ext3_dir_entry_2 *) data1 +
+ dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry("do_split", dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ while (1) {
+ de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
+ if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
+ break;
+ }
de = de2;
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1655,7 +1699,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1836,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1869,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1890,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry("empty_dir", dir, de, bh, 0);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1910,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1890,7 +1940,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2121,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;
retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;
retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2297,7 @@ retry:
}
#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2350,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH, v2] ext3: validate directory entry data before use
2008-06-21 15:54 ` [PATCH, v2] " Duane Griffin
@ 2008-06-21 16:13 ` Jochen Voß
2008-06-21 16:31 ` Duane Griffin
2008-06-25 10:08 ` Jan Kara
1 sibling, 1 reply; 22+ messages in thread
From: Jochen Voß @ 2008-06-21 16:13 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
Hi Duane,
2008/6/21 Duane Griffin <duaneg@dghda.com>:
> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + while (1) {
> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This looks very strange!
> + break;
> + }
> de = de2;
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);
All the best,
Jochen
--
http://seehuhn.de/
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH, v2] ext3: validate directory entry data before use
2008-06-21 16:13 ` Jochen Voß
@ 2008-06-21 16:31 ` Duane Griffin
0 siblings, 0 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-21 16:31 UTC (permalink / raw)
To: Jochen Voß; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
2008/6/21 Jochen Voß <jochen.voss@googlemail.com>:
> Hi Duane,
>
> 2008/6/21 Duane Griffin <duaneg@dghda.com>:
>> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
>> memcpy (data1, de, len);
>> de = (struct ext3_dir_entry_2 *) data1;
>> top = data1 + len;
>> - while ((char *)(de2 = ext3_next_entry(de)) < top)
>> +
>> + while (1) {
>> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
>> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
>
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> This looks very strange!
Aargh, I just can't seem to get this patch out cleanly! That looks
like a vi typo, thanks for catching it so quickly. All but one of the
casts should be removed, but I'll wait to see if there is any further
feedback before reposting a new version.
> All the best,
> Jochen
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH] ext3: handle corrupted orphan list at mount
2008-06-07 19:19 ` [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems Andrew Morton
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
@ 2008-06-23 21:56 ` Duane Griffin
2008-06-23 22:32 ` Sami Liedes
2008-06-24 16:08 ` Jan Kara
2008-06-24 13:47 ` [PATCH] ext3: handle deleting corrupted indirect blocks Duane Griffin
2 siblings, 2 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-23 21:56 UTC (permalink / raw)
To: linux-ext4; +Cc: linux-kernel, akpm, sct, adilger, Sami Liedes, Duane Griffin
If the orphan node list includes valid, untruncatable nodes with nlink > 0
the ext3_orphan_cleanup loop which attempts to delete them will not do so,
causing it to loop forever. Fix by checking for such nodes in the
ext3_orphan_get function.
This patch fixes the second case (image hdb.20000009.softlockup.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Signed-off-by: Duane Griffin <duaneg@dghda.com>
--
Note that we can still end up in an infinite loop if the ext3_truncate
errors out early (and keeps doing so). I'm not sure if we should worry
about that. If we did want to handle it then we could change ext3_truncate
to return success/failure and exit the ext3_orphan_cleanup.
---
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 7712682..bc030f4 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
if (IS_ERR(inode))
goto iget_failed;
+ /*
+ * If the orphans has i_nlinks > 0 then it should be able to be
+ * truncated, otherwise it won't be removed from the orphan list
+ * during processing and an infinite loop will result.
+ */
+ if (inode->i_nlink && !ext3_can_truncate(inode))
+ goto bad_orphan;
+
if (NEXT_ORPHAN(inode) > max_ino)
goto bad_orphan;
brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf..7b7e234 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2253,6 +2253,14 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
}
}
+int ext3_can_truncate(struct inode *inode)
+{
+ return (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ !ext3_inode_is_fast_symlink(inode) &&
+ !(IS_APPEND(inode) || IS_IMMUTABLE(inode));
+}
+
/*
* ext3_truncate()
*
@@ -2297,12 +2305,7 @@ void ext3_truncate(struct inode *inode)
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext3_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!ext3_can_truncate(inode))
return;
/*
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c5403..80171ee 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle corrupted orphan list at mount
2008-06-23 21:56 ` [PATCH] ext3: handle corrupted orphan list at mount Duane Griffin
@ 2008-06-23 22:32 ` Sami Liedes
2008-06-24 0:09 ` Duane Griffin
2008-06-24 16:08 ` Jan Kara
1 sibling, 1 reply; 22+ messages in thread
From: Sami Liedes @ 2008-06-23 22:32 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger
On Mon, Jun 23, 2008 at 10:56:40PM +0100, Duane Griffin wrote:
> If the orphan node list includes valid, untruncatable nodes with nlink > 0
> the ext3_orphan_cleanup loop which attempts to delete them will not do so,
> causing it to loop forever. Fix by checking for such nodes in the
> ext3_orphan_get function.
You people working hard to fix bugs and implement great filesystems
almost makes me feel bad for trying to break your code :)
> diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
> index 7712682..bc030f4 100644
> --- a/fs/ext3/ialloc.c
> +++ b/fs/ext3/ialloc.c
[...]
> @@ -690,6 +698,7 @@ bad_orphan:
> printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
> NEXT_ORPHAN(inode));
> printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
> + printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
^^^
Here I get (on x86 gcc 4.3.1):
fs/ext3/ialloc.c: In function 'ext3_orphan_get':
fs/ext3/ialloc.c:701: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int'
So it probably should be %u or something.
Sami
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle corrupted orphan list at mount
2008-06-23 22:32 ` Sami Liedes
@ 2008-06-24 0:09 ` Duane Griffin
0 siblings, 0 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-24 0:09 UTC (permalink / raw)
To: Sami Liedes; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger
2008/6/23 Sami Liedes <sliedes@cc.hut.fi>:
> You people working hard to fix bugs and implement great filesystems
> almost makes me feel bad for trying to break your code :)
Not at all, it gives me something productive to do with my time! ;)
> Here I get (on x86 gcc 4.3.1):
>
> fs/ext3/ialloc.c: In function 'ext3_orphan_get':
> fs/ext3/ialloc.c:701: warning: format '%lu' expects type 'long unsigned int', but argument 2 has type 'unsigned int'
>
> So it probably should be %u or something.
Thanks, fixed for the next version.
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: validate directory entry data before use
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
2008-06-21 15:54 ` [PATCH, v2] " Duane Griffin
@ 2008-06-24 6:36 ` Andreas Dilger
1 sibling, 0 replies; 22+ messages in thread
From: Andreas Dilger @ 2008-06-24 6:36 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
On Jun 21, 2008 02:54 +0100, Duane Griffin wrote:
> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
> --
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.
Just as a note, and not to detract from the validity of this patch -
in the ext2 page-based directory code is somewhat more efficient in this
area. It checks each page a single time when it is first read from disk,
marks the page as checked, and then never has to check the page again.
This wasn't implemented for ext3 because it never changed from buffer-
based directories to page-based directories due to the dependence on
buffer heads for the journaling code.
It would be possible to implement this for ext3/ext4 readdir/lookup by
replacing use of ext3_bread() with a new ext3_bread_dir() - a copy of
ext3_bread() that validates the buffer if it actually needs ll_rw_block()
to read the buffer from disk.
I also note in ext3_readdir() for non-indexed directories that the readahead
doesn't check for !buffer_uptodate(bh) before forcing a read of the next
block.
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-07 19:19 ` [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems Andrew Morton
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
2008-06-23 21:56 ` [PATCH] ext3: handle corrupted orphan list at mount Duane Griffin
@ 2008-06-24 13:47 ` Duane Griffin
2008-06-24 13:57 ` Eric Sandeen
2008-06-24 21:05 ` Andrew Morton
2 siblings, 2 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-24 13:47 UTC (permalink / raw)
To: linux-ext4; +Cc: linux-kernel, akpm, sct, adilger, Sami Liedes, Duane Griffin
While freeing indirect blocks we attach a journal head to the parent buffer
head, free the blocks, then journal the parent. If the indirect block list
is corrupted and points to the parent the journal head will be detached
when the block is cleared, causing an OOPS.
Check for that explicitly and handle it gracefully.
This patch fixes the third case (image hdb.20000057.nullderef.gz)
reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Signed-off-by: Duane Griffin <duaneg@dghda.com>
---
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf..8019bf2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
if (this_bh) {
BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, this_bh);
+
+ /*
+ * The buffer head should have an attached journal head at this
+ * point. However, if the data is corrupted and an indirect
+ * block pointed to itself, it would have been detached when
+ * the block was cleared. Check for this instead of OOPSing.
+ */
+ if (bh2jh(this_bh))
+ ext3_journal_dirty_metadata(handle, this_bh);
+ else
+ ext3_error(inode->i_sb, "ext3_free_data",
+ "circular indirect block detected, "
+ "inode=%lu, block="E3FSBLK,
+ inode->i_ino, this_bh->b_blocknr);
}
}
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-24 13:47 ` [PATCH] ext3: handle deleting corrupted indirect blocks Duane Griffin
@ 2008-06-24 13:57 ` Eric Sandeen
2008-06-24 14:17 ` Duane Griffin
2008-06-24 21:05 ` Andrew Morton
1 sibling, 1 reply; 22+ messages in thread
From: Eric Sandeen @ 2008-06-24 13:57 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
Duane Griffin wrote:
> While freeing indirect blocks we attach a journal head to the parent buffer
> head, free the blocks, then journal the parent. If the indirect block list
> is corrupted and points to the parent the journal head will be detached
> when the block is cleared, causing an OOPS.
>
> Check for that explicitly and handle it gracefully.
>
> This patch fixes the third case (image hdb.20000057.nullderef.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
> ---
>
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 6ae4ecf..8019bf2 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
>
> if (this_bh) {
> BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
> - ext3_journal_dirty_metadata(handle, this_bh);
> +
> + /*
> + * The buffer head should have an attached journal head at this
> + * point. However, if the data is corrupted and an indirect
> + * block pointed to itself, it would have been detached when
> + * the block was cleared. Check for this instead of OOPSing.
> + */
> + if (bh2jh(this_bh))
Should it also (or only) be checking for buffer_jbd rather than testing
bh2jh which is just shorthand for "is b_private non-null?"
Also maybe I should know this intuitively by now, but what is the path
where b_private / BH_JBD got cleared on this corrupted image?
Thanks,
-Eric
> + ext3_journal_dirty_metadata(handle, this_bh);
> + else
> + ext3_error(inode->i_sb, "ext3_free_data",
> + "circular indirect block detected, "
> + "inode=%lu, block="E3FSBLK,
> + inode->i_ino, this_bh->b_blocknr);
> }
> }
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-24 13:57 ` Eric Sandeen
@ 2008-06-24 14:17 ` Duane Griffin
0 siblings, 0 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-24 14:17 UTC (permalink / raw)
To: Eric Sandeen; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
2008/6/24 Eric Sandeen <sandeen@redhat.com>:
>> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
>> index 6ae4ecf..8019bf2 100644
>> --- a/fs/ext3/inode.c
>> +++ b/fs/ext3/inode.c
>> @@ -2127,7 +2127,20 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
>>
>> if (this_bh) {
>> BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
>> - ext3_journal_dirty_metadata(handle, this_bh);
>> +
>> + /*
>> + * The buffer head should have an attached journal head at this
>> + * point. However, if the data is corrupted and an indirect
>> + * block pointed to itself, it would have been detached when
>> + * the block was cleared. Check for this instead of OOPSing.
>> + */
>> + if (bh2jh(this_bh))
>
> Should it also (or only) be checking for buffer_jbd rather than testing
> bh2jh which is just shorthand for "is b_private non-null?"
I don't think so. The bug was occurring because journal_dirty_metadata
dereferences b_private (via bh2jh) unconditionally. In practice
checking with buffer_jbd should be equivalent, but this way seems more
robust since it is checking the actual pointer being accessed rather
than a separate bit, albeit one that should be in sync with it.
> Also maybe I should know this intuitively by now, but what is the path
> where b_private / BH_JBD got cleared on this corrupted image?
Immediately above the change, in the ext3_free_data function, we call
ext3_clear_blocks to clear the indirect blocks in this parent block.
If one of those blocks happens to actually be the parent block it will
clear b_private / BH_JBD.
I did the check at the end rather than earlier as it seemed more
elegant. I don't think there should be much practical difference,
although it is possible the FS may not be quite so badly corrupted if
we did it the other way (and didn't clear the block at all). To be
honest, I'm not convinced there aren't other similar failure modes
lurking in this code, although I couldn't find any with a quick
review.
Just let me know if you think it should be done another way.
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle corrupted orphan list at mount
2008-06-23 21:56 ` [PATCH] ext3: handle corrupted orphan list at mount Duane Griffin
2008-06-23 22:32 ` Sami Liedes
@ 2008-06-24 16:08 ` Jan Kara
2008-06-24 17:16 ` Duane Griffin
1 sibling, 1 reply; 22+ messages in thread
From: Jan Kara @ 2008-06-24 16:08 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
> If the orphan node list includes valid, untruncatable nodes with nlink > 0
> the ext3_orphan_cleanup loop which attempts to delete them will not do so,
> causing it to loop forever. Fix by checking for such nodes in the
> ext3_orphan_get function.
>
> This patch fixes the second case (image hdb.20000009.softlockup.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
> --
>
> Note that we can still end up in an infinite loop if the ext3_truncate
> errors out early (and keeps doing so). I'm not sure if we should worry
> about that. If we did want to handle it then we could change ext3_truncate
> to return success/failure and exit the ext3_orphan_cleanup.
>
> ---
>
> diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
> index 7712682..bc030f4 100644
> --- a/fs/ext3/ialloc.c
> +++ b/fs/ext3/ialloc.c
> @@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
> if (IS_ERR(inode))
> goto iget_failed;
>
> + /*
> + * If the orphans has i_nlinks > 0 then it should be able to be
> + * truncated, otherwise it won't be removed from the orphan list
> + * during processing and an infinite loop will result.
> + */
> + if (inode->i_nlink && !ext3_can_truncate(inode))
> + goto bad_orphan;
> +
Maybe I miss something but shouldn't above rather be ||?
Honza
> if (NEXT_ORPHAN(inode) > max_ino)
> goto bad_orphan;
> brelse(bitmap_bh);
> @@ -690,6 +698,7 @@ bad_orphan:
> printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
> NEXT_ORPHAN(inode));
> printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
> + printk(KERN_NOTICE "i_nlink=%lu\n", inode->i_nlink);
> /* Avoid freeing blocks if we got a bad deleted inode */
> if (inode->i_nlink == 0)
> inode->i_blocks = 0;
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 6ae4ecf..7b7e234 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -2253,6 +2253,14 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
> }
> }
>
> +int ext3_can_truncate(struct inode *inode)
> +{
> + return (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> + S_ISLNK(inode->i_mode)) &&
> + !ext3_inode_is_fast_symlink(inode) &&
> + !(IS_APPEND(inode) || IS_IMMUTABLE(inode));
> +}
> +
> /*
> * ext3_truncate()
> *
> @@ -2297,12 +2305,7 @@ void ext3_truncate(struct inode *inode)
> unsigned blocksize = inode->i_sb->s_blocksize;
> struct page *page;
>
> - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> - S_ISLNK(inode->i_mode)))
> - return;
> - if (ext3_inode_is_fast_symlink(inode))
> - return;
> - if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
> + if (!ext3_can_truncate(inode))
> return;
>
> /*
> diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
> index 36c5403..80171ee 100644
> --- a/include/linux/ext3_fs.h
> +++ b/include/linux/ext3_fs.h
> @@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
> extern void ext3_dirty_inode(struct inode *);
> extern int ext3_change_inode_journal_flag(struct inode *, int);
> extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
> +extern int ext3_can_truncate(struct inode *inode);
> extern void ext3_truncate (struct inode *);
> extern void ext3_set_inode_flags(struct inode *);
> extern void ext3_get_inode_flags(struct ext3_inode_info *);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Jan Kara <jack@suse.cz>
SuSE CR Labs
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle corrupted orphan list at mount
2008-06-24 16:08 ` Jan Kara
@ 2008-06-24 17:16 ` Duane Griffin
2008-06-24 17:18 ` Jan Kara
0 siblings, 1 reply; 22+ messages in thread
From: Duane Griffin @ 2008-06-24 17:16 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
2008/6/24 Jan Kara <jack@suse.cz>:
>> + /*
>> + * If the orphans has i_nlinks > 0 then it should be able to be
>> + * truncated, otherwise it won't be removed from the orphan list
>> + * during processing and an infinite loop will result.
>> + */
>> + if (inode->i_nlink && !ext3_can_truncate(inode))
>> + goto bad_orphan;
>> +
> Maybe I miss something but shouldn't above rather be ||?
No, it is correct. If i_nlink == 0 the orphan will be deleted in the
cleanup loop by the iput. If i_nlink > 0 then ext3_truncate is called,
which usually calls ext3_orphan_del on the way out, thereby removing
the node from the orphan list. However, if it exits too early
(basically if the ext3_can_truncate check fails, although there are
other failure conditions such as OOM that can also cause it to exit
early) then it doesn't, hence we end up in the infinite loop. So the
check here says, if this node is not going to be deleted or truncated
then it is invalid.
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle corrupted orphan list at mount
2008-06-24 17:16 ` Duane Griffin
@ 2008-06-24 17:18 ` Jan Kara
0 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2008-06-24 17:18 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
On Tue 24-06-08 18:16:21, Duane Griffin wrote:
> 2008/6/24 Jan Kara <jack@suse.cz>:
> >> + /*
> >> + * If the orphans has i_nlinks > 0 then it should be able to be
> >> + * truncated, otherwise it won't be removed from the orphan list
> >> + * during processing and an infinite loop will result.
> >> + */
> >> + if (inode->i_nlink && !ext3_can_truncate(inode))
> >> + goto bad_orphan;
> >> +
> > Maybe I miss something but shouldn't above rather be ||?
>
> No, it is correct. If i_nlink == 0 the orphan will be deleted in the
> cleanup loop by the iput. If i_nlink > 0 then ext3_truncate is called,
> which usually calls ext3_orphan_del on the way out, thereby removing
> the node from the orphan list. However, if it exits too early
> (basically if the ext3_can_truncate check fails, although there are
> other failure conditions such as OOM that can also cause it to exit
> early) then it doesn't, hence we end up in the infinite loop. So the
> check here says, if this node is not going to be deleted or truncated
> then it is invalid.
Ah, OK, I forgot that you want to handle also truncation without deletion
of the inode itself. Thanks for explanation.
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-24 13:47 ` [PATCH] ext3: handle deleting corrupted indirect blocks Duane Griffin
2008-06-24 13:57 ` Eric Sandeen
@ 2008-06-24 21:05 ` Andrew Morton
2008-06-25 0:13 ` Mingming
2008-06-25 0:15 ` Duane Griffin
1 sibling, 2 replies; 22+ messages in thread
From: Andrew Morton @ 2008-06-24 21:05 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, sct, adilger, sliedes, duaneg
On Tue, 24 Jun 2008 14:47:20 +0100
"Duane Griffin" <duaneg@dghda.com> wrote:
> While freeing indirect blocks we attach a journal head to the parent buffer
> head, free the blocks, then journal the parent. If the indirect block list
> is corrupted and points to the parent the journal head will be detached
> when the block is cleared, causing an OOPS.
>
> Check for that explicitly and handle it gracefully.
>
> This patch fixes the third case (image hdb.20000057.nullderef.gz)
> reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Thanks.
Quite a few minorish ext3 fixes are coming in lately. Is anyone
checking whether they are needed in ext4 and if so, porting them
over?
-mm's current queue is:
ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch
jbd-replace-potentially-false-assertion-with-if-block.patch
jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch
ext3-improve-some-code-in-rb-tree-part-of-dirc.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
ext3-remove-double-definitions-of-xattr-macros.patch
ext3-handle-corrupted-orphan-list-at-mount.patch
ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
ext3-handle-corrupted-orphan-list-at-mount-fix.patch
ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch
ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch
ext3-handle-deleting-corrupted-indirect-blocks.patch
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-24 21:05 ` Andrew Morton
@ 2008-06-25 0:13 ` Mingming
2008-06-25 0:15 ` Duane Griffin
1 sibling, 0 replies; 22+ messages in thread
From: Mingming @ 2008-06-25 0:13 UTC (permalink / raw)
To: Andrew Morton
Cc: Duane Griffin, linux-ext4, linux-kernel, sct, adilger, sliedes
On Tue, 2008-06-24 at 14:05 -0700, Andrew Morton wrote:
> On Tue, 24 Jun 2008 14:47:20 +0100
> "Duane Griffin" <duaneg@dghda.com> wrote:
>
> > While freeing indirect blocks we attach a journal head to the parent buffer
> > head, free the blocks, then journal the parent. If the indirect block list
> > is corrupted and points to the parent the journal head will be detached
> > when the block is cleared, causing an OOPS.
> >
> > Check for that explicitly and handle it gracefully.
> >
> > This patch fixes the third case (image hdb.20000057.nullderef.gz)
> > reported in http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Thanks.
>
> Quite a few minorish ext3 fixes are coming in lately. Is anyone
> checking whether they are needed in ext4 and if so, porting them
> over?
>
Hi Andrew, thanks for the reminder, I checked and think there are a few
latest ext3 fixes need to port to ext4..
> -mm's current queue is:
>
> ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
> ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
> ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch
These three (ext4 version) have been pushed to Linus tree(May 14th)
> jbd-replace-potentially-false-assertion-with-if-block.patch
Ext4 has this fix in upstream already
> jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
> jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch
Pushed to Linus already
> ext3-improve-some-code-in-rb-tree-part-of-dirc.patch
Ext4 version is queued in ext4 patch queue
> jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
jbd2 version is not needed with new ordered mode rewrite
> ext3-remove-double-definitions-of-xattr-macros.patch
ext4 version is in ext4 patch queue
> ext3-handle-corrupted-orphan-list-at-mount.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
> ext3-handle-corrupted-orphan-list-at-mount-fix.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch
> ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch
> ext3-handle-deleting-corrupted-indirect-blocks.patch
>
These haven't port to ext4 yet. I could port them to ext4 if no one
wants to do so.
Mingming
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] ext3: handle deleting corrupted indirect blocks
2008-06-24 21:05 ` Andrew Morton
2008-06-25 0:13 ` Mingming
@ 2008-06-25 0:15 ` Duane Griffin
1 sibling, 0 replies; 22+ messages in thread
From: Duane Griffin @ 2008-06-25 0:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-ext4, linux-kernel, sct, adilger, sliedes
2008/6/24 Andrew Morton <akpm@linux-foundation.org>:
> Quite a few minorish ext3 fixes are coming in lately. Is anyone
> checking whether they are needed in ext4 and if so, porting them
> over?
I was planning on doing my ones, once they'd gone through review and
been accepted into your tree. I'd be happy to do others too, if
needed.
> -mm's current queue is:
>
> ext3-fix-synchronization-of-quota-files-in-journal=data-mode.patch
> ext3-fix-typos-in-messages-and-comments-journalled-journaled.patch
> ext3-correct-mount-option-parsing-to-detect-when-quota-options-can-be-changed.patch
> jbd-replace-potentially-false-assertion-with-if-block.patch
> jbd-eliminate-duplicated-code-in-revocation-table-init-destroy-functions.patch
> jbd-tidy-up-revoke-cache-initialisation-and-destruction.patch
These three are mine and should have ext4 patches queued or upstream already.
> ext3-improve-some-code-in-rb-tree-part-of-dirc.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes.patch
> jbd-fix-race-between-free-buffer-and-commit-trasanction-checkpatch-fixes-fix.patch
> ext3-remove-double-definitions-of-xattr-macros.patch
> ext3-handle-corrupted-orphan-list-at-mount.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup.patch
> ext3-handle-corrupted-orphan-list-at-mount-fix.patch
> ext3-handle-corrupted-orphan-list-at-mount-cleanup-fix.patch
These four are new ones from me. They don't have ext4 versions yet but
I'll prepare them soon.
> ext3-dont-read-inode-block-if-the-buffer-has-a-write-error.patch
> ext3-handle-deleting-corrupted-indirect-blocks.patch
Ditto this one.
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH, v2] ext3: validate directory entry data before use
2008-06-21 15:54 ` [PATCH, v2] " Duane Griffin
2008-06-21 16:13 ` Jochen Voß
@ 2008-06-25 10:08 ` Jan Kara
2008-06-25 11:30 ` Duane Griffin
1 sibling, 1 reply; 22+ messages in thread
From: Jan Kara @ 2008-06-25 10:08 UTC (permalink / raw)
To: Duane Griffin; +Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes
Hi,
> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
> --
>
> This is version 2. The original patch was an earlier version causing
> warnings that I sent by mistake. This version just fixes those.
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.
The patch looks sane to me, only of few mostly coding style nits..
> ---
>
> diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
> index 0b8cf80..ea0236b 100644
> --- a/fs/ext3/namei.c
> +++ b/fs/ext3/namei.c
> @@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
> * p is at least 6 bytes before the end of page
> */
> static inline struct ext3_dir_entry_2 *
> -ext3_next_entry(struct ext3_dir_entry_2 *p)
> +__ext3_next_entry(struct ext3_dir_entry_2 *p)
> {
> return (struct ext3_dir_entry_2 *)((char *)p +
> ext3_rec_len_from_disk(p->rec_len));
> }
>
> /*
> + * Checks the directory entry looks sane before using it to find the next one.
> + * Returns NULL and reports an error if an invalid entry is passed.
> + */
> +static inline struct ext3_dir_entry_2 *
> +ext3_next_entry(const char *func, struct inode *dir,
> + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> +{
> + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> + return __ext3_next_entry(de);
> + else
> + return NULL;
> +}
Andrew might complain that the above function is too big to be
inlined. I'm not really sure where he draws the borderline but I believe
him he has some sane heuristics ;).
> +
> +/*
> * Future: use high four bits of block for coalesce-on-delete flags
> * Mask them off for now.
> */
> @@ -271,15 +285,17 @@ struct stats
> unsigned bcount;
> };
>
> -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
> - int size, int show_names)
> +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
> + struct buffer_head *bh, int size,
> + int show_names)
> {
> + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
> unsigned names = 0, space = 0;
> char *base = (char *) de;
> struct dx_hash_info h = *hinfo;
>
> printk("names: ");
> - while ((char *) de < base + size)
> + while (de && (char *) de < base + size)
> {
> if (de->inode)
> {
> @@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
> space += EXT3_DIR_REC_LEN(de->name_len);
> names++;
> }
> - de = ext3_next_entry(de);
> + de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
Why don't you use __func__?
> }
> printk("(%i)\n", names);
> return (struct stats) { names, space, 1 };
> @@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
> if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
> stats = levels?
> dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
> - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
> + dx_show_leaf(hinfo, dir, bh, blocksize, 0);
> names += stats.names;
> space += stats.space;
> bcount += stats.bcount;
> @@ -583,7 +599,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
> top = (struct ext3_dir_entry_2 *) ((char *) de +
> dir->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de)) {
> + for (; de < top; de = __ext3_next_entry(de)) {
> if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
Here should be __func__ as well - not your fault, I know... Anyway,
maybe you could do global replacement for this ;)
> (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
> +((char *)de - bh->b_data))) {
> @@ -658,7 +674,12 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
> }
> if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
> de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
> - de = ext3_next_entry(de);
> + de = ext3_next_entry("ext3_htree_fill_tree",
> + dir, de, frames[0].bh, 0);
> + if (de == NULL) {
> + err = -EIO;
> + goto errout;
> + }
> if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
> goto errout;
> count++;
> @@ -726,8 +747,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
> count++;
> cond_resched();
> }
> - /* XXX: do we need to check rec_len == 0 case? -Chris */
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return count;
> }
> @@ -991,24 +1011,28 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de))
> - if (ext3_match (namelen, name, de)) {
> + for (; de < top; de = __ext3_next_entry(de)) {
> + int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
> + + ((char *) de - bh->b_data);
> +
> if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh,
> - (block<<EXT3_BLOCK_SIZE_BITS(sb))
> - +((char *)de - bh->b_data))) {
> - brelse (bh);
> + dir, de, bh, offset)) {
> + brelse(bh);
> *err = ERR_BAD_DX_DIR;
> goto errout;
> }
> - *res_dir = de;
> - dx_release (frames);
> - return bh;
> +
> + if (ext3_match(namelen, name, de)) {
> + *res_dir = de;
> + dx_release(frames);
> + return bh;
> + }
> }
> brelse (bh);
> +
> /* Check to see if we should continue to search */
> - retval = ext3_htree_next_block(dir, hash, frame,
> - frames, NULL);
> + retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
> +
> if (retval < 0) {
> ext3_warning(sb, __func__,
> "error reading index page in directory #%lu",
> @@ -1141,7 +1165,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
>
> prev = to = de;
> while ((char*)de < base + size) {
> - next = ext3_next_entry(de);
> + next = __ext3_next_entry(de);
> if (de->inode && de->name_len) {
> rec_len = EXT3_DIR_REC_LEN(de->name_len);
> if (de > to)
> @@ -1172,9 +1196,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
> struct dx_map_entry *map;
> char *data1 = (*bh)->b_data, *data2;
> unsigned split, move, size, i;
> - struct ext3_dir_entry_2 *de = NULL, *de2;
> + struct ext3_dir_entry_2 *de, *de2;
> int err = 0;
>
> + /* Verify directory entries are sane before trying anything else. */
> + de = (struct ext3_dir_entry_2 *) data1;
> + de2 = (struct ext3_dir_entry_2 *) data1 +
> + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
> + while (de < de2) {
> + de = ext3_next_entry("do_split", dir, de, *bh, 0);
> + if (de == NULL) {
> + brelse(*bh);
> + *bh = NULL;
> + goto errout;
> + }
> + }
> +
> bh2 = ext3_append (handle, dir, &newblock, &err);
> if (!(bh2)) {
> brelse(*bh);
> @@ -1397,8 +1434,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + while (1) {
> + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
> + break;
> + }
Apart from (char *) thing, you also don't need braces above. Maybe the
whole while loop would be nicer as:
de2 = de;
while (de2 != NULL && de2 < top) {
de = de2;
de2 = ext3_next_entry(__func__, dir, de, bh, 0);
}
> de = de2;
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);
> @@ -1655,7 +1699,7 @@ static int ext3_delete_entry (handle_t *handle,
> }
> i += ext3_rec_len_from_disk(de->rec_len);
> pde = de;
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return -ENOENT;
> }
> @@ -1792,7 +1836,7 @@ retry:
> de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
> strcpy (de->name, ".");
> ext3_set_de_type(dir->i_sb, de, S_IFDIR);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> de->inode = cpu_to_le32(dir->i_ino);
> de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(1));
> @@ -1825,7 +1869,7 @@ out_stop:
> /*
> * routine to check that the specified directory is empty (for rmdir)
> */
> -static int empty_dir (struct inode * inode)
> +static int empty_dir(struct inode *dir, struct inode *inode)
> {
> unsigned long offset;
> struct buffer_head * bh;
> @@ -1846,8 +1890,14 @@ static int empty_dir (struct inode * inode)
> inode->i_ino);
> return 1;
> }
> +
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> - de1 = ext3_next_entry(de);
> + de1 = ext3_next_entry("empty_dir", dir, de, bh, 0);
> + if (de1 == NULL) {
> + brelse(bh);
> + return 1;
> + }
> +
> if (le32_to_cpu(de->inode) != inode->i_ino ||
> !le32_to_cpu(de1->inode) ||
> strcmp (".", de->name) ||
> @@ -1860,8 +1910,8 @@ static int empty_dir (struct inode * inode)
> }
> offset = ext3_rec_len_from_disk(de->rec_len) +
> ext3_rec_len_from_disk(de1->rec_len);
> - de = ext3_next_entry(de1);
> - while (offset < inode->i_size ) {
> + de = ext3_next_entry("empty_dir", dir, de1, bh, offset);
> + while (de && offset < inode->i_size) {
> if (!bh ||
> (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
> err = 0;
> @@ -1890,7 +1940,7 @@ static int empty_dir (struct inode * inode)
> return 0;
> }
> offset += ext3_rec_len_from_disk(de->rec_len);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> brelse (bh);
> return 1;
> @@ -2068,7 +2121,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
> goto end_rmdir;
>
> retval = -ENOTEMPTY;
> - if (!empty_dir (inode))
> + if (!empty_dir(dir, inode))
> goto end_rmdir;
>
> retval = ext3_delete_entry(handle, dir, de, bh);
> @@ -2244,7 +2297,7 @@ retry:
> }
>
> #define PARENT_INO(buffer) \
> - (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
> + (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
>
> /*
> * Anybody can rename anything with this: the permission checks are left to the
> @@ -2297,7 +2350,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
> if (S_ISDIR(old_inode->i_mode)) {
> if (new_inode) {
> retval = -ENOTEMPTY;
> - if (!empty_dir (new_inode))
> + if (!empty_dir(new_dir, new_inode))
> goto end_rename;
> }
> retval = -EIO;
Honza
--
Jan Kara <jack@suse.cz>
SuSE CR Labs
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH, v2] ext3: validate directory entry data before use
2008-06-25 10:08 ` Jan Kara
@ 2008-06-25 11:30 ` Duane Griffin
2008-06-25 12:11 ` [PATCH, v3] " Duane Griffin
0 siblings, 1 reply; 22+ messages in thread
From: Duane Griffin @ 2008-06-25 11:30 UTC (permalink / raw)
To: Jan Kara
Cc: Duane Griffin, linux-ext4, linux-kernel, akpm, sct, adilger,
Sami Liedes
On Wed, Jun 25, 2008 at 12:08:35PM +0200, Jan Kara wrote:
> The patch looks sane to me, only of few mostly coding style nits..
Thanks for the review!
> > +static inline struct ext3_dir_entry_2 *
> > +ext3_next_entry(const char *func, struct inode *dir,
> > + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> > +{
> > + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> > + return __ext3_next_entry(de);
> > + else
> > + return NULL;
> > +}
> Andrew might complain that the above function is too big to be
> inlined. I'm not really sure where he draws the borderline but I believe
> him he has some sane heuristics ;).
Oh, I'd certainly believe him! ;) Say the word and I'll change it.
> > - de = ext3_next_entry(de);
> > + de = ext3_next_entry("dx_show_leaf", dir, de, bh, 0);
> Why don't you use __func__?
Good question. Fixed.
> > - for (; de < top; de = ext3_next_entry(de)) {
> > + for (; de < top; de = __ext3_next_entry(de)) {
> > if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
> Here should be __func__ as well - not your fault, I know... Anyway,
> maybe you could do global replacement for this ;)
Done, fixing a couple of incorrect strings along the way, thereby
proving the usefulness of the exercise. A macro would make things
slightly tidier, but I'm not sure it is worth doing. Let me know if you
think so and I'll add it.
> > + while (1) {
> > + de2 = ext3_next_entry("make_indexed_dir", dir, de, bh, 0);
> > + if (de2 == NULL || (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) (char *) de2 >= top) {
> > + break;
> > + }
> Apart from (char *) thing, you also don't need braces above. Maybe the
> whole while loop would be nicer as:
> de2 = de;
> while (de2 != NULL && de2 < top) {
> de = de2;
> de2 = ext3_next_entry(__func__, dir, de, bh, 0);
> }
Agreed, much nicer. Updated accordingly.
> Jan Kara <jack@suse.cz>
> SuSE CR Labs
Cheers,
Duane.
--
"I never could learn to drink that blood and call it wine" - Bob Dylan
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH, v3] ext3: validate directory entry data before use
2008-06-25 11:30 ` Duane Griffin
@ 2008-06-25 12:11 ` Duane Griffin
2008-06-25 12:18 ` Jan Kara
0 siblings, 1 reply; 22+ messages in thread
From: Duane Griffin @ 2008-06-25 12:11 UTC (permalink / raw)
To: linux-ext4
Cc: linux-kernel, akpm, sct, adilger, Sami Liedes, jochen.voss,
Jan Kara, Duane Griffin
Various places in fs/ext3/namei.c use ext3_next_entry to loop over
directory entries, but not all of them verify that entries are valid before
attempting to move to the next one. In the case where rec_len == 0 this
causes an infinite loop.
Introduce a new version of ext3_next_entry that checks the validity of the
entry before using its data to find the next one. Rename the original
function to __ext3_next_entry and use it in places where we already know
the data is valid.
Note that the changes to empty_dir follow the original code in reporting
the directory as empty in the presence of errors.
This patch fixes the first case (image hdb.25.softlockup.gz) reported in
http://bugzilla.kernel.org/show_bug.cgi?id=10882.
Signed-off-by: Duane Griffin <duaneg@dghda.com>
--
This is version 3. It includes some tidy-ups from v2 as suggested by
Jochen Voss and Jan Kara. It also replaces hard-coded function name strings
with __func__ in all calls to ext3_check_dir_entry, including one in an
otherwise untouched file. I don't think a separate patch for this is
necessary, but if it would be preferred I'd be happy to split it out.
I've removed some whitespace in a couple of places in order to fit lines
into 80 columns, so there are a couple of checkpatch warnings. Let me know
if you think it would be better to split the lines or go over 80 cols.
Please note that I've only properly tested the originally reported failure
case (i.e. the changes to ext3_dx_find_entry).
Reviewers may want to pay particular attention to the changes to do_split,
make_indexed_dir and empty_dir. I've tried to follow the original code's
error handling conventions, as noted above for empty_dir, but I'm not sure
this is the best way to do things.
---
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd..2a8ab33 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -187,7 +187,7 @@ revalidate:
while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
- if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+ if (!ext3_check_dir_entry (__func__, inode, de,
bh, offset)) {
/* On error, skip the f_pos to the
next block. */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..bb35255 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
* p is at least 6 bytes before the end of page
*/
static inline struct ext3_dir_entry_2 *
-ext3_next_entry(struct ext3_dir_entry_2 *p)
+__ext3_next_entry(struct ext3_dir_entry_2 *p)
{
return (struct ext3_dir_entry_2 *)((char *)p +
ext3_rec_len_from_disk(p->rec_len));
}
/*
+ * Checks the directory entry looks sane before using it to find the next one.
+ * Returns NULL and reports an error if an invalid entry is passed.
+ */
+static inline struct ext3_dir_entry_2 *
+ext3_next_entry(const char *func, struct inode *dir,
+ struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
+{
+ if (ext3_check_dir_entry(func, dir, de, bh, offset))
+ return __ext3_next_entry(de);
+ else
+ return NULL;
+}
+
+/*
* Future: use high four bits of block for coalesce-on-delete flags
* Mask them off for now.
*/
@@ -271,15 +285,17 @@ struct stats
unsigned bcount;
};
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
+ struct buffer_head *bh, int size,
+ int show_names)
{
+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
unsigned names = 0, space = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;
printk("names: ");
- while ((char *) de < base + size)
+ while (de && (char *) de < base + size)
{
if (de->inode)
{
@@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
}
- de = ext3_next_entry(de);
+ de = ext3_next_entry(__func__, dir, de, bh, 0);
}
printk("(%i)\n", names);
return (struct stats) { names, space, 1 };
@@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(hinfo, dir, bh, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -583,8 +599,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext3_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de)) {
- if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+ for (; de < top; de = __ext3_next_entry(de)) {
+ if (!ext3_check_dir_entry(__func__, dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */
@@ -658,7 +674,11 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
- de = ext3_next_entry(de);
+ de = ext3_next_entry(__func__, dir, de, frames[0].bh, 0);
+ if (de == NULL) {
+ err = -EIO;
+ goto errout;
+ }
if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
goto errout;
count++;
@@ -726,8 +746,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return count;
}
@@ -822,8 +841,7 @@ static inline int search_dirblock(struct buffer_head * bh,
if ((char *) de + namelen <= dlimit &&
ext3_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh, offset))
+ if (!ext3_check_dir_entry(__func__, dir, de, bh,offset))
return -1;
*res_dir = de;
return 1;
@@ -991,24 +1009,27 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ for (; de < top; de = __ext3_next_entry(de)) {
+ int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
+ if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
+
/* Check to see if we should continue to search */
- retval = ext3_htree_next_block(dir, hash, frame,
- frames, NULL);
+ retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
+
if (retval < 0) {
ext3_warning(sb, __func__,
"error reading index page in directory #%lu",
@@ -1141,7 +1162,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
prev = to = de;
while ((char*)de < base + size) {
- next = ext3_next_entry(de);
+ next = __ext3_next_entry(de);
if (de->inode && de->name_len) {
rec_len = EXT3_DIR_REC_LEN(de->name_len);
if (de > to)
@@ -1172,9 +1193,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
unsigned split, move, size, i;
- struct ext3_dir_entry_2 *de = NULL, *de2;
+ struct ext3_dir_entry_2 *de, *de2;
int err = 0;
+ /* Verify directory entries are sane before trying anything else. */
+ de = (struct ext3_dir_entry_2 *) data1;
+ de2 = (struct ext3_dir_entry_2 *) data1 +
+ dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+ while (de < de2) {
+ de = ext3_next_entry(__func__, dir, de, *bh, 0);
+ if (de == NULL) {
+ brelse(*bh);
+ *bh = NULL;
+ goto errout;
+ }
+ }
+
bh2 = ext3_append (handle, dir, &newblock, &err);
if (!(bh2)) {
brelse(*bh);
@@ -1281,8 +1315,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext3_dir_entry_2 *)bh->b_data;
top = bh->b_data + dir->i_sb->s_blocksize - reclen;
while ((char *) de <= top) {
- if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
- bh, offset)) {
+ if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
brelse (bh);
return -EIO;
}
@@ -1397,8 +1430,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
memcpy (data1, de, len);
de = (struct ext3_dir_entry_2 *) data1;
top = data1 + len;
- while ((char *)(de2 = ext3_next_entry(de)) < top)
+
+ de2 = de;
+ while (de2 != NULL && de2 < top) {
de = de2;
+ de2 = ext3_next_entry(__func__, dir, de, bh, 0);
+ }
+
de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */
de = (struct ext3_dir_entry_2 *) (&root->dotdot);
@@ -1637,7 +1675,7 @@ static int ext3_delete_entry (handle_t *handle,
pde = NULL;
de = (struct ext3_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) {
- if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
+ if (!ext3_check_dir_entry(__func__, dir, de, bh, i))
return -EIO;
if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access");
@@ -1655,7 +1693,7 @@ static int ext3_delete_entry (handle_t *handle,
}
i += ext3_rec_len_from_disk(de->rec_len);
pde = de;
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
return -ENOENT;
}
@@ -1792,7 +1830,7 @@ retry:
de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
strcpy (de->name, ".");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT3_DIR_REC_LEN(1));
@@ -1825,7 +1863,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir (struct inode * inode)
+static int empty_dir(struct inode *dir, struct inode *inode)
{
unsigned long offset;
struct buffer_head * bh;
@@ -1846,8 +1884,14 @@ static int empty_dir (struct inode * inode)
inode->i_ino);
return 1;
}
+
de = (struct ext3_dir_entry_2 *) bh->b_data;
- de1 = ext3_next_entry(de);
+ de1 = ext3_next_entry(__func__, dir, de, bh, 0);
+ if (de1 == NULL) {
+ brelse(bh);
+ return 1;
+ }
+
if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
@@ -1860,8 +1904,8 @@ static int empty_dir (struct inode * inode)
}
offset = ext3_rec_len_from_disk(de->rec_len) +
ext3_rec_len_from_disk(de1->rec_len);
- de = ext3_next_entry(de1);
- while (offset < inode->i_size ) {
+ de = ext3_next_entry(__func__, dir, de1, bh, offset);
+ while (de && offset < inode->i_size) {
if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
@@ -1879,7 +1923,7 @@ static int empty_dir (struct inode * inode)
}
de = (struct ext3_dir_entry_2 *) bh->b_data;
}
- if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
+ if (!ext3_check_dir_entry(__func__, inode, de, bh, offset)) {
de = (struct ext3_dir_entry_2 *)(bh->b_data +
sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
@@ -1890,7 +1934,7 @@ static int empty_dir (struct inode * inode)
return 0;
}
offset += ext3_rec_len_from_disk(de->rec_len);
- de = ext3_next_entry(de);
+ de = __ext3_next_entry(de);
}
brelse (bh);
return 1;
@@ -2068,7 +2112,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
goto end_rmdir;
retval = -ENOTEMPTY;
- if (!empty_dir (inode))
+ if (!empty_dir(dir, inode))
goto end_rmdir;
retval = ext3_delete_entry(handle, dir, de, bh);
@@ -2244,7 +2288,7 @@ retry:
}
#define PARENT_INO(buffer) \
- (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
+ (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
/*
* Anybody can rename anything with this: the permission checks are left to the
@@ -2297,7 +2341,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
- if (!empty_dir (new_inode))
+ if (!empty_dir(new_dir, new_inode))
goto end_rename;
}
retval = -EIO;
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH, v3] ext3: validate directory entry data before use
2008-06-25 12:11 ` [PATCH, v3] " Duane Griffin
@ 2008-06-25 12:18 ` Jan Kara
0 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2008-06-25 12:18 UTC (permalink / raw)
To: Duane Griffin
Cc: linux-ext4, linux-kernel, akpm, sct, adilger, Sami Liedes,
jochen.voss, Jan Kara
On Wed 25-06-08 13:11:43, Duane Griffin wrote:
> Various places in fs/ext3/namei.c use ext3_next_entry to loop over
> directory entries, but not all of them verify that entries are valid before
> attempting to move to the next one. In the case where rec_len == 0 this
> causes an infinite loop.
>
> Introduce a new version of ext3_next_entry that checks the validity of the
> entry before using its data to find the next one. Rename the original
> function to __ext3_next_entry and use it in places where we already know
> the data is valid.
>
> Note that the changes to empty_dir follow the original code in reporting
> the directory as empty in the presence of errors.
>
> This patch fixes the first case (image hdb.25.softlockup.gz) reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=10882.
>
> Signed-off-by: Duane Griffin <duaneg@dghda.com>
For what it's worth, you can add Acked-by: Jan Kara <jack@suse.cz>
Honza
> --
>
> This is version 3. It includes some tidy-ups from v2 as suggested by
> Jochen Voss and Jan Kara. It also replaces hard-coded function name strings
> with __func__ in all calls to ext3_check_dir_entry, including one in an
> otherwise untouched file. I don't think a separate patch for this is
> necessary, but if it would be preferred I'd be happy to split it out.
>
> I've removed some whitespace in a couple of places in order to fit lines
> into 80 columns, so there are a couple of checkpatch warnings. Let me know
> if you think it would be better to split the lines or go over 80 cols.
>
> Please note that I've only properly tested the originally reported failure
> case (i.e. the changes to ext3_dx_find_entry).
>
> Reviewers may want to pay particular attention to the changes to do_split,
> make_indexed_dir and empty_dir. I've tried to follow the original code's
> error handling conventions, as noted above for empty_dir, but I'm not sure
> this is the best way to do things.
>
> ---
>
> diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
> index 8ca3bfd..2a8ab33 100644
> --- a/fs/ext3/dir.c
> +++ b/fs/ext3/dir.c
> @@ -187,7 +187,7 @@ revalidate:
> while (!error && filp->f_pos < inode->i_size
> && offset < sb->s_blocksize) {
> de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
> - if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
> + if (!ext3_check_dir_entry (__func__, inode, de,
> bh, offset)) {
> /* On error, skip the f_pos to the
> next block. */
> diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
> index 0b8cf80..bb35255 100644
> --- a/fs/ext3/namei.c
> +++ b/fs/ext3/namei.c
> @@ -185,13 +185,27 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
> * p is at least 6 bytes before the end of page
> */
> static inline struct ext3_dir_entry_2 *
> -ext3_next_entry(struct ext3_dir_entry_2 *p)
> +__ext3_next_entry(struct ext3_dir_entry_2 *p)
> {
> return (struct ext3_dir_entry_2 *)((char *)p +
> ext3_rec_len_from_disk(p->rec_len));
> }
>
> /*
> + * Checks the directory entry looks sane before using it to find the next one.
> + * Returns NULL and reports an error if an invalid entry is passed.
> + */
> +static inline struct ext3_dir_entry_2 *
> +ext3_next_entry(const char *func, struct inode *dir,
> + struct ext3_dir_entry_2 *de, struct buffer_head *bh, int offset)
> +{
> + if (ext3_check_dir_entry(func, dir, de, bh, offset))
> + return __ext3_next_entry(de);
> + else
> + return NULL;
> +}
> +
> +/*
> * Future: use high four bits of block for coalesce-on-delete flags
> * Mask them off for now.
> */
> @@ -271,15 +285,17 @@ struct stats
> unsigned bcount;
> };
>
> -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
> - int size, int show_names)
> +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct inode *dir,
> + struct buffer_head *bh, int size,
> + int show_names)
> {
> + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) bh->b_data;
> unsigned names = 0, space = 0;
> char *base = (char *) de;
> struct dx_hash_info h = *hinfo;
>
> printk("names: ");
> - while ((char *) de < base + size)
> + while (de && (char *) de < base + size)
> {
> if (de->inode)
> {
> @@ -295,7 +311,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
> space += EXT3_DIR_REC_LEN(de->name_len);
> names++;
> }
> - de = ext3_next_entry(de);
> + de = ext3_next_entry(__func__, dir, de, bh, 0);
> }
> printk("(%i)\n", names);
> return (struct stats) { names, space, 1 };
> @@ -319,7 +335,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
> if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
> stats = levels?
> dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
> - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
> + dx_show_leaf(hinfo, dir, bh, blocksize, 0);
> names += stats.names;
> space += stats.space;
> bcount += stats.bcount;
> @@ -583,8 +599,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
> top = (struct ext3_dir_entry_2 *) ((char *) de +
> dir->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de)) {
> - if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
> + for (; de < top; de = __ext3_next_entry(de)) {
> + if (!ext3_check_dir_entry(__func__, dir, de, bh,
> (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
> +((char *)de - bh->b_data))) {
> /* On error, skip the f_pos to the next block. */
> @@ -658,7 +674,11 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
> }
> if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
> de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
> - de = ext3_next_entry(de);
> + de = ext3_next_entry(__func__, dir, de, frames[0].bh, 0);
> + if (de == NULL) {
> + err = -EIO;
> + goto errout;
> + }
> if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0)
> goto errout;
> count++;
> @@ -726,8 +746,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
> count++;
> cond_resched();
> }
> - /* XXX: do we need to check rec_len == 0 case? -Chris */
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return count;
> }
> @@ -822,8 +841,7 @@ static inline int search_dirblock(struct buffer_head * bh,
> if ((char *) de + namelen <= dlimit &&
> ext3_match (namelen, name, de)) {
> /* found a match - just to be sure, do a full check */
> - if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh, offset))
> + if (!ext3_check_dir_entry(__func__, dir, de, bh,offset))
> return -1;
> *res_dir = de;
> return 1;
> @@ -991,24 +1009,27 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
> EXT3_DIR_REC_LEN(0));
> - for (; de < top; de = ext3_next_entry(de))
> - if (ext3_match (namelen, name, de)) {
> - if (!ext3_check_dir_entry("ext3_find_entry",
> - dir, de, bh,
> - (block<<EXT3_BLOCK_SIZE_BITS(sb))
> - +((char *)de - bh->b_data))) {
> - brelse (bh);
> + for (; de < top; de = __ext3_next_entry(de)) {
> + int offset = (block << EXT3_BLOCK_SIZE_BITS(sb))
> + + ((char *) de - bh->b_data);
> +
> + if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
> + brelse(bh);
> *err = ERR_BAD_DX_DIR;
> goto errout;
> }
> - *res_dir = de;
> - dx_release (frames);
> - return bh;
> +
> + if (ext3_match(namelen, name, de)) {
> + *res_dir = de;
> + dx_release(frames);
> + return bh;
> + }
> }
> brelse (bh);
> +
> /* Check to see if we should continue to search */
> - retval = ext3_htree_next_block(dir, hash, frame,
> - frames, NULL);
> + retval = ext3_htree_next_block(dir, hash, frame, frames, NULL);
> +
> if (retval < 0) {
> ext3_warning(sb, __func__,
> "error reading index page in directory #%lu",
> @@ -1141,7 +1162,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
>
> prev = to = de;
> while ((char*)de < base + size) {
> - next = ext3_next_entry(de);
> + next = __ext3_next_entry(de);
> if (de->inode && de->name_len) {
> rec_len = EXT3_DIR_REC_LEN(de->name_len);
> if (de > to)
> @@ -1172,9 +1193,22 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
> struct dx_map_entry *map;
> char *data1 = (*bh)->b_data, *data2;
> unsigned split, move, size, i;
> - struct ext3_dir_entry_2 *de = NULL, *de2;
> + struct ext3_dir_entry_2 *de, *de2;
> int err = 0;
>
> + /* Verify directory entries are sane before trying anything else. */
> + de = (struct ext3_dir_entry_2 *) data1;
> + de2 = (struct ext3_dir_entry_2 *) data1 +
> + dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
> + while (de < de2) {
> + de = ext3_next_entry(__func__, dir, de, *bh, 0);
> + if (de == NULL) {
> + brelse(*bh);
> + *bh = NULL;
> + goto errout;
> + }
> + }
> +
> bh2 = ext3_append (handle, dir, &newblock, &err);
> if (!(bh2)) {
> brelse(*bh);
> @@ -1281,8 +1315,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
> de = (struct ext3_dir_entry_2 *)bh->b_data;
> top = bh->b_data + dir->i_sb->s_blocksize - reclen;
> while ((char *) de <= top) {
> - if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
> - bh, offset)) {
> + if (!ext3_check_dir_entry(__func__, dir,de,bh,offset)) {
> brelse (bh);
> return -EIO;
> }
> @@ -1397,8 +1430,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
> memcpy (data1, de, len);
> de = (struct ext3_dir_entry_2 *) data1;
> top = data1 + len;
> - while ((char *)(de2 = ext3_next_entry(de)) < top)
> +
> + de2 = de;
> + while (de2 != NULL && de2 < top) {
> de = de2;
> + de2 = ext3_next_entry(__func__, dir, de, bh, 0);
> + }
> +
> de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
> /* Initialize the root; the dot dirents already exist */
> de = (struct ext3_dir_entry_2 *) (&root->dotdot);
> @@ -1637,7 +1675,7 @@ static int ext3_delete_entry (handle_t *handle,
> pde = NULL;
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> while (i < bh->b_size) {
> - if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
> + if (!ext3_check_dir_entry(__func__, dir, de, bh, i))
> return -EIO;
> if (de == de_del) {
> BUFFER_TRACE(bh, "get_write_access");
> @@ -1655,7 +1693,7 @@ static int ext3_delete_entry (handle_t *handle,
> }
> i += ext3_rec_len_from_disk(de->rec_len);
> pde = de;
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> return -ENOENT;
> }
> @@ -1792,7 +1830,7 @@ retry:
> de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
> strcpy (de->name, ".");
> ext3_set_de_type(dir->i_sb, de, S_IFDIR);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> de->inode = cpu_to_le32(dir->i_ino);
> de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize -
> EXT3_DIR_REC_LEN(1));
> @@ -1825,7 +1863,7 @@ out_stop:
> /*
> * routine to check that the specified directory is empty (for rmdir)
> */
> -static int empty_dir (struct inode * inode)
> +static int empty_dir(struct inode *dir, struct inode *inode)
> {
> unsigned long offset;
> struct buffer_head * bh;
> @@ -1846,8 +1884,14 @@ static int empty_dir (struct inode * inode)
> inode->i_ino);
> return 1;
> }
> +
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> - de1 = ext3_next_entry(de);
> + de1 = ext3_next_entry(__func__, dir, de, bh, 0);
> + if (de1 == NULL) {
> + brelse(bh);
> + return 1;
> + }
> +
> if (le32_to_cpu(de->inode) != inode->i_ino ||
> !le32_to_cpu(de1->inode) ||
> strcmp (".", de->name) ||
> @@ -1860,8 +1904,8 @@ static int empty_dir (struct inode * inode)
> }
> offset = ext3_rec_len_from_disk(de->rec_len) +
> ext3_rec_len_from_disk(de1->rec_len);
> - de = ext3_next_entry(de1);
> - while (offset < inode->i_size ) {
> + de = ext3_next_entry(__func__, dir, de1, bh, offset);
> + while (de && offset < inode->i_size) {
> if (!bh ||
> (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
> err = 0;
> @@ -1879,7 +1923,7 @@ static int empty_dir (struct inode * inode)
> }
> de = (struct ext3_dir_entry_2 *) bh->b_data;
> }
> - if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
> + if (!ext3_check_dir_entry(__func__, inode, de, bh, offset)) {
> de = (struct ext3_dir_entry_2 *)(bh->b_data +
> sb->s_blocksize);
> offset = (offset | (sb->s_blocksize - 1)) + 1;
> @@ -1890,7 +1934,7 @@ static int empty_dir (struct inode * inode)
> return 0;
> }
> offset += ext3_rec_len_from_disk(de->rec_len);
> - de = ext3_next_entry(de);
> + de = __ext3_next_entry(de);
> }
> brelse (bh);
> return 1;
> @@ -2068,7 +2112,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
> goto end_rmdir;
>
> retval = -ENOTEMPTY;
> - if (!empty_dir (inode))
> + if (!empty_dir(dir, inode))
> goto end_rmdir;
>
> retval = ext3_delete_entry(handle, dir, de, bh);
> @@ -2244,7 +2288,7 @@ retry:
> }
>
> #define PARENT_INO(buffer) \
> - (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
> + (__ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
>
> /*
> * Anybody can rename anything with this: the permission checks are left to the
> @@ -2297,7 +2341,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
> if (S_ISDIR(old_inode->i_mode)) {
> if (new_inode) {
> retval = -ENOTEMPTY;
> - if (!empty_dir (new_inode))
> + if (!empty_dir(new_dir, new_inode))
> goto end_rename;
> }
> retval = -EIO;
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2008-06-25 12:18 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <bug-10882-10286@http.bugzilla.kernel.org/>
2008-06-07 19:19 ` [Bugme-new] [Bug 10882] New: Different kernel crashes on corrupted filesystems Andrew Morton
2008-06-21 1:54 ` [PATCH] ext3: validate directory entry data before use Duane Griffin
2008-06-21 15:54 ` [PATCH, v2] " Duane Griffin
2008-06-21 16:13 ` Jochen Voß
2008-06-21 16:31 ` Duane Griffin
2008-06-25 10:08 ` Jan Kara
2008-06-25 11:30 ` Duane Griffin
2008-06-25 12:11 ` [PATCH, v3] " Duane Griffin
2008-06-25 12:18 ` Jan Kara
2008-06-24 6:36 ` [PATCH] " Andreas Dilger
2008-06-23 21:56 ` [PATCH] ext3: handle corrupted orphan list at mount Duane Griffin
2008-06-23 22:32 ` Sami Liedes
2008-06-24 0:09 ` Duane Griffin
2008-06-24 16:08 ` Jan Kara
2008-06-24 17:16 ` Duane Griffin
2008-06-24 17:18 ` Jan Kara
2008-06-24 13:47 ` [PATCH] ext3: handle deleting corrupted indirect blocks Duane Griffin
2008-06-24 13:57 ` Eric Sandeen
2008-06-24 14:17 ` Duane Griffin
2008-06-24 21:05 ` Andrew Morton
2008-06-25 0:13 ` Mingming
2008-06-25 0:15 ` Duane Griffin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).