* [PATCH 1/5] ext4: unlock group before ext4_error
@ 2008-11-20 18:26 Aneesh Kumar K.V
2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw)
To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V
Otherwise ext4_error will cause BUG because of
scheduling in atomic context.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/ext4/mballoc.c | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 772e05b..039a5a6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %u)\n",
inode ? inode->i_ino : 0, blocknr,
first + i, e4b->bd_group);
+ ext4_unlock_group(sb, e4b->bd_group);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;
if (free != grp->bb_free) {
+ ext4_unlock_group(sb, group);
ext4_error(sb, __func__,
"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
@@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
* If we intent to continue, we consider group descritor
* corrupt and update bb_free using bitmap value
*/
+ ext4_lock_group(sb, group);
grp->bb_free = free;
}
@@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* we have free blocks
*/
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "%d free blocks as per "
"group info. But bitmap says 0\n",
free);
+ ext4_lock_group(sb, e4b->bd_group);
break;
}
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__, "%d free blocks as per "
"group info. But got %d blocks\n",
free, ex.fe_len);
@@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* indicate that the bitmap is corrupt. So exit
* without claiming the space.
*/
+ ext4_lock_group(sb, e4b->bd_group);
break;
}
@@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
bit = next + 1;
}
if (free != pa->pa_free) {
+ ext4_unlock_group(sb, group);
printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
@@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
* pa is already deleted so we use the value obtained
* from the bitmap and continue.
*/
+ ext4_lock_group(sb, group);
}
atomic_add(free, &sbi->s_mb_discarded);
@@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
+ ext4_unlock_group(sb, e4b->bd_group);
ext4_error(sb, __func__,
"Double free of blocks %d (%d %d)\n",
block, entry->start_blk, entry->count);
+ ext4_unlock_group(sb, e4b->bd_group);
return 0;
}
}
--
1.6.0.4.735.gea4f
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used 2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V @ 2008-11-20 18:26 ` Aneesh Kumar K.V 2008-11-20 18:26 ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V 2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V ` (2 subsequent siblings) 3 siblings, 1 reply; 8+ messages in thread From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw) To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V We need to make sure we update the block bitmap and clear EXT4_BG_BLOCK_UNINIT flag with sb_bgl_lock held. We look at EXT4_BG_BLOCK_UNINIT and reinit the block bitmap each time in ext4_read_block_bitmap (introduced by c806e68f5647109350ec546fee5b526962970fd2 ) Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- fs/ext4/mballoc.c | 17 ++++++++++++----- 1 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0a53d1b..6155c94 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1071,6 +1071,8 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) __u32 *addr; len = cur + len; + if (lock) + spin_lock(lock); \ while (cur < len) { if ((cur & 31) == 0 && (len - cur) >= 32) { /* fast path: clear whole word at once */ @@ -1079,9 +1081,11 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - mb_clear_bit_atomic(lock, cur, bm); + mb_clear_bit(cur, bm); cur++; } + if (lock) + spin_unlock(lock); \ } static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) @@ -1089,6 +1093,8 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) __u32 *addr; len = cur + len; + if (lock) + spin_lock(lock); \ while (cur < len) { if ((cur & 31) == 0 && (len - cur) >= 32) { /* fast path: set whole word at once */ @@ -1097,9 +1103,11 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - mb_set_bit_atomic(lock, cur, bm); + mb_set_bit(cur, bm); cur++; } + if (lock) + spin_unlock(lock); \ } static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, @@ -3053,10 +3061,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values. 2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V @ 2008-11-20 18:26 ` Aneesh Kumar K.V 2008-11-20 18:44 ` Aneesh Kumar K.V 0 siblings, 1 reply; 8+ messages in thread From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw) To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V Rename the lower bits with suffix _lo and add helper to access the values. Also rename bg_itable_unused_hi to bg_pad as in e2fsprogs. bg_itable_unused_hi is never used before Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> --- fs/ext4/balloc.c | 11 ++++--- fs/ext4/ext4.h | 24 +++++++++++++-- fs/ext4/ialloc.c | 83 ++++++++++++++++++++++++++++------------------------- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 15 +++++---- fs/ext4/resize.c | 4 +- fs/ext4/super.c | 64 +++++++++++++++++++++++++++++++++++++++- 7 files changed, 143 insertions(+), 60 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d268f5f..efe68d9 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, "Checksum bad for group %u\n", block_group); - gdp->bg_free_blocks_count = 0; - gdp->bg_free_inodes_count = 0; - gdp->bg_itable_unused = 0; + ext4_free_blks_set(sb, gdp, 0); + ext4_free_inodes_set(sb, gdp, 0); + ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); return 0; } @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, } } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); + blocks_freed += ext4_free_blks_count(sb, desc); + ext4_free_blks_set(sb, desc, blocks_freed); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); @@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += ext4_free_blks_count(sb, gdp); } return desc_count; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 03aba86..2570883 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -161,9 +161,9 @@ struct ext4_group_desc __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ __le32 bg_inode_table_lo; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_free_blocks_count_lo;/* Free blocks count */ + __le16 bg_free_inodes_count_lo;/* Free inodes count */ + __le16 bg_used_dirs_count_lo; /* Directories count */ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ __le16 bg_itable_unused; /* Unused inodes count */ @@ -174,7 +174,7 @@ struct ext4_group_desc __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ __le16 bg_used_dirs_count_hi; /* Directories count MSB */ - __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ + __le16 bg_pad; __u32 bg_reserved2[3]; }; @@ -1194,12 +1194,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, struct ext4_group_desc *bg); extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, struct ext4_group_desc *bg); +extern __u32 ext4_free_blks_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u32 ext4_free_inodes_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u32 ext4_used_dirs_count(struct super_block *sb, + struct ext4_group_desc *bg); +extern __u16 ext4_itable_unused_count(struct super_block *sb, + struct ext4_group_desc *bg); extern void ext4_block_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); extern void ext4_inode_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); extern void ext4_inode_table_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_free_blks_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_free_inodes_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_used_dirs_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count); +extern void ext4_itable_unused_set(struct super_block *sb, + struct ext4_group_desc *bg, __u16 count); /* extents.c */ extern int ext4_ext_journal_restart(handle_t *handle, int needed); /* defrag.c */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4039af6..84f060b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,9 +76,9 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { ext4_error(sb, __func__, "Checksum bad for group %u\n", block_group); - gdp->bg_free_blocks_count = 0; - gdp->bg_free_inodes_count = 0; - gdp->bg_itable_unused = 0; + ext4_free_blks_set(sb, gdp, 0); + ext4_free_inodes_set(sb, gdp, 0); + ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); return 0; } @@ -168,7 +168,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_group_desc *gdp; struct ext4_super_block *es; struct ext4_sb_info *sbi; - int fatal = 0, err; + int fatal = 0, err, count; ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { @@ -236,9 +236,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (gdp) { spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_inodes_count, 1); - if (is_directory) - le16_add_cpu(&gdp->bg_used_dirs_count, -1); + count = ext4_free_inodes_count(sb, gdp) + 1; + ext4_free_inodes_set(sb, gdp, count); + if (is_directory) { + count = ext4_used_dirs_count(sb, gdp) - 1; + ext4_used_dirs_set(sb, gdp, count); + } gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); @@ -291,13 +294,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, for (group = 0; group < ngroups; group++) { desc = ext4_get_group_desc(sb, group, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (ext4_free_inodes_count(sb, desc) < avefreei) continue; if (!best_desc || - (le16_to_cpu(desc->bg_free_blocks_count) > - le16_to_cpu(best_desc->bg_free_blocks_count))) { + (ext4_free_blks_count(sb, desc) > + ext4_free_blks_count(sb, best_desc))) { *best_group = group; best_desc = desc; ret = 0; @@ -369,7 +372,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, for (i = best_flex * flex_size; i < ngroups && i < (best_flex + 1) * flex_size; i++) { desc = ext4_get_group_desc(sb, i, &bh); - if (le16_to_cpu(desc->bg_free_inodes_count)) { + if (ext4_free_inodes_count(sb, desc)) { *best_group = i; goto out; } @@ -443,17 +446,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + if (ext4_used_dirs_count(sb, desc) >= best_ndir) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (ext4_free_inodes_count(sb, desc) < avefreei) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + if (ext4_free_blks_count(sb, desc) < avefreeb) continue; *group = grp; ret = 0; - best_ndir = le16_to_cpu(desc->bg_used_dirs_count); + best_ndir = ext4_used_dirs_count(sb, desc); } if (ret == 0) return ret; @@ -479,13 +482,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { *group = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || !ext4_free_inodes_count(sb, desc)) continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + if (ext4_used_dirs_count(sb, desc) >= max_dirs) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + if (ext4_free_inodes_count(sb, desc) < min_inodes) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + if (ext4_free_blks_count(sb, desc) < min_blocks) continue; return 0; } @@ -494,8 +497,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, for (i = 0; i < ngroups; i++) { *group = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && desc->bg_free_inodes_count && - le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_inodes_count(sb, desc) >= avefreei) return 0; } @@ -524,8 +527,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, */ *group = parent_group; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_blks_count(sb, desc)) return 0; /* @@ -548,8 +551,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, if (*group >= ngroups) *group -= ngroups; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) + if (desc && ext4_free_inodes_count(sb, desc) && + ext4_free_blks_count(sb, desc)) return 0; } @@ -562,7 +565,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, if (++*group >= ngroups) *group = 0; desc = ext4_get_group_desc(sb, *group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + if (desc && ext4_free_inodes_count(sb, desc)) return 0; } @@ -591,7 +594,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0; + int ret2, err = 0, count; struct inode *ret; ext4_group_t i; int free = 0; @@ -717,7 +720,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); free = ext4_free_blocks_after_init(sb, group, gdp); - gdp->bg_free_blocks_count = cpu_to_le16(free); + ext4_free_blks_set(sb, gdp, free); gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); } @@ -751,7 +754,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) free = 0; } else { free = EXT4_INODES_PER_GROUP(sb) - - le16_to_cpu(gdp->bg_itable_unused); + ext4_itable_unused_count(sb, gdp); } /* @@ -761,13 +764,15 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) * */ if (ino > free) - gdp->bg_itable_unused = - cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); } - le16_add_cpu(&gdp->bg_free_inodes_count, -1); + count = ext4_free_inodes_count(sb, gdp) - 1; + ext4_free_inodes_set(sb, gdp, count); if (S_ISDIR(mode)) { - le16_add_cpu(&gdp->bg_used_dirs_count, 1); + count = ext4_used_dirs_count(sb, gdp) + 1; + ext4_used_dirs_set(sb, gdp, count); } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); @@ -981,7 +986,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + desc_count += ext4_free_inodes_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_inode_bitmap(sb, i); if (!bitmap_bh) @@ -989,7 +994,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); + i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; } brelse(bitmap_bh); @@ -1003,7 +1008,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + desc_count += ext4_free_inodes_count(sb, gdp); cond_resched(); } return desc_count; @@ -1020,7 +1025,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - count += le16_to_cpu(gdp->bg_used_dirs_count); + count += ext4_used_dirs_count(sb, gdp); } return count; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76b1d3a..2d31597 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3987,7 +3987,7 @@ static int __ext4_get_inode_loc(struct inode *inode, num = EXT4_INODES_PER_GROUP(sb); if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - num -= le16_to_cpu(gdp->bg_itable_unused); + num -= ext4_itable_unused_count(sb, gdp); table += num / inodes_per_block; if (end > table) end = table; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6155c94..685c483 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2494,7 +2494,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, ext4_free_blocks_after_init(sb, group, desc); } else { meta_group_info[i]->bb_free = - le16_to_cpu(desc->bg_free_blocks_count); + ext4_free_blks_count(sb, desc); } INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); @@ -3066,12 +3066,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - gdp->bg_free_blocks_count = - cpu_to_le16(ext4_free_blocks_after_init(sb, - ac->ac_b_ex.fe_group, - gdp)); + ext4_free_blks_set(sb, gdp, + ext4_free_blocks_after_init(sb, + ac->ac_b_ex.fe_group, gdp)); } - le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); + len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; + ext4_free_blks_set(sb, gdp, len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); @@ -4863,7 +4863,8 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_blocks_count, count); + ret = ext4_free_blks_count(sb, gdp) + count; + ext4_free_blks_set(sb, gdp, ret); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 939cf26..6e61613 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -862,8 +862,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + ext4_free_blks_set(sb, gdp, input->free_blocks_count); + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ddfa29b..3e62e09 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -93,6 +93,36 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } +__u32 ext4_free_blks_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_free_blocks_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); +} + +__u32 ext4_free_inodes_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_free_inodes_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); +} + +__u32 ext4_used_dirs_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_used_dirs_count_lo) | + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); +} + +__u16 ext4_itable_unused_count(struct super_block *sb, + struct ext4_group_desc *bg) +{ + return le16_to_cpu(bg->bg_itable_unused); +} + void ext4_block_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk) { @@ -117,6 +147,36 @@ void ext4_inode_table_set(struct super_block *sb, bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); } +void ext4_free_blks_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_free_inodes_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_used_dirs_set(struct super_block *sb, + struct ext4_group_desc *bg, __u32 count) +{ + bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); +} + +void ext4_itable_unused_set(struct super_block *sb, + struct ext4_group_desc *bg, __u16 count) +{ + bg->bg_itable_unused = cpu_to_le16((__u16)count); +} + /* * Wrappers for jbd2_journal_start/end. * @@ -1478,9 +1538,9 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); sbi->s_flex_groups[flex_group].free_inodes += - le16_to_cpu(gdp->bg_free_inodes_count); + ext4_free_inodes_count(sb, gdp); sbi->s_flex_groups[flex_group].free_blocks += - le16_to_cpu(gdp->bg_free_blocks_count); + ext4_free_blks_count(sb, gdp); } return 1; -- 1.6.0.4.735.gea4f ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values. 2008-11-20 18:26 ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V @ 2008-11-20 18:44 ` Aneesh Kumar K.V 0 siblings, 0 replies; 8+ messages in thread From: Aneesh Kumar K.V @ 2008-11-20 18:44 UTC (permalink / raw) To: cmm, tytso, sandeen; +Cc: linux-ext4 On Thu, Nov 20, 2008 at 11:56:20PM +0530, Aneesh Kumar K.V wrote: > Rename the lower bits with suffix _lo and add helper > to access the values. Also rename bg_itable_unused_hi > to bg_pad as in e2fsprogs. bg_itable_unused_hi is never > used before > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > --- > fs/ext4/balloc.c | 11 ++++--- > fs/ext4/ext4.h | 24 +++++++++++++-- > fs/ext4/ialloc.c | 83 ++++++++++++++++++++++++++++------------------------- > fs/ext4/inode.c | 2 +- > fs/ext4/mballoc.c | 15 +++++---- > fs/ext4/resize.c | 4 +- > fs/ext4/super.c | 64 +++++++++++++++++++++++++++++++++++++++- > 7 files changed, 143 insertions(+), 60 deletions(-) > > diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c > index d268f5f..efe68d9 100644 > --- a/fs/ext4/balloc.c > +++ b/fs/ext4/balloc.c > @@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, > if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { > ext4_error(sb, __func__, > "Checksum bad for group %u\n", block_group); > - gdp->bg_free_blocks_count = 0; > - gdp->bg_free_inodes_count = 0; > - gdp->bg_itable_unused = 0; > + ext4_free_blks_set(sb, gdp, 0); > + ext4_free_inodes_set(sb, gdp, 0); > + ext4_itable_unused_set(sb, gdp, 0); > memset(bh->b_data, 0xff, sb->s_blocksize); > return 0; > } > @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, > } > } > spin_lock(sb_bgl_lock(sbi, block_group)); > - le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); > + blocks_freed += ext4_free_blks_count(sb, desc); > + ext4_free_blks_set(sb, desc, blocks_freed); > desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); > spin_unlock(sb_bgl_lock(sbi, block_group)); > percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); > @@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) > gdp = ext4_get_group_desc(sb, i, NULL); > if (!gdp) > continue; > - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); > + desc_count += ext4_free_blks_count(sb, gdp); > } > blocks_freed is used later so use count; @@ -372,7 +372,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, struct ext4_group_desc *desc; struct ext4_super_block *es; struct ext4_sb_info *sbi; - int err = 0, ret; + int err = 0, ret, count; ext4_grpblk_t blocks_freed; struct ext4_group_info *grp; @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, } } spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); + count = blocks_freed + ext4_free_blks_count(sb, desc); + ext4_free_blks_set(sb, desc, count); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/5] ext4: unlock group before ext4_error 2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V 2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V @ 2008-11-20 18:34 ` Aneesh Kumar K.V 2008-11-20 18:35 ` Peter Staubach 2008-11-20 22:38 ` Theodore Tso 3 siblings, 0 replies; 8+ messages in thread From: Aneesh Kumar K.V @ 2008-11-20 18:34 UTC (permalink / raw) To: cmm, tytso, sandeen; +Cc: linux-ext4 Hi All, I intent to sent only three patches. I had some debug patches in between so git format-patch generated patch number wrongly. I have also the inode uninit flag clearing race patch. But I am finding a hang during rename. So didn't sent the patch in the series. Attaching below for reference. Once I fix the rename hang I will send the patch again. commit 0181ece15c1e89c2825e581f03abe746fdebb7cf Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Date: Thu Nov 20 23:45:02 2008 +0530 ext4: Fix the race between read_inode_bitmap and ext4_new_inode We need to make sure we update the inode bitmap and clear EXT4_BG_INODE_UNINIT flag with sb_bgl_lock held. We look at EXT4_BG_INODE_UNINIT and reinit the inode bitmap each time in ext4_read_inode_bitmap (introduced by c806e68f5647109350ec546fee5b526962970fd2 ) Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 84f060b..99b1772 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -572,6 +572,70 @@ static int find_group_other(struct super_block *sb, struct inode *parent, return -1; } +static int ext4_claim_inode(struct super_block *sb, + struct buffer_head *inode_bitmap_bh, + unsigned long ino, ext4_group_t group, int mode) +{ + int free = 0, retval = 0, count; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + + spin_lock(sb_bgl_lock(sbi, group)); + if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { + /* not a free inode */ + retval = 1; + goto err_ret; + } + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino >= EXT4_INODES_PER_GROUP(sb)) { + spin_unlock(sb_bgl_lock(sbi, group)); + ext4_error(sb, __func__, + "reserved inode or inode > inodes count - " + "block_group = %u, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); + return 1; + } + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + + /* When marking the block group with + * ~EXT4_BG_INODE_UNINIT we don't want to depend + * on the value of bg_itable_unused even though + * mke2fs could have initialized the same for us. + * Instead we calculated the value below + */ + + free = 0; + } else { + free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + * + */ + if (ino > free) + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - ino)); + } + count = ext4_free_inodes_count(sb, gdp) - 1; + ext4_free_inodes_set(sb, gdp, count); + if (S_ISDIR(mode)) { + count = ext4_used_dirs_count(sb, gdp) + 1; + ext4_used_dirs_set(sb, gdp, count); + } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); +err_ret: + spin_unlock(sb_bgl_lock(sbi, group)); + return retval; +} + /* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both @@ -585,8 +649,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) { struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; + struct buffer_head *inode_bitmap_bh = NULL; + struct buffer_head *group_desc_bh; ext4_group_t group = 0; unsigned long ino = 0; struct inode *inode; @@ -594,7 +658,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0, count; + int ret2, err = 0; struct inode *ret; ext4_group_t i; int free = 0; @@ -634,40 +698,48 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) for (i = 0; i < sbi->s_groups_count; i++) { err = -EIO; - gdp = ext4_get_group_desc(sb, group, &bh2); + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); if (!gdp) goto fail; - brelse(bitmap_bh); - bitmap_bh = ext4_read_inode_bitmap(sb, group); - if (!bitmap_bh) + brelse(inode_bitmap_bh); + inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); + if (!inode_bitmap_bh) goto fail; ino = 0; repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) - bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); + inode_bitmap_bh->b_data, + EXT4_INODES_PER_GROUP(sb), ino); if (ino < EXT4_INODES_PER_GROUP(sb)) { - BUFFER_TRACE(bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bitmap_bh); + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, + inode_bitmap_bh); if (err) goto fail; - if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), - ino, bitmap_bh->b_data)) { + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, + group_desc_bh); + if (err) + goto fail; + if (!ext4_claim_inode(sb, inode_bitmap_bh, + ino, group, mode)) { /* we won it */ - BUFFER_TRACE(bitmap_bh, + BUFFER_TRACE(inode_bitmap_bh, "call ext4_journal_dirty_metadata"); err = ext4_journal_dirty_metadata(handle, - bitmap_bh); + inode_bitmap_bh); if (err) goto fail; goto got; } /* we lost it */ - jbd2_journal_release_buffer(handle, bitmap_bh); + jbd2_journal_release_buffer(handle, inode_bitmap_bh); + jbd2_journal_release_buffer(handle, group_desc_bh); if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; @@ -687,30 +759,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) goto out; got: - ino++; - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || - ino > EXT4_INODES_PER_GROUP(sb)) { - ext4_error(sb, __func__, - "reserved inode or inode > inodes count - " - "block_group = %u, inode=%lu", group, - ino + group * EXT4_INODES_PER_GROUP(sb)); - err = -EIO; - goto fail; - } - - BUFFER_TRACE(bh2, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh2); - if (err) goto fail; - /* We may have to initialize the block bitmap if it isn't already */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group); + struct buffer_head *block_bitmap_bh; - BUFFER_TRACE(block_bh, "get block bitmap access"); - err = ext4_journal_get_write_access(handle, block_bh); + block_bitmap_bh = ext4_read_block_bitmap(sb, group); + BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bitmap_bh); if (err) { - brelse(block_bh); + brelse(block_bitmap_bh); goto fail; } @@ -728,57 +786,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) /* Don't need to dirty bitmap block if we didn't change it */ if (free) { - BUFFER_TRACE(block_bh, "dirty block bitmap"); - err = ext4_journal_dirty_metadata(handle, block_bh); + BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); + err = ext4_journal_dirty_metadata(handle, + block_bitmap_bh); } - brelse(block_bh); + brelse(block_bitmap_bh); if (err) goto fail; } - - spin_lock(sb_bgl_lock(sbi, group)); - /* If we didn't allocate from within the initialized part of the inode - * table then we need to initialize up to this inode. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); - - /* When marking the block group with - * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unused even though - * mke2fs could have initialized the same for us. - * Instead we calculated the value below - */ - - free = 0; - } else { - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); - } - - /* - * Check the relative inode number against the last used - * relative inode number in this group. if it is greater - * we need to update the bg_itable_unused count - * - */ - if (ino > free) - ext4_itable_unused_set(sb, gdp, - (EXT4_INODES_PER_GROUP(sb) - ino)); - } - - count = ext4_free_inodes_count(sb, gdp) - 1; - ext4_free_inodes_set(sb, gdp, count); - if (S_ISDIR(mode)) { - count = ext4_used_dirs_count(sb, gdp) + 1; - ext4_used_dirs_set(sb, gdp, count); - } - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); - if (err) goto fail; + BUFFER_TRACE(group_desc_bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, group_desc_bh); + if (err) + goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) @@ -876,7 +896,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) iput(inode); ret = ERR_PTR(err); really_out: - brelse(bitmap_bh); + brelse(inode_bitmap_bh); return ret; fail_free_drop: @@ -887,7 +907,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); - brelse(bitmap_bh); + brelse(inode_bitmap_bh); return ERR_PTR(err); } ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/5] ext4: unlock group before ext4_error 2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V 2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V 2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V @ 2008-11-20 18:35 ` Peter Staubach 2008-11-20 18:47 ` Aneesh Kumar K.V 2008-11-20 22:38 ` Theodore Tso 3 siblings, 1 reply; 8+ messages in thread From: Peter Staubach @ 2008-11-20 18:35 UTC (permalink / raw) To: Aneesh Kumar K.V; +Cc: cmm, tytso, sandeen, linux-ext4 Aneesh Kumar K.V wrote: > Otherwise ext4_error will cause BUG because of > scheduling in atomic context. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> > --- > fs/ext4/mballoc.c | 12 ++++++++++++ > 1 files changed, 12 insertions(+), 0 deletions(-) > > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 772e05b..039a5a6 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, > blocknr += > le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); > > + ext4_unlock_group(sb, e4b->bd_group); > ext4_error(sb, __func__, "double-free of inode" > " %lu's block %llu(bit %u in group %u)\n", > inode ? inode->i_ino : 0, blocknr, > first + i, e4b->bd_group); > + ext4_unlock_group(sb, e4b->bd_group); > This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it? Thanx... ps > } > mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); > } > @@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, > grp->bb_fragments = fragments; > > if (free != grp->bb_free) { > + ext4_unlock_group(sb, group); > ext4_error(sb, __func__, > "EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n", > group, free, grp->bb_free); > @@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, > * If we intent to continue, we consider group descritor > * corrupt and update bb_free using bitmap value > */ > + ext4_lock_group(sb, group); > grp->bb_free = free; > } > > @@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, > * free blocks even though group info says we > * we have free blocks > */ > + ext4_unlock_group(sb, e4b->bd_group); > ext4_error(sb, __func__, "%d free blocks as per " > "group info. But bitmap says 0\n", > free); > + ext4_lock_group(sb, e4b->bd_group); > break; > } > > mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); > BUG_ON(ex.fe_len <= 0); > if (free < ex.fe_len) { > + ext4_unlock_group(sb, e4b->bd_group); > ext4_error(sb, __func__, "%d free blocks as per " > "group info. But got %d blocks\n", > free, ex.fe_len); > @@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, > * indicate that the bitmap is corrupt. So exit > * without claiming the space. > */ > + ext4_lock_group(sb, e4b->bd_group); > break; > } > > @@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, > bit = next + 1; > } > if (free != pa->pa_free) { > + ext4_unlock_group(sb, group); > printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", > pa, (unsigned long) pa->pa_lstart, > (unsigned long) pa->pa_pstart, > @@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, > * pa is already deleted so we use the value obtained > * from the bitmap and continue. > */ > + ext4_lock_group(sb, group); > } > atomic_add(free, &sbi->s_mb_discarded); > > @@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, > else if (block >= (entry->start_blk + entry->count)) > n = &(*n)->rb_right; > else { > + ext4_unlock_group(sb, e4b->bd_group); > ext4_error(sb, __func__, > "Double free of blocks %d (%d %d)\n", > block, entry->start_blk, entry->count); > + ext4_unlock_group(sb, e4b->bd_group); > return 0; > } > } > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/5] ext4: unlock group before ext4_error 2008-11-20 18:35 ` Peter Staubach @ 2008-11-20 18:47 ` Aneesh Kumar K.V 0 siblings, 0 replies; 8+ messages in thread From: Aneesh Kumar K.V @ 2008-11-20 18:47 UTC (permalink / raw) To: Peter Staubach; +Cc: cmm, tytso, sandeen, linux-ext4 On Thu, Nov 20, 2008 at 01:35:11PM -0500, Peter Staubach wrote: >> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c >> index 772e05b..039a5a6 100644 >> --- a/fs/ext4/mballoc.c >> +++ b/fs/ext4/mballoc.c >> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, >> blocknr += >> le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); >> + ext4_unlock_group(sb, e4b->bd_group); >> ext4_error(sb, __func__, "double-free of inode" >> " %lu's block %llu(bit %u in group %u)\n", >> inode ? inode->i_ino : 0, blocknr, >> first + i, e4b->bd_group); >> + ext4_unlock_group(sb, e4b->bd_group); >> > > This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it? Yes, I need to update at two place diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 039a5a6..728221e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -465,7 +465,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, " %lu's block %llu(bit %u in group %u)\n", inode ? inode->i_ino : 0, blocknr, first + i, e4b->bd_group); - ext4_unlock_group(sb, e4b->bd_group); + ext4_lock_group(sb, e4b->bd_group); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } @@ -4621,7 +4621,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_error(sb, __func__, "Double free of blocks %d (%d %d)\n", block, entry->start_blk, entry->count); - ext4_unlock_group(sb, e4b->bd_group); + ext4_lock_group(sb, e4b->bd_group); return 0; } } ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/5] ext4: unlock group before ext4_error 2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V ` (2 preceding siblings ...) 2008-11-20 18:35 ` Peter Staubach @ 2008-11-20 22:38 ` Theodore Tso 3 siblings, 0 replies; 8+ messages in thread From: Theodore Tso @ 2008-11-20 22:38 UTC (permalink / raw) To: Aneesh Kumar K.V; +Cc: cmm, sandeen, linux-ext4 On Thu, Nov 20, 2008 at 11:56:18PM +0530, Aneesh Kumar K.V wrote: > Otherwise ext4_error will cause BUG because of > scheduling in atomic context. Granted that it isn't necessarily safe as it is when errors-behaviour is set to continue, that is the default on a large number of filesystems. And unlocking the group, calling the ext4_error(), and then relocking the group can't possibly be safe; after all, we're holding the lock for a reason! In the errors=continue case, we don't actually need to unlock and relock the group, since all we need to do is to printk a message to the console. In the errors=panic case, we'll never return; and in the errors=remount-ro case, relocking is kind of pointless but harmless, since once the journal is aborted, there's no way the allocation is going to succeed anyway, and a subsequent call will return an error. This gets ugly to get right. Since the situation where we need to call ext4_error() while holding a spinlock which should be preserved in the errors=continue case seems to be unique to mballoc, maybe something like this? static int ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, const char *function, const char *fmt, ...) { va_start(args, fmt); printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); if (test_opt(sb, ERROR_CONT)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); ext4_commit_super(sb, es, 0); return 0; } ext4_unlock_group(sb, grp); ext4_handle_error(sb); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the * filesystem will have already been marked read/only and the * journal has been aborted. We return 1 as a hint to callers * who might what to use the return value from * ext4_grp_locked_error() to distinguish beween the * ERRORS_CONT and ERRORS_RO case, and perhaps return more * aggressively from the ext4 function in question, with a * more appropriate error code. */ ext4_lock_group(sb, grp); } This requires access to two static functions in fs/ext4/super.c, so probably the best thing to do is to put this function in fs/ext4/super.c, and move the definition of ext4_[un]lock_group from mballoc.h to ext4.h. Comments? - Ted ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2008-11-20 22:39 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V 2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V 2008-11-20 18:26 ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V 2008-11-20 18:44 ` Aneesh Kumar K.V 2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V 2008-11-20 18:35 ` Peter Staubach 2008-11-20 18:47 ` Aneesh Kumar K.V 2008-11-20 22:38 ` Theodore Tso
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).