linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] ext4: unlock group before ext4_error
@ 2008-11-20 18:26 Aneesh Kumar K.V
  2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw)
  To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V

Otherwise ext4_error will cause BUG because of
scheduling in atomic context.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/mballoc.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 772e05b..039a5a6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 			blocknr +=
 			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "double-free of inode"
 				   " %lu's block %llu(bit %u in group %u)\n",
 				   inode ? inode->i_ino : 0, blocknr,
 				   first + i, e4b->bd_group);
+			ext4_unlock_group(sb, e4b->bd_group);
 		}
 		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
 	}
@@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 	grp->bb_fragments = fragments;
 
 	if (free != grp->bb_free) {
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
 			group, free, grp->bb_free);
@@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 		 * If we intent to continue, we consider group descritor
 		 * corrupt and update bb_free using bitmap value
 		 */
+		ext4_lock_group(sb, group);
 		grp->bb_free = free;
 	}
 
@@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			 * free blocks even though group info says we
 			 * we have free blocks
 			 */
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "%d free blocks as per "
 					"group info. But bitmap says 0\n",
 					free);
+			ext4_lock_group(sb, e4b->bd_group);
 			break;
 		}
 
 		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
 		BUG_ON(ex.fe_len <= 0);
 		if (free < ex.fe_len) {
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__, "%d free blocks as per "
 					"group info. But got %d blocks\n",
 					free, ex.fe_len);
@@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 			 * indicate that the bitmap is corrupt. So exit
 			 * without claiming the space.
 			 */
+			ext4_lock_group(sb, e4b->bd_group);
 			break;
 		}
 
@@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		bit = next + 1;
 	}
 	if (free != pa->pa_free) {
+		ext4_unlock_group(sb, group);
 		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
 			pa, (unsigned long) pa->pa_lstart,
 			(unsigned long) pa->pa_pstart,
@@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		 * pa is already deleted so we use the value obtained
 		 * from the bitmap and continue.
 		 */
+		ext4_lock_group(sb, group);
 	}
 	atomic_add(free, &sbi->s_mb_discarded);
 
@@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 		else if (block >= (entry->start_blk + entry->count))
 			n = &(*n)->rb_right;
 		else {
+			ext4_unlock_group(sb, e4b->bd_group);
 			ext4_error(sb, __func__,
 			    "Double free of blocks %d (%d %d)\n",
 			    block, entry->start_blk, entry->count);
+			ext4_unlock_group(sb, e4b->bd_group);
 			return 0;
 		}
 	}
-- 
1.6.0.4.735.gea4f


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used
  2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
@ 2008-11-20 18:26 ` Aneesh Kumar K.V
  2008-11-20 18:26   ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V
  2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw)
  To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V

We need to make sure we update the block bitmap and clear
EXT4_BG_BLOCK_UNINIT flag with sb_bgl_lock held. We look
at EXT4_BG_BLOCK_UNINIT and reinit the block bitmap each
time in ext4_read_block_bitmap (introduced by
c806e68f5647109350ec546fee5b526962970fd2 )

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/mballoc.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0a53d1b..6155c94 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1071,6 +1071,8 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
 	__u32 *addr;
 
 	len = cur + len;
+	if (lock)
+		spin_lock(lock);			\
 	while (cur < len) {
 		if ((cur & 31) == 0 && (len - cur) >= 32) {
 			/* fast path: clear whole word at once */
@@ -1079,9 +1081,11 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		mb_clear_bit_atomic(lock, cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
+	if (lock)
+		spin_unlock(lock);			\
 }
 
 static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
@@ -1089,6 +1093,8 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 	__u32 *addr;
 
 	len = cur + len;
+	if (lock)
+		spin_lock(lock);			\
 	while (cur < len) {
 		if ((cur & 31) == 0 && (len - cur) >= 32) {
 			/* fast path: set whole word at once */
@@ -1097,9 +1103,11 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		mb_set_bit_atomic(lock, cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
+	if (lock)
+		spin_unlock(lock);			\
 }
 
 static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
@@ -3053,10 +3061,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		}
 	}
 #endif
-	mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values.
  2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
@ 2008-11-20 18:26   ` Aneesh Kumar K.V
  2008-11-20 18:44     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:26 UTC (permalink / raw)
  To: cmm, tytso, sandeen; +Cc: linux-ext4, Aneesh Kumar K.V

Rename the lower bits with suffix _lo and add helper
to access the values. Also rename bg_itable_unused_hi
to bg_pad as in e2fsprogs. bg_itable_unused_hi is never
used before

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/balloc.c  |   11 ++++---
 fs/ext4/ext4.h    |   24 +++++++++++++--
 fs/ext4/ialloc.c  |   83 ++++++++++++++++++++++++++++-------------------------
 fs/ext4/inode.c   |    2 +-
 fs/ext4/mballoc.c |   15 +++++----
 fs/ext4/resize.c  |    4 +-
 fs/ext4/super.c   |   64 +++++++++++++++++++++++++++++++++++++++-
 7 files changed, 143 insertions(+), 60 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d268f5f..efe68d9 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
 			ext4_error(sb, __func__,
 				  "Checksum bad for group %u\n", block_group);
-			gdp->bg_free_blocks_count = 0;
-			gdp->bg_free_inodes_count = 0;
-			gdp->bg_itable_unused = 0;
+			ext4_free_blks_set(sb, gdp, 0);
+			ext4_free_inodes_set(sb, gdp, 0);
+			ext4_itable_unused_set(sb, gdp, 0);
 			memset(bh->b_data, 0xff, sb->s_blocksize);
 			return 0;
 		}
@@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 		}
 	}
 	spin_lock(sb_bgl_lock(sbi, block_group));
-	le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
+	blocks_freed += ext4_free_blks_count(sb, desc);
+	ext4_free_blks_set(sb, desc, blocks_freed);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
@@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+		desc_count += ext4_free_blks_count(sb, gdp);
 	}
 
 	return desc_count;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 03aba86..2570883 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -161,9 +161,9 @@ struct ext4_group_desc
 	__le32	bg_block_bitmap_lo;	/* Blocks bitmap block */
 	__le32	bg_inode_bitmap_lo;	/* Inodes bitmap block */
 	__le32	bg_inode_table_lo;	/* Inodes table block */
-	__le16	bg_free_blocks_count;	/* Free blocks count */
-	__le16	bg_free_inodes_count;	/* Free inodes count */
-	__le16	bg_used_dirs_count;	/* Directories count */
+	__le16	bg_free_blocks_count_lo;/* Free blocks count */
+	__le16	bg_free_inodes_count_lo;/* Free inodes count */
+	__le16	bg_used_dirs_count_lo;	/* Directories count */
 	__le16	bg_flags;		/* EXT4_BG_flags (INODE_UNINIT, etc) */
 	__u32	bg_reserved[2];		/* Likely block/inode bitmap checksum */
 	__le16  bg_itable_unused;	/* Unused inodes count */
@@ -174,7 +174,7 @@ struct ext4_group_desc
 	__le16	bg_free_blocks_count_hi;/* Free blocks count MSB */
 	__le16	bg_free_inodes_count_hi;/* Free inodes count MSB */
 	__le16	bg_used_dirs_count_hi;	/* Directories count MSB */
-	__le16	bg_itable_unused_hi;	/* Unused inodes count MSB */
+	__le16  bg_pad;
 	__u32	bg_reserved2[3];
 };
 
@@ -1194,12 +1194,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 				      struct ext4_group_desc *bg);
 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 				     struct ext4_group_desc *bg);
+extern __u32 ext4_free_blks_count(struct super_block *sb,
+				struct ext4_group_desc *bg);
+extern __u32 ext4_free_inodes_count(struct super_block *sb,
+				 struct ext4_group_desc *bg);
+extern __u32 ext4_used_dirs_count(struct super_block *sb,
+				struct ext4_group_desc *bg);
+extern __u16 ext4_itable_unused_count(struct super_block *sb,
+				   struct ext4_group_desc *bg);
 extern void ext4_block_bitmap_set(struct super_block *sb,
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_bitmap_set(struct super_block *sb,
 				  struct ext4_group_desc *bg, ext4_fsblk_t blk);
 extern void ext4_inode_table_set(struct super_block *sb,
 				 struct ext4_group_desc *bg, ext4_fsblk_t blk);
+extern void ext4_free_blks_set(struct super_block *sb,
+			       struct ext4_group_desc *bg, __u32 count);
+extern void ext4_free_inodes_set(struct super_block *sb,
+				struct ext4_group_desc *bg, __u32 count);
+extern void ext4_used_dirs_set(struct super_block *sb,
+				struct ext4_group_desc *bg, __u32 count);
+extern void ext4_itable_unused_set(struct super_block *sb,
+				   struct ext4_group_desc *bg, __u16 count);
 /* extents.c */
 extern int ext4_ext_journal_restart(handle_t *handle, int needed);
 /* defrag.c */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4039af6..84f060b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,9 +76,9 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
 	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
 		ext4_error(sb, __func__, "Checksum bad for group %u\n",
 			   block_group);
-		gdp->bg_free_blocks_count = 0;
-		gdp->bg_free_inodes_count = 0;
-		gdp->bg_itable_unused = 0;
+		ext4_free_blks_set(sb, gdp, 0);
+		ext4_free_inodes_set(sb, gdp, 0);
+		ext4_itable_unused_set(sb, gdp, 0);
 		memset(bh->b_data, 0xff, sb->s_blocksize);
 		return 0;
 	}
@@ -168,7 +168,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	struct ext4_group_desc *gdp;
 	struct ext4_super_block *es;
 	struct ext4_sb_info *sbi;
-	int fatal = 0, err;
+	int fatal = 0, err, count;
 	ext4_group_t flex_group;
 
 	if (atomic_read(&inode->i_count) > 1) {
@@ -236,9 +236,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
 		if (gdp) {
 			spin_lock(sb_bgl_lock(sbi, block_group));
-			le16_add_cpu(&gdp->bg_free_inodes_count, 1);
-			if (is_directory)
-				le16_add_cpu(&gdp->bg_used_dirs_count, -1);
+			count = ext4_free_inodes_count(sb, gdp) + 1;
+			ext4_free_inodes_set(sb, gdp, count);
+			if (is_directory) {
+				count = ext4_used_dirs_count(sb, gdp) - 1;
+				ext4_used_dirs_set(sb, gdp, count);
+			}
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
 			spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -291,13 +294,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
 
 	for (group = 0; group < ngroups; group++) {
 		desc = ext4_get_group_desc(sb, group, NULL);
-		if (!desc || !desc->bg_free_inodes_count)
+		if (!desc || !ext4_free_inodes_count(sb, desc))
 			continue;
-		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+		if (ext4_free_inodes_count(sb, desc) < avefreei)
 			continue;
 		if (!best_desc ||
-		    (le16_to_cpu(desc->bg_free_blocks_count) >
-		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
+		    (ext4_free_blks_count(sb, desc) >
+		     ext4_free_blks_count(sb, best_desc))) {
 			*best_group = group;
 			best_desc = desc;
 			ret = 0;
@@ -369,7 +372,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 	for (i = best_flex * flex_size; i < ngroups &&
 		     i < (best_flex + 1) * flex_size; i++) {
 		desc = ext4_get_group_desc(sb, i, &bh);
-		if (le16_to_cpu(desc->bg_free_inodes_count)) {
+		if (ext4_free_inodes_count(sb, desc)) {
 			*best_group = i;
 			goto out;
 		}
@@ -443,17 +446,17 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		for (i = 0; i < ngroups; i++) {
 			grp = (parent_group + i) % ngroups;
 			desc = ext4_get_group_desc(sb, grp, NULL);
-			if (!desc || !desc->bg_free_inodes_count)
+			if (!desc || !ext4_free_inodes_count(sb, desc))
 				continue;
-			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+			if (ext4_used_dirs_count(sb, desc) >= best_ndir)
 				continue;
-			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+			if (ext4_free_inodes_count(sb, desc) < avefreei)
 				continue;
-			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+			if (ext4_free_blks_count(sb, desc) < avefreeb)
 				continue;
 			*group = grp;
 			ret = 0;
-			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+			best_ndir = ext4_used_dirs_count(sb, desc);
 		}
 		if (ret == 0)
 			return ret;
@@ -479,13 +482,13 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	for (i = 0; i < ngroups; i++) {
 		*group = (parent_group + i) % ngroups;
 		desc = ext4_get_group_desc(sb, *group, NULL);
-		if (!desc || !desc->bg_free_inodes_count)
+		if (!desc || !ext4_free_inodes_count(sb, desc))
 			continue;
-		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+		if (ext4_used_dirs_count(sb, desc) >= max_dirs)
 			continue;
-		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+		if (ext4_free_inodes_count(sb, desc) < min_inodes)
 			continue;
-		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+		if (ext4_free_blks_count(sb, desc) < min_blocks)
 			continue;
 		return 0;
 	}
@@ -494,8 +497,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	for (i = 0; i < ngroups; i++) {
 		*group = (parent_group + i) % ngroups;
 		desc = ext4_get_group_desc(sb, *group, NULL);
-		if (desc && desc->bg_free_inodes_count &&
-			le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+		if (desc && ext4_free_inodes_count(sb, desc) &&
+			ext4_free_inodes_count(sb, desc) >= avefreei)
 			return 0;
 	}
 
@@ -524,8 +527,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 	 */
 	*group = parent_group;
 	desc = ext4_get_group_desc(sb, *group, NULL);
-	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
-			le16_to_cpu(desc->bg_free_blocks_count))
+	if (desc && ext4_free_inodes_count(sb, desc) &&
+			ext4_free_blks_count(sb, desc))
 		return 0;
 
 	/*
@@ -548,8 +551,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		if (*group >= ngroups)
 			*group -= ngroups;
 		desc = ext4_get_group_desc(sb, *group, NULL);
-		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
-				le16_to_cpu(desc->bg_free_blocks_count))
+		if (desc && ext4_free_inodes_count(sb, desc) &&
+				ext4_free_blks_count(sb, desc))
 			return 0;
 	}
 
@@ -562,7 +565,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		if (++*group >= ngroups)
 			*group = 0;
 		desc = ext4_get_group_desc(sb, *group, NULL);
-		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
+		if (desc && ext4_free_inodes_count(sb, desc))
 			return 0;
 	}
 
@@ -591,7 +594,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
-	int ret2, err = 0;
+	int ret2, err = 0, count;
 	struct inode *ret;
 	ext4_group_t i;
 	int free = 0;
@@ -717,7 +720,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 			free = ext4_free_blocks_after_init(sb, group, gdp);
-			gdp->bg_free_blocks_count = cpu_to_le16(free);
+			ext4_free_blks_set(sb, gdp, free);
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
@@ -751,7 +754,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 			free = 0;
 		} else {
 			free = EXT4_INODES_PER_GROUP(sb) -
-				le16_to_cpu(gdp->bg_itable_unused);
+				ext4_itable_unused_count(sb, gdp);
 		}
 
 		/*
@@ -761,13 +764,15 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		 *
 		 */
 		if (ino > free)
-			gdp->bg_itable_unused =
-				cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+			ext4_itable_unused_set(sb, gdp,
+					(EXT4_INODES_PER_GROUP(sb) - ino));
 	}
 
-	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
+	count = ext4_free_inodes_count(sb, gdp) - 1;
+	ext4_free_inodes_set(sb, gdp, count);
 	if (S_ISDIR(mode)) {
-		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
+		count = ext4_used_dirs_count(sb, gdp) + 1;
+		ext4_used_dirs_set(sb, gdp, count);
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group));
@@ -981,7 +986,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+		desc_count += ext4_free_inodes_count(sb, gdp);
 		brelse(bitmap_bh);
 		bitmap_bh = ext4_read_inode_bitmap(sb, i);
 		if (!bitmap_bh)
@@ -989,7 +994,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 
 		x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
 		printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
-			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
+			i, ext4_free_inodes_count(sb, gdp), x);
 		bitmap_count += x;
 	}
 	brelse(bitmap_bh);
@@ -1003,7 +1008,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+		desc_count += ext4_free_inodes_count(sb, gdp);
 		cond_resched();
 	}
 	return desc_count;
@@ -1020,7 +1025,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
-		count += le16_to_cpu(gdp->bg_used_dirs_count);
+		count += ext4_used_dirs_count(sb, gdp);
 	}
 	return count;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 76b1d3a..2d31597 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3987,7 +3987,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 			num = EXT4_INODES_PER_GROUP(sb);
 			if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
 				       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
-				num -= le16_to_cpu(gdp->bg_itable_unused);
+				num -= ext4_itable_unused_count(sb, gdp);
 			table += num / inodes_per_block;
 			if (end > table)
 				end = table;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6155c94..685c483 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2494,7 +2494,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			ext4_free_blocks_after_init(sb, group, desc);
 	} else {
 		meta_group_info[i]->bb_free =
-			le16_to_cpu(desc->bg_free_blocks_count);
+			ext4_free_blks_count(sb, desc);
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -3066,12 +3066,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-		gdp->bg_free_blocks_count =
-			cpu_to_le16(ext4_free_blocks_after_init(sb,
-						ac->ac_b_ex.fe_group,
-						gdp));
+		ext4_free_blks_set(sb, gdp,
+					ext4_free_blocks_after_init(sb,
+					ac->ac_b_ex.fe_group, gdp));
 	}
-	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
+	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
+	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -4863,7 +4863,8 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	spin_lock(sb_bgl_lock(sbi, block_group));
-	le16_add_cpu(&gdp->bg_free_blocks_count, count);
+	ret = ext4_free_blks_count(sb, gdp) + count;
+	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 939cf26..6e61613 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -862,8 +862,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
 	ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
 	ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
-	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
-	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+	ext4_free_blks_set(sb, gdp, input->free_blocks_count);
+	ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
 	/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ddfa29b..3e62e09 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -93,6 +93,36 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
+__u32 ext4_free_blks_count(struct super_block *sb,
+			      struct ext4_group_desc *bg)
+{
+	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+}
+
+__u32 ext4_free_inodes_count(struct super_block *sb,
+			      struct ext4_group_desc *bg)
+{
+	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+}
+
+__u32 ext4_used_dirs_count(struct super_block *sb,
+			      struct ext4_group_desc *bg)
+{
+	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
+		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+}
+
+__u16 ext4_itable_unused_count(struct super_block *sb,
+			      struct ext4_group_desc *bg)
+{
+	return le16_to_cpu(bg->bg_itable_unused);
+}
+
 void ext4_block_bitmap_set(struct super_block *sb,
 			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
@@ -117,6 +147,36 @@ void ext4_inode_table_set(struct super_block *sb,
 		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
 
+void ext4_free_blks_set(struct super_block *sb,
+			  struct ext4_group_desc *bg, __u32 count)
+{
+	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_free_inodes_set(struct super_block *sb,
+			  struct ext4_group_desc *bg, __u32 count)
+{
+	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_used_dirs_set(struct super_block *sb,
+			  struct ext4_group_desc *bg, __u32 count)
+{
+	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
+	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
+}
+
+void ext4_itable_unused_set(struct super_block *sb,
+			  struct ext4_group_desc *bg, __u16 count)
+{
+	bg->bg_itable_unused = cpu_to_le16((__u16)count);
+}
+
 /*
  * Wrappers for jbd2_journal_start/end.
  *
@@ -1478,9 +1538,9 @@ static int ext4_fill_flex_info(struct super_block *sb)
 
 		flex_group = ext4_flex_group(sbi, i);
 		sbi->s_flex_groups[flex_group].free_inodes +=
-			le16_to_cpu(gdp->bg_free_inodes_count);
+			ext4_free_inodes_count(sb, gdp);
 		sbi->s_flex_groups[flex_group].free_blocks +=
-			le16_to_cpu(gdp->bg_free_blocks_count);
+			ext4_free_blks_count(sb, gdp);
 	}
 
 	return 1;
-- 
1.6.0.4.735.gea4f


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/5] ext4: unlock group before ext4_error
  2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
  2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
@ 2008-11-20 18:34 ` Aneesh Kumar K.V
  2008-11-20 18:35 ` Peter Staubach
  2008-11-20 22:38 ` Theodore Tso
  3 siblings, 0 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:34 UTC (permalink / raw)
  To: cmm, tytso, sandeen; +Cc: linux-ext4

Hi All,

I intent to sent only three patches. I had some debug
patches in between so git format-patch generated patch
number wrongly. I have also the inode uninit flag clearing race
patch. But I am finding a hang during rename. So didn't sent
the patch in the series. Attaching below for reference. Once
I fix the rename hang I will send the patch again.

commit 0181ece15c1e89c2825e581f03abe746fdebb7cf
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date:   Thu Nov 20 23:45:02 2008 +0530

    ext4: Fix the race between read_inode_bitmap and ext4_new_inode
    
    We need to make sure we update the inode bitmap and clear
    EXT4_BG_INODE_UNINIT flag with sb_bgl_lock held. We look
    at EXT4_BG_INODE_UNINIT and reinit the inode bitmap each
    time in ext4_read_inode_bitmap (introduced by
    c806e68f5647109350ec546fee5b526962970fd2 )
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 84f060b..99b1772 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -572,6 +572,70 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 	return -1;
 }
 
+static int ext4_claim_inode(struct super_block *sb,
+			struct buffer_head *inode_bitmap_bh,
+			unsigned long ino, ext4_group_t group, int mode)
+{
+	int free = 0, retval = 0, count;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+
+	spin_lock(sb_bgl_lock(sbi, group));
+	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
+		/* not a free inode */
+		retval = 1;
+		goto err_ret;
+	}
+	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+			ino >= EXT4_INODES_PER_GROUP(sb)) {
+		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_error(sb, __func__,
+			   "reserved inode or inode > inodes count - "
+			   "block_group = %u, inode=%lu", group,
+			   ino + group * EXT4_INODES_PER_GROUP(sb));
+		return 1;
+	}
+	/* If we didn't allocate from within the initialized part of the inode
+	 * table then we need to initialize up to this inode. */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+			/* When marking the block group with
+			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
+			 * on the value of bg_itable_unused even though
+			 * mke2fs could have initialized the same for us.
+			 * Instead we calculated the value below
+			 */
+
+			free = 0;
+		} else {
+			free = EXT4_INODES_PER_GROUP(sb) -
+				ext4_itable_unused_count(sb, gdp);
+		}
+
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to  update the bg_itable_unused count
+		 *
+		 */
+		if (ino > free)
+			ext4_itable_unused_set(sb, gdp,
+					(EXT4_INODES_PER_GROUP(sb) - ino));
+	}
+	count = ext4_free_inodes_count(sb, gdp) - 1;
+	ext4_free_inodes_set(sb, gdp, count);
+	if (S_ISDIR(mode)) {
+		count = ext4_used_dirs_count(sb, gdp) + 1;
+		ext4_used_dirs_set(sb, gdp, count);
+	}
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+err_ret:
+	spin_unlock(sb_bgl_lock(sbi, group));
+	return retval;
+}
+
 /*
  * There are two policies for allocating an inode.  If the new inode is
  * a directory, then a forward search is made for a block group with both
@@ -585,8 +649,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 {
 	struct super_block *sb;
-	struct buffer_head *bitmap_bh = NULL;
-	struct buffer_head *bh2;
+	struct buffer_head *inode_bitmap_bh = NULL;
+	struct buffer_head *group_desc_bh;
 	ext4_group_t group = 0;
 	unsigned long ino = 0;
 	struct inode *inode;
@@ -594,7 +658,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
-	int ret2, err = 0, count;
+	int ret2, err = 0;
 	struct inode *ret;
 	ext4_group_t i;
 	int free = 0;
@@ -634,40 +698,48 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	for (i = 0; i < sbi->s_groups_count; i++) {
 		err = -EIO;
 
-		gdp = ext4_get_group_desc(sb, group, &bh2);
+		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
 		if (!gdp)
 			goto fail;
 
-		brelse(bitmap_bh);
-		bitmap_bh = ext4_read_inode_bitmap(sb, group);
-		if (!bitmap_bh)
+		brelse(inode_bitmap_bh);
+		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
+		if (!inode_bitmap_bh)
 			goto fail;
 
 		ino = 0;
 
 repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
-				bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
+					inode_bitmap_bh->b_data,
+					EXT4_INODES_PER_GROUP(sb), ino);
 		if (ino < EXT4_INODES_PER_GROUP(sb)) {
 
-			BUFFER_TRACE(bitmap_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle, bitmap_bh);
+			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+			err = ext4_journal_get_write_access(handle,
+								inode_bitmap_bh);
 			if (err)
 				goto fail;
 
-			if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
-						ino, bitmap_bh->b_data)) {
+			BUFFER_TRACE(group_desc_bh, "get_write_access");
+			err = ext4_journal_get_write_access(handle,
+								group_desc_bh);
+			if (err)
+				goto fail;
+			if (!ext4_claim_inode(sb, inode_bitmap_bh,
+						ino, group, mode)) {
 				/* we won it */
-				BUFFER_TRACE(bitmap_bh,
+				BUFFER_TRACE(inode_bitmap_bh,
 					"call ext4_journal_dirty_metadata");
 				err = ext4_journal_dirty_metadata(handle,
-								bitmap_bh);
+							inode_bitmap_bh);
 				if (err)
 					goto fail;
 				goto got;
 			}
 			/* we lost it */
-			jbd2_journal_release_buffer(handle, bitmap_bh);
+			jbd2_journal_release_buffer(handle, inode_bitmap_bh);
+			jbd2_journal_release_buffer(handle, group_desc_bh);
 
 			if (++ino < EXT4_INODES_PER_GROUP(sb))
 				goto repeat_in_this_group;
@@ -687,30 +759,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	goto out;
 
 got:
-	ino++;
-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-	    ino > EXT4_INODES_PER_GROUP(sb)) {
-		ext4_error(sb, __func__,
-			   "reserved inode or inode > inodes count - "
-			   "block_group = %u, inode=%lu", group,
-			   ino + group * EXT4_INODES_PER_GROUP(sb));
-		err = -EIO;
-		goto fail;
-	}
-
-	BUFFER_TRACE(bh2, "get_write_access");
-	err = ext4_journal_get_write_access(handle, bh2);
-	if (err) goto fail;
-
 	/* We may have to initialize the block bitmap if it isn't already */
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
 	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-		struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
+		struct buffer_head *block_bitmap_bh;
 
-		BUFFER_TRACE(block_bh, "get block bitmap access");
-		err = ext4_journal_get_write_access(handle, block_bh);
+		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
+		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
 		if (err) {
-			brelse(block_bh);
+			brelse(block_bitmap_bh);
 			goto fail;
 		}
 
@@ -728,57 +786,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 
 		/* Don't need to dirty bitmap block if we didn't change it */
 		if (free) {
-			BUFFER_TRACE(block_bh, "dirty block bitmap");
-			err = ext4_journal_dirty_metadata(handle, block_bh);
+			BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
+			err = ext4_journal_dirty_metadata(handle,
+							block_bitmap_bh);
 		}
 
-		brelse(block_bh);
+		brelse(block_bitmap_bh);
 		if (err)
 			goto fail;
 	}
-
-	spin_lock(sb_bgl_lock(sbi, group));
-	/* If we didn't allocate from within the initialized part of the inode
-	 * table then we need to initialize up to this inode. */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-
-			/* When marking the block group with
-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unused even though
-			 * mke2fs could have initialized the same for us.
-			 * Instead we calculated the value below
-			 */
-
-			free = 0;
-		} else {
-			free = EXT4_INODES_PER_GROUP(sb) -
-				ext4_itable_unused_count(sb, gdp);
-		}
-
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to  update the bg_itable_unused count
-		 *
-		 */
-		if (ino > free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - ino));
-	}
-
-	count = ext4_free_inodes_count(sb, gdp) - 1;
-	ext4_free_inodes_set(sb, gdp, count);
-	if (S_ISDIR(mode)) {
-		count = ext4_used_dirs_count(sb, gdp) + 1;
-		ext4_used_dirs_set(sb, gdp, count);
-	}
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, group));
-	BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
-	err = ext4_journal_dirty_metadata(handle, bh2);
-	if (err) goto fail;
+	BUFFER_TRACE(group_desc_bh, "call ext4_journal_dirty_metadata");
+	err = ext4_journal_dirty_metadata(handle, group_desc_bh);
+	if (err)
+		goto fail;
 
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
@@ -876,7 +896,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	iput(inode);
 	ret = ERR_PTR(err);
 really_out:
-	brelse(bitmap_bh);
+	brelse(inode_bitmap_bh);
 	return ret;
 
 fail_free_drop:
@@ -887,7 +907,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	iput(inode);
-	brelse(bitmap_bh);
+	brelse(inode_bitmap_bh);
 	return ERR_PTR(err);
 }
 


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/5] ext4: unlock group before ext4_error
  2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
  2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
  2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
@ 2008-11-20 18:35 ` Peter Staubach
  2008-11-20 18:47   ` Aneesh Kumar K.V
  2008-11-20 22:38 ` Theodore Tso
  3 siblings, 1 reply; 8+ messages in thread
From: Peter Staubach @ 2008-11-20 18:35 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: cmm, tytso, sandeen, linux-ext4

Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/ext4/mballoc.c |   12 ++++++++++++
>  1 files changed, 12 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 772e05b..039a5a6 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>  			blocknr +=
>  			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
>  
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "double-free of inode"
>  				   " %lu's block %llu(bit %u in group %u)\n",
>  				   inode ? inode->i_ino : 0, blocknr,
>  				   first + i, e4b->bd_group);
> +			ext4_unlock_group(sb, e4b->bd_group);
>   

This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it?

    Thanx...

       ps

>  		}
>  		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
>  	}
> @@ -704,6 +706,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
>  	grp->bb_fragments = fragments;
>  
>  	if (free != grp->bb_free) {
> +		ext4_unlock_group(sb, group);
>  		ext4_error(sb, __func__,
>  			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
>  			group, free, grp->bb_free);
> @@ -711,6 +714,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
>  		 * If we intent to continue, we consider group descritor
>  		 * corrupt and update bb_free using bitmap value
>  		 */
> +		ext4_lock_group(sb, group);
>  		grp->bb_free = free;
>  	}
>  
> @@ -1625,15 +1629,18 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  			 * free blocks even though group info says we
>  			 * we have free blocks
>  			 */
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "%d free blocks as per "
>  					"group info. But bitmap says 0\n",
>  					free);
> +			ext4_lock_group(sb, e4b->bd_group);
>  			break;
>  		}
>  
>  		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
>  		BUG_ON(ex.fe_len <= 0);
>  		if (free < ex.fe_len) {
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__, "%d free blocks as per "
>  					"group info. But got %d blocks\n",
>  					free, ex.fe_len);
> @@ -1642,6 +1649,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
>  			 * indicate that the bitmap is corrupt. So exit
>  			 * without claiming the space.
>  			 */
> +			ext4_lock_group(sb, e4b->bd_group);
>  			break;
>  		}
>  
> @@ -3789,6 +3797,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
>  		bit = next + 1;
>  	}
>  	if (free != pa->pa_free) {
> +		ext4_unlock_group(sb, group);
>  		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
>  			pa, (unsigned long) pa->pa_lstart,
>  			(unsigned long) pa->pa_pstart,
> @@ -3799,6 +3808,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
>  		 * pa is already deleted so we use the value obtained
>  		 * from the bitmap and continue.
>  		 */
> +		ext4_lock_group(sb, group);
>  	}
>  	atomic_add(free, &sbi->s_mb_discarded);
>  
> @@ -4607,9 +4617,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>  		else if (block >= (entry->start_blk + entry->count))
>  			n = &(*n)->rb_right;
>  		else {
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			ext4_error(sb, __func__,
>  			    "Double free of blocks %d (%d %d)\n",
>  			    block, entry->start_blk, entry->count);
> +			ext4_unlock_group(sb, e4b->bd_group);
>  			return 0;
>  		}
>  	}
>   


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values.
  2008-11-20 18:26   ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V
@ 2008-11-20 18:44     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:44 UTC (permalink / raw)
  To: cmm, tytso, sandeen; +Cc: linux-ext4

On Thu, Nov 20, 2008 at 11:56:20PM +0530, Aneesh Kumar K.V wrote:
> Rename the lower bits with suffix _lo and add helper
> to access the values. Also rename bg_itable_unused_hi
> to bg_pad as in e2fsprogs. bg_itable_unused_hi is never
> used before
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  fs/ext4/balloc.c  |   11 ++++---
>  fs/ext4/ext4.h    |   24 +++++++++++++--
>  fs/ext4/ialloc.c  |   83 ++++++++++++++++++++++++++++-------------------------
>  fs/ext4/inode.c   |    2 +-
>  fs/ext4/mballoc.c |   15 +++++----
>  fs/ext4/resize.c  |    4 +-
>  fs/ext4/super.c   |   64 +++++++++++++++++++++++++++++++++++++++-
>  7 files changed, 143 insertions(+), 60 deletions(-)
> 
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index d268f5f..efe68d9 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -102,9 +102,9 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
>  		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
>  			ext4_error(sb, __func__,
>  				  "Checksum bad for group %u\n", block_group);
> -			gdp->bg_free_blocks_count = 0;
> -			gdp->bg_free_inodes_count = 0;
> -			gdp->bg_itable_unused = 0;
> +			ext4_free_blks_set(sb, gdp, 0);
> +			ext4_free_inodes_set(sb, gdp, 0);
> +			ext4_itable_unused_set(sb, gdp, 0);
>  			memset(bh->b_data, 0xff, sb->s_blocksize);
>  			return 0;
>  		}
> @@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
>  		}
>  	}
>  	spin_lock(sb_bgl_lock(sbi, block_group));
> -	le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
> +	blocks_freed += ext4_free_blks_count(sb, desc);
> +	ext4_free_blks_set(sb, desc, blocks_freed);
>  	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
>  	spin_unlock(sb_bgl_lock(sbi, block_group));
>  	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
> @@ -742,7 +743,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
>  		gdp = ext4_get_group_desc(sb, i, NULL);
>  		if (!gdp)
>  			continue;
> -		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
> +		desc_count += ext4_free_blks_count(sb, gdp);
>  	}
> 

blocks_freed is used later so use count;

@@ -372,7 +372,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 	struct ext4_group_desc *desc;
 	struct ext4_super_block *es;
 	struct ext4_sb_info *sbi;
-	int err = 0, ret;
+	int err = 0, ret, count;
 	ext4_grpblk_t blocks_freed;
 	struct ext4_group_info *grp;
 
@@ -444,7 +444,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 		}
 	}
 	spin_lock(sb_bgl_lock(sbi, block_group));
-	le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
+	count = blocks_freed + ext4_free_blks_count(sb, desc);
+	ext4_free_blks_set(sb, desc, count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/5] ext4: unlock group before ext4_error
  2008-11-20 18:35 ` Peter Staubach
@ 2008-11-20 18:47   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2008-11-20 18:47 UTC (permalink / raw)
  To: Peter Staubach; +Cc: cmm, tytso, sandeen, linux-ext4

On Thu, Nov 20, 2008 at 01:35:11PM -0500, Peter Staubach wrote:
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 772e05b..039a5a6 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -460,10 +460,12 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
>>  			blocknr +=
>>  			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
>>  +			ext4_unlock_group(sb, e4b->bd_group);
>>  			ext4_error(sb, __func__, "double-free of inode"
>>  				   " %lu's block %llu(bit %u in group %u)\n",
>>  				   inode ? inode->i_ino : 0, blocknr,
>>  				   first + i, e4b->bd_group);
>> +			ext4_unlock_group(sb, e4b->bd_group);
>>   
>
> This should be ext4_lock_group(sb, e4b->bd_group); shouldn't it?

Yes, I need to update at two place

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 039a5a6..728221e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -465,7 +465,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 				   " %lu's block %llu(bit %u in group %u)\n",
 				   inode ? inode->i_ino : 0, blocknr,
 				   first + i, e4b->bd_group);
-			ext4_unlock_group(sb, e4b->bd_group);
+			ext4_lock_group(sb, e4b->bd_group);
 		}
 		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
 	}
@@ -4621,7 +4621,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 			ext4_error(sb, __func__,
 			    "Double free of blocks %d (%d %d)\n",
 			    block, entry->start_blk, entry->count);
-			ext4_unlock_group(sb, e4b->bd_group);
+			ext4_lock_group(sb, e4b->bd_group);
 			return 0;
 		}
 	}

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/5] ext4: unlock group before ext4_error
  2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2008-11-20 18:35 ` Peter Staubach
@ 2008-11-20 22:38 ` Theodore Tso
  3 siblings, 0 replies; 8+ messages in thread
From: Theodore Tso @ 2008-11-20 22:38 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: cmm, sandeen, linux-ext4

On Thu, Nov 20, 2008 at 11:56:18PM +0530, Aneesh Kumar K.V wrote:
> Otherwise ext4_error will cause BUG because of
> scheduling in atomic context.

Granted that it isn't necessarily safe as it is when errors-behaviour
is set to continue, that is the default on a large number of
filesystems.  And unlocking the group, calling the ext4_error(), and
then relocking the group can't possibly be safe; after all, we're
holding the lock for a reason!  

In the errors=continue case, we don't actually need to unlock and
relock the group, since all we need to do is to printk a message to
the console.  In the errors=panic case, we'll never return; and in the
errors=remount-ro case, relocking is kind of pointless but harmless,
since once the journal is aborted, there's no way the allocation is
going to succeed anyway, and a subsequent call will return an error.

This gets ugly to get right.  Since the situation where we need to
call ext4_error() while holding a spinlock which should be preserved
in the errors=continue case seems to be unique to mballoc, maybe
something like this?

static int ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
       	    		          const char *function,
            		          const char *fmt, ...)
{

	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (test_opt(sb, ERROR_CONT)) {
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb, es, 0);
		return 0;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/* 
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might what to use the return value from
	 * ext4_grp_locked_error() to distinguish beween the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
}

This requires access to two static functions in fs/ext4/super.c, so
probably the best thing to do is to put this function in
fs/ext4/super.c, and move the definition of ext4_[un]lock_group from
mballoc.h to ext4.h.

Comments?

						- Ted

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-11-20 22:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-20 18:26 [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
2008-11-20 18:26 ` [PATCH 3/5] ext4: Fix the race between read_block_bitmap and mark_diskspace_used Aneesh Kumar K.V
2008-11-20 18:26   ` [PATCH 4/5] ext4: Use both hi and lo bits of the group desc values Aneesh Kumar K.V
2008-11-20 18:44     ` Aneesh Kumar K.V
2008-11-20 18:34 ` [PATCH 1/5] ext4: unlock group before ext4_error Aneesh Kumar K.V
2008-11-20 18:35 ` Peter Staubach
2008-11-20 18:47   ` Aneesh Kumar K.V
2008-11-20 22:38 ` Theodore Tso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).