public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH]  FIXUP: Orlov block allocator for ext2
       [not found] <3DC1C852.E8DB711E@digeo.com>
@ 2002-11-02  6:41 ` Theodore Ts'o
  2002-11-02  6:42 ` [PATCH] Orlov block allocator for ext3 Theodore Ts'o
  1 sibling, 0 replies; 2+ messages in thread
From: Theodore Ts'o @ 2002-11-02  6:41 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, Alexander Viro, linux-kernel

On Thu, Oct 31, 2002 at 04:18:26PM -0800, Andrew Morton wrote:
> This is Al's implementation of the Orlov block allocator for ext2.
> 
> At least doubles the throughput for the traverse-a-kernel-tree
> test and is well tested.

I finally had time to look at the Orlov patches, and found a memory
leak; sbi->s_debts wasn't getting freed when the filesystem was
getting unmounted, or in the error path.

This patch also makes the following cleanups/changes:

1) Use sbi->s_debts instead of sbi->debts --- all other fields in
	struct ext2_sb_info are prefixed by "s_", so this makes things
	consistent.

2) Add support for a new inode flag, EXT2_TOPDIR_FL, which tells tells
	the Orlov allocator to treat that directory as the top of
	directory hierarchies, so that new subdirectories created in
	that directory should be spread apart.  System administrators
	should set this flag on directories like /usr/src, /usr/home, etc.

3) Add a mount-time flag, -o oldalloc, which forces the use of the old
	inode (pre-Orlov) allocator.  This makes it easier to do
	comparison benchmarks, and in case people want to use the old
	algorithm.

						- Ted

fs/ext2/ialloc.c           |   27 ++++++++++++++-------------
fs/ext2/super.c            |   17 ++++++++++++-----
include/linux/ext2_fs.h    |    6 ++++--
include/linux/ext2_fs_sb.h |    2 +-
4 files changed, 31 insertions(+), 21 deletions(-)

diff -Nru a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
--- a/fs/ext2/ialloc.c	Sat Nov  2 01:32:09 2002
+++ b/fs/ext2/ialloc.c	Sat Nov  2 01:32:09 2002
@@ -209,9 +209,7 @@
  * For other inodes, search forward from the parent directory\'s block
  * group to find a free inode.
  */
-#if 0
-
-static int find_group_dir(struct super_block *sb, int parent_group)
+static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
 	struct ext2_super_block * es = EXT2_SB(sb)->s_es;
 	int ngroups = EXT2_SB(sb)->s_groups_count;
@@ -243,7 +241,6 @@
 	mark_buffer_dirty(best_bh);
 	return best_group;
 }
-#endif
 
 /* 
  * Orlov's allocator for directories. 
@@ -289,7 +286,8 @@
 	struct ext2_group_desc *desc;
 	struct buffer_head *bh;
 
-	if (parent == sb->s_root->d_inode) {
+	if ((parent == sb->s_root->d_inode) ||
+	    (parent->i_flags & EXT2_TOPDIR_FL)) {
 		struct ext2_group_desc *best_desc = NULL;
 		struct buffer_head *best_bh = NULL;
 		int best_ndir = inodes_per_group;
@@ -342,7 +340,7 @@
 		desc = ext2_get_group_desc (sb, group, &bh);
 		if (!desc || !desc->bg_free_inodes_count)
 			continue;
-		if (sbi->debts[group] >= max_debt)
+		if (sbi->s_debts[group] >= max_debt)
 			continue;
 		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
 			continue;
@@ -447,9 +445,12 @@
 	lock_super (sb);
 	es = EXT2_SB(sb)->s_es;
 repeat:
-	if (S_ISDIR(mode))
-		group = find_group_orlov(sb, dir);
-	else 
+	if (S_ISDIR(mode)) {
+		if (test_opt (sb, OLDALLOC))
+			group = find_group_dir(sb, dir);
+		else
+			group = find_group_orlov(sb, dir);
+	} else 
 		group = find_group_other(sb, dir);
 
 	err = -ENOSPC;
@@ -488,11 +489,11 @@
 		cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
 
 	if (S_ISDIR(mode)) {
-		if (EXT2_SB(sb)->debts[group] < 255)
-			EXT2_SB(sb)->debts[group]++;
+		if (EXT2_SB(sb)->s_debts[group] < 255)
+			EXT2_SB(sb)->s_debts[group]++;
 	} else {
-		if (EXT2_SB(sb)->debts[group])
-			EXT2_SB(sb)->debts[group]--;
+		if (EXT2_SB(sb)->s_debts[group])
+			EXT2_SB(sb)->s_debts[group]--;
 	}
 
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
diff -Nru a/fs/ext2/super.c b/fs/ext2/super.c
--- a/fs/ext2/super.c	Sat Nov  2 01:32:09 2002
+++ b/fs/ext2/super.c	Sat Nov  2 01:32:09 2002
@@ -140,6 +140,7 @@
 		if (sbi->s_group_desc[i])
 			brelse (sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
+	kfree(sbi->s_debts);
 	brelse (sbi->s_sbh);
 	sb->s_fs_info = NULL;
 	kfree(sbi);
@@ -385,6 +386,10 @@
 				return 0;
 			sbi->s_resuid = v;
 		}
+		else if (!strcmp (this_char, "oldalloc"))
+			set_opt (sbi->s_mount_opt, OLDALLOC);
+		else if (!strcmp (this_char, "orlov"))
+			clear_opt (sbi->s_mount_opt, OLDALLOC);
 		/* Silently ignore the quota options */
 		else if (!strcmp (this_char, "grpquota")
 		         || !strcmp (this_char, "noquota")
@@ -756,13 +761,13 @@
 		printk ("EXT2-fs: not enough memory\n");
 		goto failed_mount;
 	}
-	sbi->debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->debts),
-			GFP_KERNEL);
-	if (!sbi->debts) {
+	sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
+			       GFP_KERNEL);
+	if (!sbi->s_debts) {
 		printk ("EXT2-fs: not enough memory\n");
 		goto failed_mount_group_desc;
 	}
-	memset(sbi->debts, 0, sbi->s_groups_count * sizeof(*sbi->debts));
+	memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
 	for (i = 0; i < db_count; i++) {
 		block = descriptor_loc(sb, logic_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -771,7 +776,7 @@
 				brelse (sbi->s_group_desc[j]);
 			kfree(sbi->s_group_desc);
 			printk ("EXT2-fs: unable to read group descriptors\n");
-			goto failed_mount;
+			goto failed_mount_group_desc;
 		}
 	}
 	if (!ext2_check_descriptors (sb)) {
@@ -808,6 +813,8 @@
 		brelse(sbi->s_group_desc[i]);
 failed_mount_group_desc:
 	kfree(sbi->s_group_desc);
+	if (sbi->s_debts)
+		kfree(sbi->s_debts);
 failed_mount:
 	brelse(bh);
 failed_sbi:
diff -Nru a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
--- a/include/linux/ext2_fs.h	Sat Nov  2 01:32:09 2002
+++ b/include/linux/ext2_fs.h	Sat Nov  2 01:32:09 2002
@@ -191,10 +191,11 @@
 #define EXT2_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
 #define EXT2_NOTAIL_FL			0x00008000 /* file tail should not be merged */
 #define EXT2_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define EXT2_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT2_RESERVED_FL		0x80000000 /* reserved for ext2 lib */
 
-#define EXT2_FL_USER_VISIBLE		0x00011FFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE		0x000100FF /* User modifiable flags */
+#define EXT2_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
 
 /*
  * ioctl commands
@@ -300,6 +301,7 @@
  * Mount flags
  */
 #define EXT2_MOUNT_CHECK		0x0001	/* Do mount-time checks */
+#define EXT2_MOUNT_OLDALLOC		0x0002  /* Don't use the new Orlov allocator */
 #define EXT2_MOUNT_GRPID		0x0004	/* Create files with directory's group */
 #define EXT2_MOUNT_DEBUG		0x0008	/* Some debugging messages */
 #define EXT2_MOUNT_ERRORS_CONT		0x0010	/* Continue on errors */
diff -Nru a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
--- a/include/linux/ext2_fs_sb.h	Sat Nov  2 01:32:09 2002
+++ b/include/linux/ext2_fs_sb.h	Sat Nov  2 01:32:09 2002
@@ -44,7 +44,7 @@
 	int s_first_ino;
 	u32 s_next_generation;
 	unsigned long s_dir_count;
-	u8 *debts;
+	u8 *s_debts;
 };
 
 #endif	/* _LINUX_EXT2_FS_SB */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH] Orlov block allocator for ext3
       [not found] <3DC1C852.E8DB711E@digeo.com>
  2002-11-02  6:41 ` [PATCH] FIXUP: Orlov block allocator for ext2 Theodore Ts'o
@ 2002-11-02  6:42 ` Theodore Ts'o
  1 sibling, 0 replies; 2+ messages in thread
From: Theodore Ts'o @ 2002-11-02  6:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, Alexander Viro, linux-kernel

On Thu, Oct 31, 2002 at 04:18:26PM -0800, Andrew Morton wrote:
> This is Al's implementation of the Orlov block allocator for ext2.
> 
> At least doubles the throughput for the traverse-a-kernel-tree
> test and is well tested.
> 
> I still need to do the ext3 version.

Here's the ext3 version.   :-)

						- Ted

fs/ext3/ialloc.c           |  336 ++++++++++++++++++++++++++++++++++-----------
fs/ext3/super.c            |   11 +
include/linux/ext3_fs.h    |    7 
include/linux/ext3_fs_sb.h |    2 
4 files changed, 277 insertions(+), 79 deletions(-)

diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
--- a/fs/ext3/ialloc.c	Sat Nov  2 01:32:18 2002
+++ b/fs/ext3/ialloc.c	Sat Nov  2 01:32:18 2002
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/random.h>
 
 #include <asm/bitops.h>
 #include <asm/byteorder.h>
@@ -198,6 +199,230 @@
  * the groups with above-average free space, that group with the fewest
  * directories already is chosen.
  *
+ * For other inodes, search forward from the parent directory\'s block
+ * group to find a free inode.
+ */
+static int find_group_dir(struct super_block *sb, struct inode *parent)
+{
+	struct ext3_super_block * es = EXT3_SB(sb)->s_es;
+	int ngroups = EXT3_SB(sb)->s_groups_count;
+	int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+	struct ext3_group_desc *desc, *best_desc = NULL;
+	struct buffer_head *bh, *best_bh = NULL;
+	int group, best_group = -1;
+
+	for (group = 0; group < ngroups; group++) {
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+			continue;
+		if (!best_desc || 
+		    (le16_to_cpu(desc->bg_free_blocks_count) >
+		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
+			best_group = group;
+			best_desc = desc;
+			best_bh = bh;
+		}
+	}
+	if (!best_desc)
+		return -1;
+	best_desc->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1);
+	best_desc->bg_used_dirs_count =
+		cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1);
+	mark_buffer_dirty(best_bh);
+	return best_group;
+}
+
+/* 
+ * Orlov's allocator for directories. 
+ * 
+ * We always try to spread first-level directories.
+ *
+ * If there are blockgroups with both free inodes and free blocks counts 
+ * not worse than average we return one with smallest directory count. 
+ * Otherwise we simply return a random group. 
+ * 
+ * For the rest rules look so: 
+ * 
+ * It's OK to put directory into a group unless 
+ * it has too many directories already (max_dirs) or 
+ * it has too few free inodes left (min_inodes) or 
+ * it has too few free blocks left (min_blocks) or 
+ * it's already running too large debt (max_debt). 
+ * Parent's group is prefered, if it doesn't satisfy these 
+ * conditions we search cyclically through the rest. If none 
+ * of the groups look good we just look for a group with more 
+ * free inodes than average (starting at parent's group). 
+ * 
+ * Debt is incremented each time we allocate a directory and decremented 
+ * when we allocate an inode, within 0--255. 
+ */ 
+
+#define INODE_COST 64
+#define BLOCK_COST 256
+
+static int find_group_orlov(struct super_block *sb, struct inode *parent)
+{
+	int parent_group = EXT3_I(parent)->i_block_group;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	struct ext3_super_block *es = sbi->s_es;
+	int ngroups = sbi->s_groups_count;
+	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
+	int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+	int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
+	int blocks_per_dir;
+	int ndirs = sbi->s_dir_count;
+	int max_debt, max_dirs, min_blocks, min_inodes;
+	int group = -1, i;
+	struct ext3_group_desc *desc;
+	struct buffer_head *bh;
+
+	if ((parent == sb->s_root->d_inode) ||
+	    (parent->i_flags & EXT3_TOPDIR_FL)) {
+		struct ext3_group_desc *best_desc = NULL;
+		struct buffer_head *best_bh = NULL;
+		int best_ndir = inodes_per_group;
+		int best_group = -1;
+
+		get_random_bytes(&group, sizeof(group));
+		parent_group = (unsigned)group % ngroups;
+		for (i = 0; i < ngroups; i++) {
+			group = (parent_group + i) % ngroups;
+			desc = ext3_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+				continue;
+			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+				continue;
+			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+				continue;
+			best_group = group;
+			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+			best_desc = desc;
+			best_bh = bh;
+		}
+		if (best_group >= 0) {
+			desc = best_desc;
+			bh = best_bh;
+			group = best_group;
+			goto found;
+		}
+		goto fallback;
+	}
+
+	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) -
+			  le32_to_cpu(es->s_free_blocks_count)) / ndirs;
+
+	max_dirs = ndirs / ngroups + inodes_per_group / 16;
+	min_inodes = avefreei - inodes_per_group / 4;
+	min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
+
+	max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
+	if (max_debt * INODE_COST > inodes_per_group)
+		max_debt = inodes_per_group / INODE_COST;
+	if (max_debt > 255)
+		max_debt = 255;
+	if (max_debt == 0)
+		max_debt = 1;
+
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (sbi->s_debts[group] >= max_debt)
+			continue;
+		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+			continue;
+		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+			continue;
+		goto found;
+	}
+
+fallback:
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+			goto found;
+	}
+
+	return -1;
+
+found:
+	desc->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+	desc->bg_used_dirs_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
+	sbi->s_dir_count++;
+	mark_buffer_dirty(bh);
+	return group;
+}
+
+static int find_group_other(struct super_block *sb, struct inode *parent)
+{
+	int parent_group = EXT3_I(parent)->i_block_group;
+	int ngroups = EXT3_SB(sb)->s_groups_count;
+	struct ext3_group_desc *desc;
+	struct buffer_head *bh;
+	int group, i;
+
+	/*
+	 * Try to place the inode in its parent directory
+	 */
+	group = parent_group;
+	desc = ext3_get_group_desc (sb, group, &bh);
+	if (desc && le16_to_cpu(desc->bg_free_inodes_count))
+		goto found;
+
+	/*
+	 * Use a quadratic hash to find a group with a
+	 * free inode
+	 */
+	for (i = 1; i < ngroups; i <<= 1) {
+		group += i;
+		if (group >= ngroups)
+			group -= ngroups;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
+			goto found;
+	}
+
+	/*
+	 * That failed: try linear search for a free inode
+	 */
+	group = parent_group + 1;
+	for (i = 2; i < ngroups; i++) {
+		if (++group >= ngroups)
+			group = 0;
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
+			goto found;
+	}
+
+	return -1;
+
+found:
+	desc->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+	mark_buffer_dirty(bh);
+	return group;
+}
+
+/*
+ * There are two policies for allocating an inode.  If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+ * the groups with above-average free space, that group with the fewest
+ * directories already is chosen.
+ *
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
@@ -206,10 +431,10 @@
 	struct super_block *sb;
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *bh2;
-	int i, j, avefreei;
+	int group;
+	ino_t ino;
 	struct inode * inode;
 	struct ext3_group_desc * gdp;
-	struct ext3_group_desc * tmp;
 	struct ext3_super_block * es;
 	struct ext3_inode_info *ei;
 	int err = 0;
@@ -228,93 +453,35 @@
 	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
 repeat:
-	gdp = NULL;
-	i = 0;
-
 	if (S_ISDIR(mode)) {
-		avefreei = le32_to_cpu(es->s_free_inodes_count) /
-			EXT3_SB(sb)->s_groups_count;
-		if (!gdp) {
-			for (j = 0; j < EXT3_SB(sb)->s_groups_count; j++) {
-				struct buffer_head *temp_buffer;
-				tmp = ext3_get_group_desc (sb, j, &temp_buffer);
-				if (tmp &&
-				    le16_to_cpu(tmp->bg_free_inodes_count) &&
-				    le16_to_cpu(tmp->bg_free_inodes_count) >=
-							avefreei) {
-					if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
-						le16_to_cpu(gdp->bg_free_blocks_count))) {
-						i = j;
-						gdp = tmp;
-						bh2 = temp_buffer;
-					}
-				}
-			}
-		}
-	} else {
-		/*
-		 * Try to place the inode in its parent directory
-		 */
-		i = EXT3_I(dir)->i_block_group;
-		tmp = ext3_get_group_desc (sb, i, &bh2);
-		if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
-			gdp = tmp;
+		if (test_opt (sb, OLDALLOC))
+			group = find_group_dir(sb, dir);
 		else
-		{
-			/*
-			 * Use a quadratic hash to find a group with a
-			 * free inode
-			 */
-			for (j = 1; j < EXT3_SB(sb)->s_groups_count; j <<= 1) {
-				i += j;
-				if (i >= EXT3_SB(sb)->s_groups_count)
-					i -= EXT3_SB(sb)->s_groups_count;
-				tmp = ext3_get_group_desc (sb, i, &bh2);
-				if (tmp &&
-				    le16_to_cpu(tmp->bg_free_inodes_count)) {
-					gdp = tmp;
-					break;
-				}
-			}
-		}
-		if (!gdp) {
-			/*
-			 * That failed: try linear search for a free inode
-			 */
-			i = EXT3_I(dir)->i_block_group + 1;
-			for (j = 2; j < EXT3_SB(sb)->s_groups_count; j++) {
-				if (++i >= EXT3_SB(sb)->s_groups_count)
-					i = 0;
-				tmp = ext3_get_group_desc (sb, i, &bh2);
-				if (tmp &&
-				    le16_to_cpu(tmp->bg_free_inodes_count)) {
-					gdp = tmp;
-					break;
-				}
-			}
-		}
-	}
+			group = find_group_orlov(sb, dir);
+	} else 
+		group = find_group_other(sb, dir);
 
 	err = -ENOSPC;
-	if (!gdp)
+	if (group == -1)
 		goto out;
 
 	err = -EIO;
 	brelse(bitmap_bh);
-	bitmap_bh = read_inode_bitmap(sb, i);
+	bitmap_bh = read_inode_bitmap(sb, group);
 	if (!bitmap_bh)
 		goto fail;
+	gdp = ext3_get_group_desc (sb, group, &bh2);
 
-	if ((j = ext3_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
+	if ((ino = ext3_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
 				      EXT3_INODES_PER_GROUP(sb))) <
 	    EXT3_INODES_PER_GROUP(sb)) {
 		BUFFER_TRACE(bitmap_bh, "get_write_access");
 		err = ext3_journal_get_write_access(handle, bitmap_bh);
 		if (err) goto fail;
 		
-		if (ext3_set_bit(j, bitmap_bh->b_data)) {
+		if (ext3_set_bit(ino, bitmap_bh->b_data)) {
 			ext3_error (sb, "ext3_new_inode",
-				      "bit already set for inode %d", j);
+				      "bit already set for inode %lu", ino);
 			goto repeat;
 		}
 		BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
@@ -324,7 +491,7 @@
 		if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
 			ext3_error (sb, "ext3_new_inode",
 				    "Free inodes count corrupted in group %d",
-				    i);
+				    group);
 			/* Is it really ENOSPC? */
 			err = -ENOSPC;
 			if (sb->s_flags & MS_RDONLY)
@@ -340,11 +507,11 @@
 		}
 		goto repeat;
 	}
-	j += i * EXT3_INODES_PER_GROUP(sb) + 1;
-	if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
+	ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
+	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
 		ext3_error (sb, "ext3_new_inode",
 			    "reserved inode or inode > inodes count - "
-			    "block_group = %d,inode=%d", i, j);
+			    "block_group = %d, inode=%lu", group, ino);
 		err = -EIO;
 		goto fail;
 	}
@@ -382,7 +549,7 @@
 		inode->i_gid = current->fsgid;
 	inode->i_mode = mode;
 
-	inode->i_ino = j;
+	inode->i_ino = ino;
 	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blksize = PAGE_SIZE;
 	inode->i_blocks = 0;
@@ -412,7 +579,7 @@
 	ei->i_prealloc_block = 0;
 	ei->i_prealloc_count = 0;
 #endif
-	ei->i_block_group = i;
+	ei->i_block_group = group;
 	
 	if (ei->i_flags & EXT3_SYNC_FL)
 		inode->i_flags |= S_SYNC;
@@ -560,6 +727,21 @@
 #else
 	return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count);
 #endif
+}
+
+/* Called at mount-time, super-block is locked */
+unsigned long ext3_count_dirs (struct super_block * sb)
+{
+	unsigned long count = 0;
+	int i;
+
+	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+		struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL);
+		if (!gdp)
+			continue;
+		count += le16_to_cpu(gdp->bg_used_dirs_count);
+	}
+	return count;
 }
 
 #ifdef CONFIG_EXT3_CHECK
diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c
--- a/fs/ext3/super.c	Sat Nov  2 01:32:18 2002
+++ b/fs/ext3/super.c	Sat Nov  2 01:32:18 2002
@@ -390,6 +390,7 @@
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
+	kfree(sbi->s_debts);
 	brelse(sbi->s_sbh);
 
 	/* Debugging code just in case the in-memory inode orphan list
@@ -1221,6 +1222,13 @@
 		printk (KERN_ERR "EXT3-fs: not enough memory\n");
 		goto failed_mount;
 	}
+	sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
+			GFP_KERNEL);
+	if (!sbi->s_debts) {
+		printk ("EXT3-fs: not enough memory\n");
+		goto failed_mount2;
+	}
+	memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
 	for (i = 0; i < db_count; i++) {
 		block = descriptor_loc(sb, logic_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -1236,6 +1244,7 @@
 		goto failed_mount2;
 	}
 	sbi->s_gdb_count = db_count;
+	sbi->s_dir_count = ext3_count_dirs(sb);
 	/*
 	 * set up enough so that it can read an inode
 	 */
@@ -1339,6 +1348,8 @@
 failed_mount3:
 	journal_destroy(sbi->s_journal);
 failed_mount2:
+	if (sbi->s_debts)
+		kfree(sbi->s_debts);
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
--- a/include/linux/ext3_fs.h	Sat Nov  2 01:32:18 2002
+++ b/include/linux/ext3_fs.h	Sat Nov  2 01:32:18 2002
@@ -186,10 +186,11 @@
 #define EXT3_JOURNAL_DATA_FL		0x00004000 /* file data should be journaled */
 #define EXT3_NOTAIL_FL			0x00008000 /* file tail should not be merged */
 #define EXT3_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define EXT3_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT3_RESERVED_FL		0x80000000 /* reserved for ext3 lib */
 
-#define EXT3_FL_USER_VISIBLE		0x00015FFF /* User visible flags */
-#define EXT3_FL_USER_MODIFIABLE		0x000100FF /* User modifiable flags */
+#define EXT3_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define EXT3_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
 
 /*
  * Inode dynamic state flags
@@ -308,6 +309,7 @@
  * Mount flags
  */
 #define EXT3_MOUNT_CHECK		0x0001	/* Do mount-time checks */
+#define EXT3_MOUNT_OLDALLOC		0x0002  /* Don't use the new Orlov allocator */
 #define EXT3_MOUNT_GRPID		0x0004	/* Create files with directory's group */
 #define EXT3_MOUNT_DEBUG		0x0008	/* Some debugging messages */
 #define EXT3_MOUNT_ERRORS_CONT		0x0010	/* Continue on errors */
@@ -704,6 +706,7 @@
 extern void ext3_free_inode (handle_t *, struct inode *);
 extern struct inode * ext3_orphan_get (struct super_block *, ino_t);
 extern unsigned long ext3_count_free_inodes (struct super_block *);
+extern unsigned long ext3_count_dirs (struct super_block *);
 extern void ext3_check_inodes_bitmap (struct super_block *);
 extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
 
diff -Nru a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
--- a/include/linux/ext3_fs_sb.h	Sat Nov  2 01:32:18 2002
+++ b/include/linux/ext3_fs_sb.h	Sat Nov  2 01:32:18 2002
@@ -50,6 +50,8 @@
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
+	unsigned long s_dir_count;
+	u8 *s_debts;
 
 	/* Journaling */
 	struct inode * s_journal_inode;

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2002-11-02  6:36 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <3DC1C852.E8DB711E@digeo.com>
2002-11-02  6:41 ` [PATCH] FIXUP: Orlov block allocator for ext2 Theodore Ts'o
2002-11-02  6:42 ` [PATCH] Orlov block allocator for ext3 Theodore Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox