[PATCH, RFC] Ext4 patches planned for submission upstream

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH, RFC] Ext4 patches planned for submission upstream
@ 2007-10-04  5:50 Theodore Ts'o
  2007-10-04  5:50 ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4

The following ext4 patches are planned for submission to Linus once
the merge window for 2.6.24-rc1 is opened.

						- Ted

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] jbd/jbd2: JBD memory allocation cleanups
  2007-10-04  5:50 [PATCH, RFC] Ext4 patches planned for submission upstream Theodore Ts'o
@ 2007-10-04  5:50 ` Theodore Ts'o
  2007-10-04  5:50   ` [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL Theodore Ts'o
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
  0 siblings, 2 replies; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Mingming Cao, Christoph Lameter

From: Mingming Cao <cmm@us.ibm.com>

JBD: Replace slab allocations with page cache allocations

JBD allocate memory for committed_data and frozen_data from slab. However
JBD should not pass slab pages down to the block layer. Use page allocator pages instead. This will also prepare JBD for the large blocksize patchset.


Also this patch cleans up jbd_kmalloc and replace it with kmalloc directly

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/jbd/commit.c       |    6 +-
 fs/jbd/journal.c      |   99 ++----------------------------------------------
 fs/jbd/transaction.c  |   12 +++---
 fs/jbd2/commit.c      |    6 +-
 fs/jbd2/journal.c     |   99 ++----------------------------------------------
 fs/jbd2/transaction.c |   18 ++++----
 include/linux/jbd.h   |   18 +++++----
 include/linux/jbd2.h  |   19 +++++----
 8 files changed, 51 insertions(+), 226 deletions(-)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a003d50..a263d82 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal)
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -792,14 +792,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			jbd_slab_free(jh->b_frozen_data, bh->b_size);
+			jbd_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c1..ae2c25d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -334,10 +333,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			jbd_slab_free(tmp, bh_in->b_size);
+			jbd_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -654,7 +653,7 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
@@ -1095,13 +1094,6 @@ int journal_load(journal_t *journal)
 		}
 	}
 
-	/*
-	 * Create a slab for this blocksize
-	 */
-	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-	if (err)
-		return err;
-
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (journal_recover(journal))
@@ -1615,86 +1607,6 @@ int journal_blocks_per_page(struct inode *inode)
 }
 
 /*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
-};
-
-static void journal_destroy_jbd_slabs(void)
-{
-	int i;
-
-	for (i = 0; i < JBD_MAX_SLABS; i++) {
-		if (jbd_slab[i])
-			kmem_cache_destroy(jbd_slab[i]);
-		jbd_slab[i] = NULL;
-	}
-}
-
-static int journal_create_jbd_slab(size_t slab_size)
-{
-	int i = JBD_SLAB_INDEX(slab_size);
-
-	BUG_ON(i >= JBD_MAX_SLABS);
-
-	/*
-	 * Check if we already have a slab created for this size
-	 */
-	if (jbd_slab[i])
-		return 0;
-
-	/*
-	 * Create a slab and force alignment to be same as slabsize -
-	 * this will make sure that allocations won't cross the page
-	 * boundary.
-	 */
-	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-				slab_size, slab_size, 0, NULL);
-	if (!jbd_slab[i]) {
-		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void * jbd_slab_alloc(size_t size, gfp_t flags)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd_slab_free(void *ptr,  size_t size)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	kmem_cache_free(jbd_slab[idx], ptr);
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *journal_head_cache;
@@ -1881,13 +1793,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_frozen_data, bh->b_size);
+				jbd_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_committed_data, bh->b_size);
+				jbd_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -2042,7 +1954,6 @@ static void journal_destroy_caches(void)
 	journal_destroy_revoke_caches();
 	journal_destroy_journal_head_cache();
 	journal_destroy_handle_cache();
-	journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b653..11d846c 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
-						GFP_NOFS);
+		new_transaction = kmalloc(sizeof(*new_transaction),
+						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
@@ -668,7 +668,7 @@ repeat:
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
 				frozen_buffer =
-					jbd_slab_alloc(jh2bh(jh)->b_size,
+					jbd_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
@@ -728,7 +728,7 @@ done:
 
 out:
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
-		jbd_slab_free(frozen_buffer, bh->b_size);
+		jbd_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -881,7 +881,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -908,7 +908,7 @@ repeat:
 out:
 	journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		jbd_slab_free(committed_data, bh->b_size);
+		jbd_free(committed_data, bh->b_size);
 	return err;
 }
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c0f59d1..2cac34a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			jbd2_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -801,14 +801,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			jbd2_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+			jbd2_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f37324a..4281244 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int jbd2_journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -335,10 +334,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			jbd2_slab_free(tmp, bh_in->b_size);
+			jbd2_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -655,7 +654,7 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
@@ -1096,13 +1095,6 @@ int jbd2_journal_load(journal_t *journal)
 		}
 	}
 
-	/*
-	 * Create a slab for this blocksize
-	 */
-	err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-	if (err)
-		return err;
-
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (jbd2_journal_recover(journal))
@@ -1627,86 +1619,6 @@ size_t journal_tag_bytes(journal_t *journal)
 }
 
 /*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
-};
-
-static void jbd2_journal_destroy_jbd_slabs(void)
-{
-	int i;
-
-	for (i = 0; i < JBD_MAX_SLABS; i++) {
-		if (jbd_slab[i])
-			kmem_cache_destroy(jbd_slab[i]);
-		jbd_slab[i] = NULL;
-	}
-}
-
-static int jbd2_journal_create_jbd_slab(size_t slab_size)
-{
-	int i = JBD_SLAB_INDEX(slab_size);
-
-	BUG_ON(i >= JBD_MAX_SLABS);
-
-	/*
-	 * Check if we already have a slab created for this size
-	 */
-	if (jbd_slab[i])
-		return 0;
-
-	/*
-	 * Create a slab and force alignment to be same as slabsize -
-	 * this will make sure that allocations won't cross the page
-	 * boundary.
-	 */
-	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-				slab_size, slab_size, 0, NULL);
-	if (!jbd_slab[i]) {
-		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void * jbd2_slab_alloc(size_t size, gfp_t flags)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd2_slab_free(void *ptr,  size_t size)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	kmem_cache_free(jbd_slab[idx], ptr);
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *jbd2_journal_head_cache;
@@ -1893,13 +1805,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+				jbd2_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				jbd2_slab_free(jh->b_committed_data, bh->b_size);
+				jbd2_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -2040,7 +1952,6 @@ static void jbd2_journal_destroy_caches(void)
 	jbd2_journal_destroy_revoke_caches();
 	jbd2_journal_destroy_jbd2_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
-	jbd2_journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7946ff4..a5fb70f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
-						GFP_NOFS);
+		new_transaction = kmalloc(sizeof(*new_transaction),
+						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
@@ -236,7 +236,7 @@ out:
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
-	handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
 	if (!handle)
 		return NULL;
 	memset(handle, 0, sizeof(*handle));
@@ -282,7 +282,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 
 	err = start_this_handle(journal, handle);
 	if (err < 0) {
-		jbd_free_handle(handle);
+		jbd2_free_handle(handle);
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
@@ -668,7 +668,7 @@ repeat:
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
 				frozen_buffer =
-					jbd2_slab_alloc(jh2bh(jh)->b_size,
+					jbd2_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
@@ -728,7 +728,7 @@ done:
 
 out:
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
-		jbd2_slab_free(frozen_buffer, bh->b_size);
+		jbd2_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -881,7 +881,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -908,7 +908,7 @@ repeat:
 out:
 	jbd2_journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		jbd2_slab_free(committed_data, bh->b_size);
+		jbd2_free(committed_data, bh->b_size);
 	return err;
 }
 
@@ -1411,7 +1411,7 @@ int jbd2_journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
-	jbd_free_handle(handle);
+	jbd2_free_handle(handle);
 	return err;
 }
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 4527375..26216c1 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -71,14 +71,16 @@ extern int journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-extern void * jbd_slab_alloc(size_t size, gfp_t flags);
-extern void jbd_slab_free(void *ptr, size_t size);
-
-#define jbd_kmalloc(size, flags) \
-	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
-#define jbd_rep_kmalloc(size, flags) \
-	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
+
+static inline void *jbd_alloc(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags, get_order(size));
+}
+
+static inline void jbd_free(void *ptr, size_t size)
+{
+	free_pages((unsigned long)ptr, get_order(size));
+};
 
 #define JFS_MIN_JOURNAL_BLOCKS 1024
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 260d6d7..5f8b876 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -71,14 +71,15 @@ extern u8 jbd2_journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-extern void * jbd2_slab_alloc(size_t size, gfp_t flags);
-extern void jbd2_slab_free(void *ptr, size_t size);
+static inline void *jbd2_alloc(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags, get_order(size));
+}
 
-#define jbd_kmalloc(size, flags) \
-	__jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
-#define jbd_rep_kmalloc(size, flags) \
-	__jbd2_kmalloc(__FUNCTION__, (size), (flags), 1)
+static inline void jbd2_free(void *ptr, size_t size)
+{
+	free_pages((unsigned long)ptr, get_order(size));
+};
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
 
@@ -959,12 +960,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
  */
 extern struct kmem_cache *jbd2_handle_cache;
 
-static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags)
+static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
 {
 	return kmem_cache_alloc(jbd2_handle_cache, gfp_flags);
 }
 
-static inline void jbd_free_handle(handle_t *handle)
+static inline void jbd2_free_handle(handle_t *handle)
 {
 	kmem_cache_free(jbd2_handle_cache, handle);
 }
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL
  2007-10-04  5:50 ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Theodore Ts'o
@ 2007-10-04  5:50   ` Theodore Ts'o
  2007-10-04  5:50     ` [PATCH] JBD2/Ext4: Convert kmalloc to kzalloc in jbd2/ext4 Theodore Ts'o
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
  1 sibling, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ext4, Aneesh Kumar K.V, Mingming Cao, Theodore Ts'o

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd/journal.c  |    2 +-
 fs/jbd2/journal.c |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index ae2c25d..8d6d475 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -653,7 +653,7 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 4281244..0e329a3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -654,7 +654,7 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = kmalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] JBD2/Ext4: Convert kmalloc to kzalloc in jbd2/ext4
  2007-10-04  5:50   ` [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL Theodore Ts'o
@ 2007-10-04  5:50     ` Theodore Ts'o
       [not found]       ` <1191477059-5357-5-git-send-email-tytso@mit.edu>
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Mingming Cao

From: Mingming Cao <cmm@us.ibm.com>

Convert kmalloc to kzalloc() and get rid of the memset().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext4/xattr.c       |    3 +--
 fs/jbd2/journal.c     |    3 +--
 fs/jbd2/transaction.c |    3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b10d68f..12c7d65 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 		}
 	} else {
 		/* Allocate a buffer where we construct the new block. */
-		s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
 		/* assert(header == s->base) */
 		error = -ENOMEM;
 		if (s->base == NULL)
 			goto cleanup;
-		memset(s->base, 0, sb->s_blocksize);
 		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
 		header(s->base)->h_blocks = cpu_to_le32(1);
 		header(s->base)->h_refcount = cpu_to_le32(1);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0e329a3..f12c65b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -654,10 +654,9 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
 	if (!journal)
 		goto fail;
-	memset(journal, 0, sizeof(*journal));
 
 	init_waitqueue_head(&journal->j_wait_transaction_locked);
 	init_waitqueue_head(&journal->j_wait_logspace);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a5fb70f..b1fcf2b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kmalloc(sizeof(*new_transaction),
+		new_transaction = kzalloc(sizeof(*new_transaction),
 						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		memset(new_transaction, 0, sizeof(*new_transaction));
 	}
 
 	jbd_debug(3, "New handle %p going live.\n", handle);
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] jbd2: fix commit code to properly abort journal
       [not found]       ` <1191477059-5357-5-git-send-email-tytso@mit.edu>
@ 2007-10-04  5:50         ` Theodore Ts'o
  2007-10-04  5:50           ` [PATCH] JBD2: debug code cleanup Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jan Kara, Andrew Morton

From: Jan Kara <jack@suse.cz>

We should really call journal_abort() and not __journal_abort_hard() in
case of errors.  The latter call does not record the error in the journal
superblock and thus filesystem won't be marked as with errors later (and
user could happily mount it without any warning).

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/jbd2/commit.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b898ee4..6986f33 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	spin_unlock(&journal->j_list_lock);
 
 	if (err)
-		__jbd2_journal_abort_hard(journal);
+		jbd2_journal_abort(journal, err);
 
 	jbd2_journal_write_revoke_records(journal, commit_transaction);
 
@@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 			descriptor = jbd2_journal_get_descriptor_buffer(journal);
 			if (!descriptor) {
-				__jbd2_journal_abort_hard(journal);
+				jbd2_journal_abort(journal, -EIO);
 				continue;
 			}
 
@@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		   and repeat this loop: we'll fall into the
 		   refile-on-abort condition above. */
 		if (err) {
-			__jbd2_journal_abort_hard(journal);
+			jbd2_journal_abort(journal, err);
 			continue;
 		}
 
@@ -757,7 +757,7 @@ wait_for_iobuf:
 		err = -EIO;
 
 	if (err)
-		__jbd2_journal_abort_hard(journal);
+		jbd2_journal_abort(journal, err);
 
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] JBD2: debug code cleanup.
  2007-10-04  5:50         ` [PATCH] jbd2: fix commit code to properly abort journal Theodore Ts'o
@ 2007-10-04  5:50           ` Theodore Ts'o
  2007-10-04  5:50             ` [PATCH] Once ext4 will not implement fragment, it is believed it will never be Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jose R. Santos, Andrew Morton

From: Jose R. Santos <jrs@us.ibm.com>

Mostly stolen from akpm's JBD cleanup patch.

- use `#ifdef foo' instead of `#if defined(foo)'

- Make journal_enable_debug __read_mostly just for the heck of it

- Make jbd_debugfs_dir and jbd_debug static

- debugfs_remove(NULL) is legal: remove unneeded tests

- remove unnecessary empty loops

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/jbd2/journal.c |   20 ++++++--------------
 1 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ff07bff..6ddc553 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1864,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
 /*
  * debugfs tunables
  */
-#if defined(CONFIG_JBD2_DEBUG)
-u8 jbd2_journal_enable_debug;
+#ifdef CONFIG_JBD2_DEBUG
+u8 jbd2_journal_enable_debug __read_mostly;
 EXPORT_SYMBOL(jbd2_journal_enable_debug);
-#endif
-
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
 
 #define JBD2_DEBUG_NAME "jbd2-debug"
 
-struct dentry *jbd2_debugfs_dir, *jbd2_debug;
+static struct dentry *jbd2_debugfs_dir;
+static struct dentry *jbd2_debug;
 
 static void __init jbd2_create_debugfs_entry(void)
 {
@@ -1886,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void)
 
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-	if (jbd2_debug)
-		debugfs_remove(jbd2_debug);
-	if (jbd2_debugfs_dir)
-		debugfs_remove(jbd2_debugfs_dir);
+	debugfs_remove(jbd2_debug);
+	debugfs_remove(jbd2_debugfs_dir);
 }
 
 #else
 
 static void __init jbd2_create_debugfs_entry(void)
 {
-	do {
-	} while (0);
 }
 
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-	do {
-	} while (0);
 }
 
 #endif
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] Once ext4 will not implement fragment, it is believed it will never be
  2007-10-04  5:50           ` [PATCH] JBD2: debug code cleanup Theodore Ts'o
@ 2007-10-04  5:50             ` Theodore Ts'o
  2007-10-04  5:50               ` [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Coly Li, Andrew Morton

From: Coly Li <coyli@suse.de>

implement in future.  Therefore fragment related source code in ext4 should
be obsoleted -- no one will use it.

This patch obsolete fragment from ext4.  Another patch posted on linux-ext4
removing fragment supporting from e2fsprogs.

Signed-off-by: Coly Li <coyli@suse.de>
Acked-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ext4/ialloc.c           |    5 -----
 fs/ext4/inode.c            |   10 ----------
 fs/ext4/super.c            |   15 ---------------
 include/linux/ext4_fs.h    |   35 ++++++-----------------------------
 include/linux/ext4_fs_i.h  |    5 -----
 include/linux/ext4_fs_sb.h |    3 ---
 6 files changed, 6 insertions(+), 67 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 427f830..b8b538d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -576,11 +576,6 @@ got:
 	/* dirsync only applies to directories */
 	if (!S_ISDIR(mode))
 		ei->i_flags &= ~EXT4_DIRSYNC_FL;
-#ifdef EXT4_FRAGMENTS
-	ei->i_faddr = 0;
-	ei->i_frag_no = 0;
-	ei->i_frag_size = 0;
-#endif
 	ei->i_file_acl = 0;
 	ei->i_dir_acl = 0;
 	ei->i_dtime = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4848e0..f283522 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2645,11 +2645,6 @@ void ext4_read_inode(struct inode * inode)
 	}
 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-#ifdef EXT4_FRAGMENTS
-	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
-	ei->i_frag_no = raw_inode->i_frag;
-	ei->i_frag_size = raw_inode->i_fsize;
-#endif
 	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_HURD))
@@ -2794,11 +2789,6 @@ static int ext4_do_update_inode(handle_t *handle,
 	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
 	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
 	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
-#ifdef EXT4_FRAGMENTS
-	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
-	raw_inode->i_frag = ei->i_frag_no;
-	raw_inode->i_fsize = ei->i_frag_size;
-#endif
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_HURD))
 		raw_inode->i_file_acl_high =
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 42cbdb5..420d39d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1655,14 +1655,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
 	}
-	sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
-				   le32_to_cpu(es->s_log_frag_size);
-	if (blocksize != sbi->s_frag_size) {
-		printk(KERN_ERR
-		       "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
-		       sbi->s_frag_size, blocksize);
-		goto failed_mount;
-	}
 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -1676,7 +1668,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	} else
 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
-	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
 	if (EXT4_INODE_SIZE(sb) == 0)
 		goto cantfind_ext4;
@@ -1700,12 +1691,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 			sbi->s_blocks_per_group);
 		goto failed_mount;
 	}
-	if (sbi->s_frags_per_group > blocksize * 8) {
-		printk (KERN_ERR
-			"EXT4-fs: #fragments per group too big: %lu\n",
-			sbi->s_frags_per_group);
-		goto failed_mount;
-	}
 	if (sbi->s_inodes_per_group > blocksize * 8) {
 		printk (KERN_ERR
 			"EXT4-fs: #inodes per group too big: %lu\n",
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..3baeb99 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,20 +105,6 @@
 #define EXT4_BLOCK_ALIGN(size, blkbits)		ALIGN((size), (1 << (blkbits)))
 
 /*
- * Macro-instructions used to manage fragments
- */
-#define EXT4_MIN_FRAG_SIZE		1024
-#define	EXT4_MAX_FRAG_SIZE		4096
-#define EXT4_MIN_FRAG_LOG_SIZE		  10
-#ifdef __KERNEL__
-# define EXT4_FRAG_SIZE(s)		(EXT4_SB(s)->s_frag_size)
-# define EXT4_FRAGS_PER_BLOCK(s)	(EXT4_SB(s)->s_frags_per_block)
-#else
-# define EXT4_FRAG_SIZE(s)		(EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size)
-# define EXT4_FRAGS_PER_BLOCK(s)	(EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s))
-#endif
-
-/*
  * Structure of a blocks group descriptor
  */
 struct ext4_group_desc
@@ -311,27 +297,24 @@ struct ext4_inode {
 	__le32	i_generation;	/* File version (for NFS) */
 	__le32	i_file_acl;	/* File ACL */
 	__le32	i_dir_acl;	/* Directory ACL */
-	__le32	i_faddr;	/* Fragment address */
+	__le32	i_obso_faddr;	/* Obsoleted fragment address */
 	union {
 		struct {
-			__u8	l_i_frag;	/* Fragment number */
-			__u8	l_i_fsize;	/* Fragment size */
+			__le16	l_i_reserved1;	/* Obsoleted fragment number/size which are removed in ext4 */
 			__le16	l_i_file_acl_high;
 			__le16	l_i_uid_high;	/* these 2 fields */
 			__le16	l_i_gid_high;	/* were reserved2[0] */
 			__u32	l_i_reserved2;
 		} linux2;
 		struct {
-			__u8	h_i_frag;	/* Fragment number */
-			__u8	h_i_fsize;	/* Fragment size */
+			__le16	h_i_reserved1;	/* Obsoleted fragment number/size which are removed in ext4 */
 			__u16	h_i_mode_high;
 			__u16	h_i_uid_high;
 			__u16	h_i_gid_high;
 			__u32	h_i_author;
 		} hurd2;
 		struct {
-			__u8	m_i_frag;	/* Fragment number */
-			__u8	m_i_fsize;	/* Fragment size */
+			__le16	h_i_reserved1;	/* Obsoleted fragment number/size which are removed in ext4 */
 			__le16	m_i_file_acl_high;
 			__u32	m_i_reserved2[2];
 		} masix2;
@@ -419,8 +402,6 @@ do {									       \
 
 #if defined(__KERNEL__) || defined(__linux__)
 #define i_reserved1	osd1.linux1.l_i_reserved1
-#define i_frag		osd2.linux2.l_i_frag
-#define i_fsize		osd2.linux2.l_i_fsize
 #define i_file_acl_high	osd2.linux2.l_i_file_acl_high
 #define i_uid_low	i_uid
 #define i_gid_low	i_gid
@@ -431,8 +412,6 @@ do {									       \
 #elif defined(__GNU__)
 
 #define i_translator	osd1.hurd1.h_i_translator
-#define i_frag		osd2.hurd2.h_i_frag;
-#define i_fsize		osd2.hurd2.h_i_fsize;
 #define i_uid_high	osd2.hurd2.h_i_uid_high
 #define i_gid_high	osd2.hurd2.h_i_gid_high
 #define i_author	osd2.hurd2.h_i_author
@@ -440,8 +419,6 @@ do {									       \
 #elif defined(__masix__)
 
 #define i_reserved1	osd1.masix1.m_i_reserved1
-#define i_frag		osd2.masix2.m_i_frag
-#define i_fsize		osd2.masix2.m_i_fsize
 #define i_file_acl_high	osd2.masix2.m_i_file_acl_high
 #define i_reserved2	osd2.masix2.m_i_reserved2
 
@@ -528,9 +505,9 @@ struct ext4_super_block {
 /*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
 	__le32	s_first_data_block;	/* First Data Block */
 	__le32	s_log_block_size;	/* Block size */
-	__le32	s_log_frag_size;	/* Fragment size */
+	__le32	s_obso_log_frag_size;	/* Obsoleted fragment size */
 /*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
-	__le32	s_frags_per_group;	/* # Fragments per group */
+	__le32	s_obso_frags_per_group;	/* Obsoleted fragments per group */
 	__le32	s_inodes_per_group;	/* # Inodes per group */
 	__le32	s_mtime;		/* Mount time */
 /*30*/	__le32	s_wtime;		/* Write time */
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 1a511e9..86ddfe2 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -78,11 +78,6 @@ struct ext4_ext_cache {
 struct ext4_inode_info {
 	__le32	i_data[15];	/* unconverted */
 	__u32	i_flags;
-#ifdef EXT4_FRAGMENTS
-	__u32	i_faddr;
-	__u8	i_frag_no;
-	__u8	i_frag_size;
-#endif
 	ext4_fsblk_t	i_file_acl;
 	__u32	i_dir_acl;
 	__u32	i_dtime;
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 1b2ffee..a978fba 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -28,11 +28,8 @@
  * third extended-fs super-block data in memory
  */
 struct ext4_sb_info {
-	unsigned long s_frag_size;	/* Size of a fragment in bytes */
 	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
-	unsigned long s_frags_per_block;/* Number of fragments per block */
 	unsigned long s_inodes_per_block;/* Number of inodes per block */
-	unsigned long s_frags_per_group;/* Number of fragments in a group */
 	unsigned long s_blocks_per_group;/* Number of blocks in a group */
 	unsigned long s_inodes_per_group;/* Number of inodes in a group */
 	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX
  2007-10-04  5:50             ` [PATCH] Once ext4 will not implement fragment, it is believed it will never be Theodore Ts'o
@ 2007-10-04  5:50               ` Theodore Ts'o
  2007-10-04  5:50                 ` [PATCH] Ext4: Uninitialized Block Groups Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ext4, Eric Sandeen, Mingming Cao, Theodore Ts'o,
	Andrew Morton

From: Eric Sandeen <sandeen@redhat.com>

CONFIG_EXT4_INDEX is not an exposed config option in the kernel, and it is
unconditionally defined in ext4_fs.h.  tune2fs is already able to turn off
dir indexing, so at this point it's just cluttering up the code.  Remove
it.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ext4/dir.c           |    7 -------
 fs/ext4/namei.c         |   20 --------------------
 include/linux/ext4_fs.h |   14 ++------------
 3 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ab01c0..2ba49ff 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.fsync		= ext4_sync_file,	/* BKL held */
-#ifdef CONFIG_EXT4_INDEX
 	.release	= ext4_release_dir,
-#endif
 };
 
 
@@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,
 
 	sb = inode->i_sb;
 
-#ifdef CONFIG_EXT4_INDEX
 	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 				    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
 	    ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
@@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
 		 */
 		EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
 	}
-#endif
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
@@ -232,7 +228,6 @@ out:
 	return ret;
 }
 
-#ifdef CONFIG_EXT4_INDEX
 /*
  * These functions convert from the major/minor hash to an f_pos
  * value.
@@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)
 
 	return 0;
 }
-
-#endif
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fdb862..94ee6f3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,6 @@ struct dx_map_entry
 	u16 size;
 };
 
-#ifdef CONFIG_EXT4_INDEX
 static inline unsigned dx_get_block (struct dx_entry *entry);
 static void dx_set_block (struct dx_entry *entry, unsigned value);
 static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
 	dx_set_block(new, block);
 	dx_set_count(entries, count + 1);
 }
-#endif
-
 
 static void ext4_update_dx_flag(struct inode *inode)
 {
@@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
 	name = dentry->d_name.name;
 	if (namelen > EXT4_NAME_LEN)
 		return NULL;
-#ifdef CONFIG_EXT4_INDEX
 	if (is_dx(dir)) {
 		bh = ext4_dx_find_entry(dentry, res_dir, &err);
 		/*
@@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
 			return bh;
 		dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
 	}
-#endif
 	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
 	start = EXT4_I(dir)->i_dir_start_lookup;
 	if (start >= nblocks)
@@ -957,7 +952,6 @@ cleanup_and_exit:
 	return ret;
 }
 
-#ifdef CONFIG_EXT4_INDEX
 static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
 		       struct ext4_dir_entry_2 **res_dir, int *err)
 {
@@ -1025,7 +1019,6 @@ errout:
 	dx_release (frames);
 	return NULL;
 }
-#endif
 
 static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
 {
@@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
 		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
 }
 
-#ifdef CONFIG_EXT4_INDEX
 /*
  * Move count entries from end of map between two memory locations.
  * Returns pointer to last entry moved.
@@ -1266,8 +1258,6 @@ errout:
 	*error = err;
 	return NULL;
 }
-#endif
-
 
 /*
  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
@@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 	return 0;
 }
 
-#ifdef CONFIG_EXT4_INDEX
 /*
  * This converts a one block unindexed directory to a 3 block indexed
  * directory, and adds the dentry to the indexed directory.
@@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
-#endif
 
 /*
  *	ext4_add_entry()
@@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 	struct ext4_dir_entry_2 *de;
 	struct super_block * sb;
 	int	retval;
-#ifdef CONFIG_EXT4_INDEX
 	int	dx_fallback=0;
-#endif
 	unsigned blocksize;
 	u32 block, blocks;
 
@@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 	blocksize = sb->s_blocksize;
 	if (!dentry->d_name.len)
 		return -EINVAL;
-#ifdef CONFIG_EXT4_INDEX
 	if (is_dx(dir)) {
 		retval = ext4_dx_add_entry(handle, dentry, inode);
 		if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 		dx_fallback++;
 		ext4_mark_inode_dirty(handle, dir);
 	}
-#endif
 	blocks = dir->i_size >> sb->s_blocksize_bits;
 	for (block = 0, offset = 0; block < blocks; block++) {
 		bh = ext4_bread(handle, dir, block, 0, &retval);
@@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 		if (retval != -ENOSPC)
 			return retval;
 
-#ifdef CONFIG_EXT4_INDEX
 		if (blocks == 1 && !dx_fallback &&
 		    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
 			return make_indexed_dir(handle, dentry, inode, bh);
-#endif
 		brelse(bh);
 	}
 	bh = ext4_append(handle, dir, &block, &retval);
@@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
 
-#ifdef CONFIG_EXT4_INDEX
 /*
  * Returns 0 for success, or a negative error value
  */
@@ -1644,7 +1625,6 @@ cleanup:
 	dx_release(frames);
 	return err;
 }
-#endif
 
 /*
  * ext4_delete_entry deletes a directory entry by merging it with the
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 3baeb99..151738a 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -36,10 +36,6 @@
 /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
 #define EXT4_MAX_RESERVE_BLOCKS		1027
 #define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
-/*
- * Always enable hashed directories
- */
-#define CONFIG_EXT4_INDEX
 
 /*
  * Debug code
@@ -766,17 +762,11 @@ struct ext4_dir_entry_2 {
  * (c) Daniel Phillips, 2001
  */
 
-#ifdef CONFIG_EXT4_INDEX
-  #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
-					      EXT4_FEATURE_COMPAT_DIR_INDEX) && \
+#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
+				      EXT4_FEATURE_COMPAT_DIR_INDEX) && \
 		      (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-#else
-  #define is_dx(dir) 0
-#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX)
-#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
-#endif
 
 /* Legal values for the dx_root hash_version field: */
 
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] Ext4: Uninitialized Block Groups
  2007-10-04  5:50               ` [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX Theodore Ts'o
@ 2007-10-04  5:50                 ` Theodore Ts'o
  2007-10-04  5:50                   ` [PATCH] ext4: Fix sparse warnings Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-ext4, Andreas Dilger, Avantika Mathur, Mingming Cao,
	Aneesh Kumar K.V

From: Andreas Dilger <adilger@clusterfs.com>

In pass1 of e2fsck, every inode table in the fileystem is scanned and checked,
regardless of whether it is in use.  This is this the most time consuming part
of the filesystem check.  The unintialized block group feature can greatly
reduce e2fsck time by eliminating checking of uninitialized inodes.

With this feature, there is a a high water mark of used inodes for each block
group.  Block and inode bitmaps can be uninitialized on disk via a flag in the
group descriptor to avoid reading or scanning them at e2fsck time.  A checksum
of each group descriptor is used to ensure that corruption in the group
descriptor's bit flags does not cause incorrect operation.

The feature is enabled through a mkfs option

	mke2fs /dev/ -O uninit_groups

A patch adding support for uninitialized block groups to e2fsprogs tools has
been posted to the linux-ext4 mailing list.

The patches have been stress tested with fsstress and fsx.  In performance
tests testing e2fsck time, we have seen that e2fsck time on ext3 grows
linearly with the total number of inodes in the filesytem.  In ext4 with the
uninitialized block groups feature, the e2fsck time is constant, based
solely on the number of used inodes rather than the total inode count.
Since typical ext4 filesystems only use 1-10% of their inodes, this feature can
greatly reduce e2fsck time for users.  With performance improvement of 2-20
times, depending on how full the filesystem is.

The attached graph shows the major improvements in e2fsck times in filesystems
with a large total inode count, but few inodes in use.

In each group descriptor if we have

EXT4_BG_INODE_UNINIT set in bg_flags:
        Inode table is not initialized/used in this group. So we can skip
        the consistency check during fsck.
EXT4_BG_BLOCK_UNINIT set in bg_flags:
        No block in the group is used. So we can skip the block bitmap
        verification for this group.

We also add two new fields to group descriptor as a part of
uninitialized group patch.

        __le16  bg_itable_unused;       /* Unused inodes count */
        __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */


bg_itable_unused:

If we have EXT4_BG_INODE_UNINIT not set in bg_flags
then bg_itable_unused will give the offset within
the inode table till the inodes are used. This can be
used by fsck to skip list of inodes that are marked unused.


bg_checksum:
Now that we depend on bg_flags and bg_itable_unused to determine
the block and inode usage, we need to make sure group descriptor
is not corrupt. We add checksum to group descriptor to
detect corruption. If the descriptor is found to be corrupt, we
mark all the blocks and inodes in the group used.


Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/Kconfig              |    1 +
 fs/ext4/balloc.c        |   92 ++++++++++++++++++++++++++++-
 fs/ext4/group.h         |   29 +++++++++
 fs/ext4/ialloc.c        |  146 ++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/resize.c        |    2 +
 fs/ext4/super.c         |   47 +++++++++++++++
 include/linux/ext4_fs.h |   16 ++++-
 7 files changed, 317 insertions(+), 16 deletions(-)
 create mode 100644 fs/ext4/group.h

diff --git a/fs/Kconfig b/fs/Kconfig
index f9eed6d..97eef97 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,7 @@ config EXT4DEV_FS
 	tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 	select JBD2
+	select CRC16
 	help
 	  Ext4dev is a predecessor filesystem of the next generation
 	  extended fs ext4, based on ext3 filesystem code. It will be
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e53b4af..d1a8882 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 
+#include "group.h"
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -42,6 +43,74 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 
 }
 
+/* Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+				int block_group, struct ext4_group_desc *gdp)
+{
+	unsigned long start;
+	int bit, bit_max;
+	unsigned free_blocks;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	if (bh) {
+		J_ASSERT_BH(bh, buffer_locked(bh));
+
+		/* If checksum is bad mark all blocks used to prevent allocation
+		 * essentially implementing a per-group read-only flag. */
+		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+			ext4_error(sb, __FUNCTION__,
+				   "Checksum bad for group %u\n", block_group);
+			gdp->bg_free_blocks_count = 0;
+			gdp->bg_free_inodes_count = 0;
+			gdp->bg_itable_unused = 0;
+			memset(bh->b_data, 0xff, sb->s_blocksize);
+			return 0;
+		}
+		memset(bh->b_data, 0, sb->s_blocksize);
+	}
+
+	/* Check for superblock and gdt backups in this group */
+	bit_max = ext4_bg_has_super(sb, block_group);
+
+	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+			  sbi->s_desc_per_block) {
+		if (bit_max) {
+			bit_max += ext4_bg_num_gdb(sb, block_group);
+			bit_max +=
+				le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+		}
+	} else { /* For META_BG_BLOCK_GROUPS */
+		int group_rel = (block_group -
+				 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+				EXT4_DESC_PER_BLOCK(sb);
+		if (group_rel == 0 || group_rel == 1 ||
+		    (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+			bit_max += 1;
+	}
+
+	/* Last and first groups are always initialized */
+	free_blocks = EXT4_BLOCKS_PER_GROUP(sb) - bit_max;
+
+	if (bh) {
+		for (bit = 0; bit < bit_max; bit++)
+			ext4_set_bit(bit, bh->b_data);
+
+		start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+			le32_to_cpu(sbi->s_es->s_first_data_block);
+
+		/* Set bits for block and inode bitmaps, and inode table */
+		ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+		ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+		for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+		     bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+			ext4_set_bit(bit, bh->b_data);
+	}
+
+	return free_blocks - sbi->s_itb_per_group - 2;
+}
+
 /*
  * The free blocks are managed by bitmaps.  A file system contains several
  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -110,16 +179,29 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
  *
  * Return buffer_head on success or NULL in case of failure.
  */
-static struct buffer_head *
+struct buffer_head *
 read_block_bitmap(struct super_block *sb, unsigned int block_group)
 {
 	struct ext4_group_desc * desc;
 	struct buffer_head * bh = NULL;
 
-	desc = ext4_get_group_desc (sb, block_group, NULL);
+	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
-	bh = sb_bread(sb, ext4_block_bitmap(sb, desc));
+	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+		bh = sb_getblk(sb, ext4_block_bitmap(sb, desc));
+		if (!buffer_uptodate(bh)) {
+			lock_buffer(bh);
+			if (!buffer_uptodate(bh)) {
+				ext4_init_block_bitmap(sb, bh, block_group,
+						       desc);
+				set_buffer_uptodate(bh);
+			}
+			unlock_buffer(bh);
+		}
+	} else {
+		bh = sb_bread(sb, ext4_block_bitmap(sb,desc));
+	}
 	if (!bh)
 		ext4_error (sb, "read_block_bitmap",
 			    "Cannot read block bitmap - "
@@ -586,6 +668,7 @@ do_more:
 	desc->bg_free_blocks_count =
 		cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
 			group_freed);
+	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	spin_unlock(sb_bgl_lock(sbi, block_group));
 	percpu_counter_mod(&sbi->s_freeblocks_counter, count);
 
@@ -1644,8 +1727,11 @@ allocated:
 			ret_block, goal_hits, goal_attempts);
 
 	spin_lock(sb_bgl_lock(sbi, group_no));
+	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 	gdp->bg_free_blocks_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group_no));
 	percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
 
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
new file mode 100644
index 0000000..9310979
--- /dev/null
+++ b/fs/ext4/group.h
@@ -0,0 +1,29 @@
+/*
+ *  linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+#if defined(CONFIG_CRC16)
+#include <linux/crc16.h>
+#endif
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+				   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+				       struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+				      unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+				       struct buffer_head *bh, int group,
+				       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)			\
+		ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh, int group,
+				       struct ext4_group_desc *desc);
+#endif /* _LINUX_EXT4_GROUP_H */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b8b538d..1fa418c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -28,6 +28,7 @@
 
 #include "xattr.h"
 #include "acl.h"
+#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
  * the free blocks count in the block.
  */
 
+/*
+ * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * need to use it within a single byte (to ensure we get endianness right).
+ * We can use memset for the rest of the bitmap as there are no other users.
+ */
+static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+	int i;
+
+	if (start_bit >= end_bit)
+		return;
+
+	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
+		ext4_set_bit(i, bitmap);
+	if (i < end_bit)
+		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+}
+
+/* Initializes an uninitialized inode bitmap */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				struct buffer_head *bh, int block_group,
+				struct ext4_group_desc *gdp)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	J_ASSERT_BH(bh, buffer_locked(bh));
+
+	/* If checksum is bad mark all blocks and inodes use to prevent
+	 * allocation, essentially implementing a per-group read-only flag. */
+	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+		ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+			   block_group);
+		gdp->bg_free_blocks_count = 0;
+		gdp->bg_free_inodes_count = 0;
+		gdp->bg_itable_unused = 0;
+		memset(bh->b_data, 0xff, sb->s_blocksize);
+		return 0;
+	}
+
+	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+			bh->b_data);
+
+	return EXT4_INODES_PER_GROUP(sb);
+}
 
 /*
  * Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
 		goto error_out;
-
-	bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+		bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+		if (!buffer_uptodate(bh)) {
+			lock_buffer(bh);
+			if (!buffer_uptodate(bh)) {
+				ext4_init_inode_bitmap(sb, bh, block_group,
+						       desc);
+				set_buffer_uptodate(bh);
+			}
+			unlock_buffer(bh);
+		}
+	} else {
+		bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+	}
 	if (!bh)
 		ext4_error(sb, "read_inode_bitmap",
 			    "Cannot read inode bitmap - "
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
 			if (is_directory)
 				gdp->bg_used_dirs_count = cpu_to_le16(
 				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+			gdp->bg_checksum = ext4_group_desc_csum(sbi,
+							block_group, gdp);
 			spin_unlock(sb_bgl_lock(sbi, block_group));
 			percpu_counter_inc(&sbi->s_freeinodes_counter);
 			if (is_directory)
@@ -438,7 +499,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
 	struct ext4_sb_info *sbi;
 	int err = 0;
 	struct inode *ret;
-	int i;
+	int i, free = 0;
 
 	/* Cannot create files in a deleted directory */
 	if (!dir || !dir->i_nlink)
@@ -520,11 +581,13 @@ repeat_in_this_group:
 	goto out;
 
 got:
-	ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
-	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-		ext4_error (sb, "ext4_new_inode",
-			    "reserved inode or inode > inodes count - "
-			    "block_group = %d, inode=%lu", group, ino);
+	ino++;
+	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+	    ino > EXT4_INODES_PER_GROUP(sb)) {
+		ext4_error(sb, __FUNCTION__,
+			   "reserved inode or inode > inodes count - "
+			   "block_group = %d, inode=%lu", group,
+			   ino + group * EXT4_INODES_PER_GROUP(sb));
 		err = -EIO;
 		goto fail;
 	}
@@ -532,13 +595,78 @@ got:
 	BUFFER_TRACE(bh2, "get_write_access");
 	err = ext4_journal_get_write_access(handle, bh2);
 	if (err) goto fail;
+
+	/* We may have to initialize the block bitmap if it isn't already */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+		struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+		BUFFER_TRACE(block_bh, "get block bitmap access");
+		err = ext4_journal_get_write_access(handle, block_bh);
+		if (err) {
+			brelse(block_bh);
+			goto fail;
+		}
+
+		free = 0;
+		spin_lock(sb_bgl_lock(sbi, group));
+		/* recheck and clear flag under lock if we still need to */
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+			free = ext4_free_blocks_after_init(sb, group, gdp);
+			gdp->bg_free_blocks_count = cpu_to_le16(free);
+		}
+		spin_unlock(sb_bgl_lock(sbi, group));
+
+		/* Don't need to dirty bitmap block if we didn't change it */
+		if (free) {
+			BUFFER_TRACE(block_bh, "dirty block bitmap");
+			err = ext4_journal_dirty_metadata(handle, block_bh);
+		}
+
+		brelse(block_bh);
+		if (err)
+			goto fail;
+	}
+
 	spin_lock(sb_bgl_lock(sbi, group));
+	/* If we didn't allocate from within the initialized part of the inode
+	 * table then we need to initialize up to this inode. */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+			/* When marking the block group with
+			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
+			 * on the value of bg_itable_unsed even though
+			 * mke2fs could have initialized the same for us.
+			 * Instead we calculated the value below
+			 */
+
+			free = 0;
+		} else {
+			free = EXT4_INODES_PER_GROUP(sb) -
+				le16_to_cpu(gdp->bg_itable_unused);
+		}
+
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to  update the bg_itable_unused count
+		 *
+		 */
+		if (ino > free)
+			gdp->bg_itable_unused =
+				cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+	}
+
 	gdp->bg_free_inodes_count =
 		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
 	if (S_ISDIR(mode)) {
 		gdp->bg_used_dirs_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
 	}
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, group));
 	BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
 	err = ext4_journal_dirty_metadata(handle, bh2);
@@ -560,7 +688,7 @@ got:
 		inode->i_gid = current->fsgid;
 	inode->i_mode = mode;
 
-	inode->i_ino = ino;
+	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
 	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = 0;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index aa11d7d..3359450 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 
+#include "group.h"
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
@@ -842,6 +843,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
 	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
 	gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
 
 	/*
 	 * Make the new blocks and inodes valid next.  We do this before
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 420d39d..b59610d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
+#include <linux/crc16.h>
 
 #include <asm/uaccess.h>
 
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
+#include "group.h"
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
@@ -1237,6 +1239,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	return res;
 }
 
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+			    struct ext4_group_desc *gdp)
+{
+	__u16 crc = 0;
+
+	if (sbi->s_es->s_feature_ro_compat &
+	    cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		int offset = offsetof(struct ext4_group_desc, bg_checksum);
+		__le32 le_group = cpu_to_le32(block_group);
+
+		crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+		crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+		crc = crc16(crc, (__u8 *)gdp, offset);
+		offset += sizeof(gdp->bg_checksum); /* skip checksum */
+		/* for checksum of struct ext4_group_desc do the rest...*/
+		if ((sbi->s_es->s_feature_incompat &
+		     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+		    offset < le16_to_cpu(sbi->s_es->s_desc_size))
+			crc = crc16(crc, (__u8 *)gdp + offset,
+				    le16_to_cpu(sbi->s_es->s_desc_size) -
+					offset);
+	}
+
+	return cpu_to_le16(crc);
+}
+
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+				struct ext4_group_desc *gdp)
+{
+	if ((sbi->s_es->s_feature_ro_compat &
+	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+	    (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+		return 0;
+
+	return 1;
+}
+
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors (struct super_block * sb)
 {
@@ -1291,6 +1330,14 @@ static int ext4_check_descriptors (struct super_block * sb)
 				    i, inode_table);
 			return 0;
 		}
+		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+			ext4_error(sb, __FUNCTION__,
+				   "Checksum for group %d failed (%u!=%u)\n", i,
+				   le16_to_cpu(ext4_group_desc_csum(sbi, i,
+								    gdp)),
+				   le16_to_cpu(gdp->bg_checksum));
+			return 0;
+		}
 		first_block += EXT4_BLOCKS_PER_GROUP(sb);
 		gdp = (struct ext4_group_desc *)
 			((__u8 *)gdp + EXT4_DESC_SIZE(sb));
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 151738a..b77b59f 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,19 +105,25 @@
  */
 struct ext4_group_desc
 {
-	__le32	bg_block_bitmap;		/* Blocks bitmap block */
-	__le32	bg_inode_bitmap;		/* Inodes bitmap block */
+	__le32	bg_block_bitmap;	/* Blocks bitmap block */
+	__le32	bg_inode_bitmap;	/* Inodes bitmap block */
 	__le32	bg_inode_table;		/* Inodes table block */
 	__le16	bg_free_blocks_count;	/* Free blocks count */
 	__le16	bg_free_inodes_count;	/* Free inodes count */
 	__le16	bg_used_dirs_count;	/* Directories count */
-	__u16	bg_flags;
-	__u32	bg_reserved[3];
+	__le16	bg_flags;		/* EXT4_BG_flags (INODE_UNINIT, etc) */
+	__u32	bg_reserved[2];		/* Likely block/inode bitmap checksum */
+	__le16  bg_itable_unused;	/* Unused inodes count */
+	__le16  bg_checksum;		/* crc16(sb_uuid+group+desc) */
 	__le32	bg_block_bitmap_hi;	/* Blocks bitmap block MSB */
 	__le32	bg_inode_bitmap_hi;	/* Inodes bitmap block MSB */
 	__le32	bg_inode_table_hi;	/* Inodes table block MSB */
 };
 
+#define EXT4_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */
+
 #ifdef __KERNEL__
 #include <linux/ext4_fs_i.h>
 #include <linux/ext4_fs_sb.h>
@@ -665,6 +671,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR	0x0004
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
 
@@ -684,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 					 EXT4_FEATURE_INCOMPAT_64BIT)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Fix sparse warnings
  2007-10-04  5:50                 ` [PATCH] Ext4: Uninitialized Block Groups Theodore Ts'o
@ 2007-10-04  5:50                   ` Theodore Ts'o
  2007-10-04  5:50                     ` [PATCH] This feature relaxes check restrictions on where each block groups meta Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V, Andrew Morton

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ext4/inode.c         |    6 ++++--
 include/linux/ext4_fs.h |   14 +++++++-------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f283522..a2e1ea4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3167,12 +3167,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 						      iloc, handle);
 			if (ret) {
 				EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
-				if (mnt_count != sbi->s_es->s_mnt_count) {
+				if (mnt_count !=
+					le16_to_cpu(sbi->s_es->s_mnt_count)) {
 					ext4_warning(inode->i_sb, __FUNCTION__,
 					"Unable to expand inode %lu. Delete"
 					" some EAs or run e2fsck.",
 					inode->i_ino);
-					mnt_count = sbi->s_es->s_mnt_count;
+					mnt_count =
+					  le16_to_cpu(sbi->s_es->s_mnt_count);
 				}
 			}
 		}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index b77b59f..722d4ef 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -574,13 +574,13 @@ struct ext4_super_block {
 /*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
 	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
 	__le32	s_free_blocks_count_hi;	/* Free blocks count */
-	__u16	s_min_extra_isize;	/* All inodes have at least # bytes */
-	__u16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
-	__u32	s_flags;		/* Miscellaneous flags */
-	__u16   s_raid_stride;		/* RAID stride */
-	__u16   s_mmp_interval;         /* # seconds to wait in MMP checking */
-	__u64   s_mmp_block;            /* Block for multi-mount protection */
-	__u32   s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
+	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
+	__le32	s_flags;		/* Miscellaneous flags */
+	__le16  s_raid_stride;		/* RAID stride */
+	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+	__le64  s_mmp_block;            /* Block for multi-mount protection */
+	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
 	__u32   s_reserved[163];        /* Padding to the end of the block */
 };
 
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] This feature relaxes check restrictions on where each block groups meta
  2007-10-04  5:50                   ` [PATCH] ext4: Fix sparse warnings Theodore Ts'o
@ 2007-10-04  5:50                     ` Theodore Ts'o
  2007-10-04  5:50                       ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4 Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jose R. Santos, Andrew Morton

From: Jose R. Santos <jrs@us.ibm.com>

data is located within the storage media.  This allows for the allocation
of bitmaps or inode tables outside the block group boundaries in cases
where bad blocks forces us to look for new blocks which the owning block
group can not satisfy.  This will also allow for new meta-data allocation
schemes to improve performance and scalability.

Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ext4/super.c         |    9 +++++++--
 include/linux/ext4_fs.h |    4 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b59610d..619db84 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1287,13 +1287,17 @@ static int ext4_check_descriptors (struct super_block * sb)
 	ext4_fsblk_t inode_table;
 	struct ext4_group_desc * gdp = NULL;
 	int desc_block = 0;
+	int flexbg_flag = 0;
 	int i;
 
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+		flexbg_flag = 1;
+
 	ext4_debug ("Checking group descriptors");
 
 	for (i = 0; i < sbi->s_groups_count; i++)
 	{
-		if (i == sbi->s_groups_count - 1)
+		if (i == sbi->s_groups_count - 1 || flexbg_flag)
 			last_block = ext4_blocks_count(sbi->s_es) - 1;
 		else
 			last_block = first_block +
@@ -1338,7 +1342,8 @@ static int ext4_check_descriptors (struct super_block * sb)
 				   le16_to_cpu(gdp->bg_checksum));
 			return 0;
 		}
-		first_block += EXT4_BLOCKS_PER_GROUP(sb);
+		if (!flexbg_flag)
+			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 		gdp = (struct ext4_group_desc *)
 			((__u8 *)gdp + EXT4_DESC_SIZE(sb));
 	}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 722d4ef..46b304d 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -682,13 +682,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_META_BG		0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT4_FEATURE_INCOMPAT_RECOVER| \
 					 EXT4_FEATURE_INCOMPAT_META_BG| \
 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-					 EXT4_FEATURE_INCOMPAT_64BIT)
+					 EXT4_FEATURE_INCOMPAT_64BIT| \
+					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4
  2007-10-04  5:50                     ` [PATCH] This feature relaxes check restrictions on where each block groups meta Theodore Ts'o
@ 2007-10-04  5:50                       ` Theodore Ts'o
  2007-10-04  5:50                         ` [PATCH] ext4: Avoid rec_len overflow with 64KB block size Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Takashi Sato, Mingming Cao

From: Takashi Sato <sho@tnes.nec.co.jp>

This patch set supports large block size(>4k, <=64k) in ext4,
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext4 without some changes to the directory handling
code.  The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem.  The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
  [1/2]  ext4: enlarge blocksize
         - Allow blocksize up to pagesize

  [2/2]  ext4: fix rec_len overflow
         - prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext4dev, and able to handle empty directory block.

Signed-off-by: Takashi Sato <sho@tnes.nec.co.jp>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext4/super.c         |    5 +++++
 include/linux/ext4_fs.h |    4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 619db84..d8bb279 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1548,6 +1548,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 		goto out_fail;
 	}
 
+	if (!sb_set_blocksize(sb, blocksize)) {
+		printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
+		goto out_fail;
+	}
+
 	/*
 	 * The ext4 superblock will not be buffer aligned for other than 1kB
 	 * block sizes.  We need to calculate the offset from buffer start.
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 46b304d..5d90616 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -73,8 +73,8 @@
  * Macro-instructions used to manage several block sizes
  */
 #define EXT4_MIN_BLOCK_SIZE		1024
-#define	EXT4_MAX_BLOCK_SIZE		4096
-#define EXT4_MIN_BLOCK_LOG_SIZE		  10
+#define	EXT4_MAX_BLOCK_SIZE		65536
+#define EXT4_MIN_BLOCK_LOG_SIZE		10
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE(s)		((s)->s_blocksize)
 #else
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Avoid rec_len overflow with 64KB block size
  2007-10-04  5:50                       ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4 Theodore Ts'o
@ 2007-10-04  5:50                         ` Theodore Ts'o
  2007-10-04  5:50                           ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext2 Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jan Kara, Mingming Cao

From: Jan Kara <jack@suse.cz>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk. The patch also converts some places
to use ext4_next_entry() when we are changing them anyway.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext4/dir.c           |   12 ++++----
 fs/ext4/namei.c         |   76 ++++++++++++++++++++++------------------------
 include/linux/ext4_fs.h |   20 ++++++++++++
 3 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2ba49ff..8236352 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
 			  unsigned long offset)
 {
 	const char * error_msg = NULL;
-	const int rlen = le16_to_cpu(de->rec_len);
+	const int rlen = ext4_rec_len_from_disk(de->rec_len);
 
 	if (rlen < EXT4_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
@@ -172,10 +172,10 @@ revalidate:
 				 * least that it is non-zero.  A
 				 * failure will be detected in the
 				 * dirent test below. */
-				if (le16_to_cpu(de->rec_len) <
-						EXT4_DIR_REC_LEN(1))
+				if (ext4_rec_len_from_disk(de->rec_len)
+						< EXT4_DIR_REC_LEN(1))
 					break;
-				i += le16_to_cpu(de->rec_len);
+				i += ext4_rec_len_from_disk(de->rec_len);
 			}
 			offset = i;
 			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -197,7 +197,7 @@ revalidate:
 				ret = stored;
 				goto out;
 			}
-			offset += le16_to_cpu(de->rec_len);
+			offset += ext4_rec_len_from_disk(de->rec_len);
 			if (le32_to_cpu(de->inode)) {
 				/* We might block in the next section
 				 * if the data destination is
@@ -219,7 +219,7 @@ revalidate:
 					goto revalidate;
 				stored ++;
 			}
-			filp->f_pos += le16_to_cpu(de->rec_len);
+			filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
 		}
 		offset = 0;
 		brelse (bh);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 94ee6f3..bc3825d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -280,7 +280,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
 			space += EXT4_DIR_REC_LEN(de->name_len);
 			names++;
 		}
-		de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = ext4_next_entry(de);
 	}
 	printk("(%i)\n", names);
 	return (struct stats) { names, space, 1 };
@@ -551,7 +551,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
  */
 static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
 {
-	return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
+	return (struct ext4_dir_entry_2 *)((char*)p +
+		ext4_rec_len_from_disk(p->rec_len));
 }
 
 /*
@@ -720,7 +721,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
 			cond_resched();
 		}
 		/* XXX: do we need to check rec_len == 0 case? -Chris */
-		de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = ext4_next_entry(de);
 	}
 	return count;
 }
@@ -820,7 +821,7 @@ static inline int search_dirblock(struct buffer_head * bh,
 			return 1;
 		}
 		/* prevent looping on a bad block */
-		de_len = le16_to_cpu(de->rec_len);
+		de_len = ext4_rec_len_from_disk(de->rec_len);
 		if (de_len <= 0)
 			return -1;
 		offset += de_len;
@@ -1128,7 +1129,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
-				cpu_to_le16(rec_len);
+				ext4_rec_len_to_disk(rec_len);
 		de->inode = 0;
 		map++;
 		to += rec_len;
@@ -1147,13 +1148,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
 
 	prev = to = de;
 	while ((char*)de < base + size) {
-		next = (struct ext4_dir_entry_2 *) ((char *) de +
-						    le16_to_cpu(de->rec_len));
+		next = ext4_next_entry(de);
 		if (de->inode && de->name_len) {
 			rec_len = EXT4_DIR_REC_LEN(de->name_len);
 			if (de > to)
 				memmove(to, de, rec_len);
-			to->rec_len = cpu_to_le16(rec_len);
+			to->rec_len = ext4_rec_len_to_disk(rec_len);
 			prev = to;
 			to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
 		}
@@ -1227,8 +1227,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	/* Fancy dance to stay within two buffers */
 	de2 = dx_move_dirents(data1, data2, map + split, count - split);
 	de = dx_pack_dirents(data1,blocksize);
-	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-	de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+	de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
+	de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
 	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
 	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
@@ -1297,7 +1297,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 				return -EEXIST;
 			}
 			nlen = EXT4_DIR_REC_LEN(de->name_len);
-			rlen = le16_to_cpu(de->rec_len);
+			rlen = ext4_rec_len_from_disk(de->rec_len);
 			if ((de->inode? rlen - nlen: rlen) >= reclen)
 				break;
 			de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@@ -1316,11 +1316,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 
 	/* By now the buffer is marked for journaling */
 	nlen = EXT4_DIR_REC_LEN(de->name_len);
-	rlen = le16_to_cpu(de->rec_len);
+	rlen = ext4_rec_len_from_disk(de->rec_len);
 	if (de->inode) {
 		struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
-		de1->rec_len = cpu_to_le16(rlen - nlen);
-		de->rec_len = cpu_to_le16(nlen);
+		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
+		de->rec_len = ext4_rec_len_to_disk(nlen);
 		de = de1;
 	}
 	de->file_type = EXT4_FT_UNKNOWN;
@@ -1397,17 +1397,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
 	/* The 0th block becomes the root, move the dirents out */
 	fde = &root->dotdot;
-	de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+	de = (struct ext4_dir_entry_2 *)((char *)fde +
+		ext4_rec_len_from_disk(fde->rec_len));
 	len = ((char *) root) + blocksize - (char *) de;
 	memcpy (data1, de, len);
 	de = (struct ext4_dir_entry_2 *) data1;
 	top = data1 + len;
-	while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
+	while ((char *)(de2 = ext4_next_entry(de)) < top)
 		de = de2;
-	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+	de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
 	/* Initialize the root; the dot dirents already exist */
 	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
-	de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2));
+	de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
 	memset (&root->info, 0, sizeof(root->info));
 	root->info.info_length = sizeof(root->info);
 	root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -1487,7 +1488,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
 		return retval;
 	de = (struct ext4_dir_entry_2 *) bh->b_data;
 	de->inode = 0;
-	de->rec_len = cpu_to_le16(blocksize);
+	de->rec_len = ext4_rec_len_to_disk(blocksize);
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
 
@@ -1550,7 +1551,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			goto cleanup;
 		node2 = (struct dx_node *)(bh2->b_data);
 		entries2 = node2->entries;
-		node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+		node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
 		node2->fake.inode = 0;
 		BUFFER_TRACE(frame->bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, frame->bh);
@@ -1648,9 +1649,9 @@ static int ext4_delete_entry (handle_t *handle,
 			BUFFER_TRACE(bh, "get_write_access");
 			ext4_journal_get_write_access(handle, bh);
 			if (pde)
-				pde->rec_len =
-					cpu_to_le16(le16_to_cpu(pde->rec_len) +
-						    le16_to_cpu(de->rec_len));
+				pde->rec_len = ext4_rec_len_to_disk(
+					ext4_rec_len_from_disk(pde->rec_len) +
+					ext4_rec_len_from_disk(de->rec_len));
 			else
 				de->inode = 0;
 			dir->i_version++;
@@ -1658,10 +1659,9 @@ static int ext4_delete_entry (handle_t *handle,
 			ext4_journal_dirty_metadata(handle, bh);
 			return 0;
 		}
-		i += le16_to_cpu(de->rec_len);
+		i += ext4_rec_len_from_disk(de->rec_len);
 		pde = de;
-		de = (struct ext4_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+		de = ext4_next_entry(de);
 	}
 	return -ENOENT;
 }
@@ -1824,13 +1824,12 @@ retry:
 	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
-	de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len));
+	de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
 	strcpy (de->name, ".");
 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-	de = (struct ext4_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+	de = ext4_next_entry(de);
 	de->inode = cpu_to_le32(dir->i_ino);
-	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
+	de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1));
 	de->name_len = 2;
 	strcpy (de->name, "..");
 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1882,8 +1881,7 @@ static int empty_dir (struct inode * inode)
 		return 1;
 	}
 	de = (struct ext4_dir_entry_2 *) bh->b_data;
-	de1 = (struct ext4_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+	de1 = ext4_next_entry(de);
 	if (le32_to_cpu(de->inode) != inode->i_ino ||
 			!le32_to_cpu(de1->inode) ||
 			strcmp (".", de->name) ||
@@ -1894,9 +1892,9 @@ static int empty_dir (struct inode * inode)
 		brelse (bh);
 		return 1;
 	}
-	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
-	de = (struct ext4_dir_entry_2 *)
-			((char *) de1 + le16_to_cpu(de1->rec_len));
+	offset = ext4_rec_len_from_disk(de->rec_len) +
+		 ext4_rec_len_from_disk(de1->rec_len);
+	de = ext4_next_entry(de1);
 	while (offset < inode->i_size ) {
 		if (!bh ||
 			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1925,9 +1923,8 @@ static int empty_dir (struct inode * inode)
 			brelse (bh);
 			return 0;
 		}
-		offset += le16_to_cpu(de->rec_len);
-		de = (struct ext4_dir_entry_2 *)
-				((char *) de + le16_to_cpu(de->rec_len));
+		offset += ext4_rec_len_from_disk(de->rec_len);
+		de = ext4_next_entry(de);
 	}
 	brelse (bh);
 	return 1;
@@ -2282,8 +2279,7 @@ retry:
 }
 
 #define PARENT_INO(buffer) \
-	((struct ext4_dir_entry_2 *) ((char *) buffer + \
-	le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
+	(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)
 
 /*
  * Anybody can rename anything with this: the permission checks are left to the
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 5d90616..ab608e8 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -767,6 +767,26 @@ struct ext4_dir_entry_2 {
 #define EXT4_DIR_ROUND			(EXT4_DIR_PAD - 1)
 #define EXT4_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT4_DIR_ROUND) & \
 					 ~EXT4_DIR_ROUND)
+#define EXT4_MAX_REC_LEN		((1<<16)-1)
+
+static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
+{
+	unsigned len = le16_to_cpu(dlen);
+
+	if (len == EXT4_MAX_REC_LEN)
+		return 1 << 16;
+	return len;
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len)
+{
+	if (len == (1 << 16))
+		return cpu_to_le16(EXT4_MAX_REC_LEN);
+	else if (len > (1 << 16))
+		BUG();
+	return cpu_to_le16(len);
+}
+
 /*
  * Hash Tree Directory indexing
  * (c) Daniel Phillips, 2001
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext2
  2007-10-04  5:50                         ` [PATCH] ext4: Avoid rec_len overflow with 64KB block size Theodore Ts'o
@ 2007-10-04  5:50                           ` Theodore Ts'o
  2007-10-04  5:50                             ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3 Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Takashi Sato, Mingming Cao

From: Takashi Sato <sho@tnes.nec.co.jp>

This patch set supports large block size(>4k, <=64k) in ext2,
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext2 without some changes to the directory handling
code.  The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem.  The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
  [1/2]  ext2: enlarge blocksize
         - Allow blocksize up to pagesize

  [2/2]  ext2: fix rec_len overflow
         - prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext2, and able to handle empty directory block.

Signed-off-by: Takashi Sato <sho@tnes.nec.co.jp>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext2/super.c         |    3 ++-
 include/linux/ext2_fs.h |    4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 639a32c..765c805 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		brelse(bh);
 
 		if (!sb_set_blocksize(sb, blocksize)) {
-			printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
+			printk(KERN_ERR "EXT2-fs: bad blocksize %d.\n",
+				blocksize);
 			goto failed_sbi;
 		}
 
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 153d755..910a705 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -86,8 +86,8 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
  * Macro-instructions used to manage several block sizes
  */
 #define EXT2_MIN_BLOCK_SIZE		1024
-#define	EXT2_MAX_BLOCK_SIZE		4096
-#define EXT2_MIN_BLOCK_LOG_SIZE		  10
+#define EXT2_MAX_BLOCK_SIZE		65536
+#define EXT2_MIN_BLOCK_LOG_SIZE		10
 #ifdef __KERNEL__
 # define EXT2_BLOCK_SIZE(s)		((s)->s_blocksize)
 #else
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3
  2007-10-04  5:50                           ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext2 Theodore Ts'o
@ 2007-10-04  5:50                             ` Theodore Ts'o
  2007-10-04  5:50                               ` [PATCH] ext2: Avoid rec_len overflow with 64KB block size Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Takashi Sato, Mingming Cao

From: Takashi Sato <sho@tnes.nec.co.jp>

This patch set supports large block size(>4k, <=64k) in ext3
just enlarging the block size limit. But it is NOT possible to have 64kB
blocksize on ext3 without some changes to the directory handling
code.  The reason is that an empty 64kB directory block would have a
rec_len == (__u16)2^16 == 0, and this would cause an error to be hit in
the filesystem.  The proposed solution is treat 64k rec_len
with a an impossible value like rec_len = 0xffff to handle this.

The Patch-set consists of the following 2 patches.
  [1/2]  ext3: enlarge blocksize
         - Allow blocksize up to pagesize

  [2/2]  ext3: fix rec_len overflow
         - prevent rec_len from overflow with 64KB blocksize

Now on 64k page ppc64 box runs with this patch set we could create a 64k
block size ext3, and able to handle empty directory block.

Signed-off-by: Takashi Sato <sho@tnes.nec.co.jp>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext3/super.c         |    6 +++++-
 include/linux/ext3_fs.h |    4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9537316..b4bfd36 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1549,7 +1549,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		}
 
 		brelse (bh);
-		sb_set_blocksize(sb, blocksize);
+		if (!sb_set_blocksize(sb, blocksize)) {
+			printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
+				blocksize);
+			goto out_fail;
+		}
 		logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
 		offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
 		bh = sb_bread(sb, logic_sb_block);
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index ece49a8..7aa5556 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -76,8 +76,8 @@
  * Macro-instructions used to manage several block sizes
  */
 #define EXT3_MIN_BLOCK_SIZE		1024
-#define	EXT3_MAX_BLOCK_SIZE		4096
-#define EXT3_MIN_BLOCK_LOG_SIZE		  10
+#define	EXT3_MAX_BLOCK_SIZE		65536
+#define EXT3_MIN_BLOCK_LOG_SIZE		10
 #ifdef __KERNEL__
 # define EXT3_BLOCK_SIZE(s)		((s)->s_blocksize)
 #else
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext2: Avoid rec_len overflow with 64KB block size
  2007-10-04  5:50                             ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3 Theodore Ts'o
@ 2007-10-04  5:50                               ` Theodore Ts'o
  2007-10-04  5:50                                 ` [PATCH] ext3: " Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jan Kara, Mingming Cao

From: Jan Kara <jack@suse.cz>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext2/dir.c           |   43 +++++++++++++++++++++++++++++++------------
 include/linux/ext2_fs.h |    1 +
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2bf49d7..1329bdb 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -26,6 +26,24 @@
 
 typedef struct ext2_dir_entry_2 ext2_dirent;
 
+static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
+{
+	unsigned len = le16_to_cpu(dlen);
+
+	if (len == EXT2_MAX_REC_LEN)
+		return 1 << 16;
+	return len;
+}
+
+static inline __le16 ext2_rec_len_to_disk(unsigned len)
+{
+	if (len == (1 << 16))
+		return cpu_to_le16(EXT2_MAX_REC_LEN);
+	else if (len > (1 << 16))
+		BUG();
+	return cpu_to_le16(len);
+}
+
 /*
  * ext2 uses block-sized chunks. Arguably, sector-sized ones would be
  * more robust, but we have what we have
@@ -95,7 +113,7 @@ static void ext2_check_page(struct page *page)
 	}
 	for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) {
 		p = (ext2_dirent *)(kaddr + offs);
-		rec_len = le16_to_cpu(p->rec_len);
+		rec_len = ext2_rec_len_from_disk(p->rec_len);
 
 		if (rec_len < EXT2_DIR_REC_LEN(1))
 			goto Eshort;
@@ -193,7 +211,8 @@ static inline int ext2_match (int len, const char * const name,
  */
 static inline ext2_dirent *ext2_next_entry(ext2_dirent *p)
 {
-	return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len));
+	return (ext2_dirent *)((char*)p +
+			ext2_rec_len_from_disk(p->rec_len));
 }
 
 static inline unsigned 
@@ -305,7 +324,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 					return 0;
 				}
 			}
-			filp->f_pos += le16_to_cpu(de->rec_len);
+			filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
 		}
 		ext2_put_page(page);
 	}
@@ -413,7 +432,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 			struct page *page, struct inode *inode)
 {
 	unsigned from = (char *) de - (char *) page_address(page);
-	unsigned to = from + le16_to_cpu(de->rec_len);
+	unsigned to = from + ext2_rec_len_from_disk(de->rec_len);
 	int err;
 
 	lock_page(page);
@@ -469,7 +488,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 				/* We hit i_size */
 				name_len = 0;
 				rec_len = chunk_size;
-				de->rec_len = cpu_to_le16(chunk_size);
+				de->rec_len = ext2_rec_len_to_disk(chunk_size);
 				de->inode = 0;
 				goto got_it;
 			}
@@ -483,7 +502,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 			if (ext2_match (namelen, name, de))
 				goto out_unlock;
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
-			rec_len = le16_to_cpu(de->rec_len);
+			rec_len = ext2_rec_len_from_disk(de->rec_len);
 			if (!de->inode && rec_len >= reclen)
 				goto got_it;
 			if (rec_len >= name_len + reclen)
@@ -504,8 +523,8 @@ got_it:
 		goto out_unlock;
 	if (de->inode) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
-		de1->rec_len = cpu_to_le16(rec_len - name_len);
-		de->rec_len = cpu_to_le16(name_len);
+		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = ext2_rec_len_to_disk(name_len);
 		de = de1;
 	}
 	de->name_len = namelen;
@@ -536,7 +555,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 	struct inode *inode = mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
-	unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
+	unsigned to = ((char*)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len);
 	ext2_dirent * pde = NULL;
 	ext2_dirent * de = (ext2_dirent *) (kaddr + from);
 	int err;
@@ -557,7 +576,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 	err = mapping->a_ops->prepare_write(NULL, page, from, to);
 	BUG_ON(err);
 	if (pde)
-		pde->rec_len = cpu_to_le16(to-from);
+		pde->rec_len = ext2_rec_len_to_disk(to-from);
 	dir->inode = 0;
 	err = ext2_commit_chunk(page, from, to);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
@@ -591,14 +610,14 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 	memset(kaddr, 0, chunk_size);
 	de = (struct ext2_dir_entry_2 *)kaddr;
 	de->name_len = 1;
-	de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
+	de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1));
 	memcpy (de->name, ".\0\0", 4);
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type (de, inode);
 
 	de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1));
 	de->name_len = 2;
-	de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
+	de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1));
 	de->inode = cpu_to_le32(parent->i_ino);
 	memcpy (de->name, "..\0", 4);
 	ext2_set_de_type (de, inode);
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 910a705..41063d5 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -557,5 +557,6 @@ enum {
 #define EXT2_DIR_ROUND 			(EXT2_DIR_PAD - 1)
 #define EXT2_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT2_DIR_ROUND) & \
 					 ~EXT2_DIR_ROUND)
+#define EXT2_MAX_REC_LEN		((1<<16)-1)
 
 #endif	/* _LINUX_EXT2_FS_H */
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext3: Avoid rec_len overflow with 64KB block size
  2007-10-04  5:50                               ` [PATCH] ext2: Avoid rec_len overflow with 64KB block size Theodore Ts'o
@ 2007-10-04  5:50                                 ` Theodore Ts'o
  2007-10-04  5:50                                   ` [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Jan Kara, Mingming Cao

From: Jan Kara <jack@suse.cz>

With 64KB blocksize, a directory entry can have size 64KB which does not fit
into 16 bits we have for entry lenght. So we store 0xffff instead and convert
value when read from / written to disk. The patch also converts some places
to use ext3_next_entry() when we are changing them anyway.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---
 fs/ext3/dir.c           |   10 +++---
 fs/ext3/namei.c         |   90 ++++++++++++++++++++++------------------------
 include/linux/ext3_fs.h |   20 ++++++++++
 3 files changed, 68 insertions(+), 52 deletions(-)

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c00723a..3c4c43a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -69,7 +69,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
 			  unsigned long offset)
 {
 	const char * error_msg = NULL;
-	const int rlen = le16_to_cpu(de->rec_len);
+	const int rlen = ext3_rec_len_from_disk(de->rec_len);
 
 	if (rlen < EXT3_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
@@ -177,10 +177,10 @@ revalidate:
 				 * least that it is non-zero.  A
 				 * failure will be detected in the
 				 * dirent test below. */
-				if (le16_to_cpu(de->rec_len) <
+				if (ext3_rec_len_from_disk(de->rec_len) <
 						EXT3_DIR_REC_LEN(1))
 					break;
-				i += le16_to_cpu(de->rec_len);
+				i += ext3_rec_len_from_disk(de->rec_len);
 			}
 			offset = i;
 			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -201,7 +201,7 @@ revalidate:
 				ret = stored;
 				goto out;
 			}
-			offset += le16_to_cpu(de->rec_len);
+			offset += ext3_rec_len_from_disk(de->rec_len);
 			if (le32_to_cpu(de->inode)) {
 				/* We might block in the next section
 				 * if the data destination is
@@ -223,7 +223,7 @@ revalidate:
 					goto revalidate;
 				stored ++;
 			}
-			filp->f_pos += le16_to_cpu(de->rec_len);
+			filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
 		}
 		offset = 0;
 		brelse (bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index c1fa190..2c38eb6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -144,6 +144,15 @@ struct dx_map_entry
 	u16 size;
 };
 
+/*
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
+{
+	return (struct ext3_dir_entry_2 *)((char*)p +
+		ext3_rec_len_from_disk(p->rec_len));
+}
+
 #ifdef CONFIG_EXT3_INDEX
 static inline unsigned dx_get_block (struct dx_entry *entry);
 static void dx_set_block (struct dx_entry *entry, unsigned value);
@@ -281,7 +290,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
 			space += EXT3_DIR_REC_LEN(de->name_len);
 			names++;
 		}
-		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = ext3_next_entry(de);
 	}
 	printk("(%i)\n", names);
 	return (struct stats) { names, space, 1 };
@@ -548,14 +557,6 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
 
 
 /*
- * p is at least 6 bytes before the end of page
- */
-static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
-{
-	return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
-}
-
-/*
  * This function fills a red-black tree with information from a
  * directory block.  It returns the number directory entries loaded
  * into the tree.  If there is an error it is returned in err.
@@ -721,7 +722,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
 			cond_resched();
 		}
 		/* XXX: do we need to check rec_len == 0 case? -Chris */
-		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
+		de = ext3_next_entry(de);
 	}
 	return count;
 }
@@ -825,7 +826,7 @@ static inline int search_dirblock(struct buffer_head * bh,
 			return 1;
 		}
 		/* prevent looping on a bad block */
-		de_len = le16_to_cpu(de->rec_len);
+		de_len = ext3_rec_len_from_disk(de->rec_len);
 		if (de_len <= 0)
 			return -1;
 		offset += de_len;
@@ -1138,7 +1139,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 		rec_len = EXT3_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext3_dir_entry_2 *) to)->rec_len =
-				cpu_to_le16(rec_len);
+				ext3_rec_len_to_disk(rec_len);
 		de->inode = 0;
 		map++;
 		to += rec_len;
@@ -1157,13 +1158,12 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 
 	prev = to = de;
 	while ((char*)de < base + size) {
-		next = (struct ext3_dir_entry_2 *) ((char *) de +
-						    le16_to_cpu(de->rec_len));
+		next = ext3_next_entry(de);
 		if (de->inode && de->name_len) {
 			rec_len = EXT3_DIR_REC_LEN(de->name_len);
 			if (de > to)
 				memmove(to, de, rec_len);
-			to->rec_len = cpu_to_le16(rec_len);
+			to->rec_len = ext3_rec_len_to_disk(rec_len);
 			prev = to;
 			to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len);
 		}
@@ -1237,8 +1237,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	/* Fancy dance to stay within two buffers */
 	de2 = dx_move_dirents(data1, data2, map + split, count - split);
 	de = dx_pack_dirents(data1,blocksize);
-	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-	de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+	de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
+	de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2);
 	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
 	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
 
@@ -1309,7 +1309,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 				return -EEXIST;
 			}
 			nlen = EXT3_DIR_REC_LEN(de->name_len);
-			rlen = le16_to_cpu(de->rec_len);
+			rlen = ext3_rec_len_from_disk(de->rec_len);
 			if ((de->inode? rlen - nlen: rlen) >= reclen)
 				break;
 			de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
@@ -1328,11 +1328,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 
 	/* By now the buffer is marked for journaling */
 	nlen = EXT3_DIR_REC_LEN(de->name_len);
-	rlen = le16_to_cpu(de->rec_len);
+	rlen = ext3_rec_len_from_disk(de->rec_len);
 	if (de->inode) {
 		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
-		de1->rec_len = cpu_to_le16(rlen - nlen);
-		de->rec_len = cpu_to_le16(nlen);
+		de1->rec_len = ext3_rec_len_to_disk(rlen - nlen);
+		de->rec_len = ext3_rec_len_to_disk(nlen);
 		de = de1;
 	}
 	de->file_type = EXT3_FT_UNKNOWN;
@@ -1410,17 +1410,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
 	/* The 0th block becomes the root, move the dirents out */
 	fde = &root->dotdot;
-	de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len));
+	de = (struct ext3_dir_entry_2 *)((char *)fde +
+			ext3_rec_len_from_disk(fde->rec_len));
 	len = ((char *) root) + blocksize - (char *) de;
 	memcpy (data1, de, len);
 	de = (struct ext3_dir_entry_2 *) data1;
 	top = data1 + len;
-	while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top)
+	while ((char *)(de2 = ext3_next_entry(de)) < top)
 		de = de2;
-	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+	de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de);
 	/* Initialize the root; the dot dirents already exist */
 	de = (struct ext3_dir_entry_2 *) (&root->dotdot);
-	de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
+	de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2));
 	memset (&root->info, 0, sizeof(root->info));
 	root->info.info_length = sizeof(root->info);
 	root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
@@ -1507,7 +1508,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
 		return retval;
 	de = (struct ext3_dir_entry_2 *) bh->b_data;
 	de->inode = 0;
-	de->rec_len = cpu_to_le16(blocksize);
+	de->rec_len = ext3_rec_len_to_disk(blocksize);
 	return add_dirent_to_buf(handle, dentry, inode, de, bh);
 }
 
@@ -1571,7 +1572,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			goto cleanup;
 		node2 = (struct dx_node *)(bh2->b_data);
 		entries2 = node2->entries;
-		node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+		node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
 		node2->fake.inode = 0;
 		BUFFER_TRACE(frame->bh, "get_write_access");
 		err = ext3_journal_get_write_access(handle, frame->bh);
@@ -1670,9 +1671,9 @@ static int ext3_delete_entry (handle_t *handle,
 			BUFFER_TRACE(bh, "get_write_access");
 			ext3_journal_get_write_access(handle, bh);
 			if (pde)
-				pde->rec_len =
-					cpu_to_le16(le16_to_cpu(pde->rec_len) +
-						    le16_to_cpu(de->rec_len));
+				pde->rec_len = ext3_rec_len_to_disk(
+					ext3_rec_len_from_disk(pde->rec_len) +
+					ext3_rec_len_from_disk(de->rec_len));
 			else
 				de->inode = 0;
 			dir->i_version++;
@@ -1680,10 +1681,9 @@ static int ext3_delete_entry (handle_t *handle,
 			ext3_journal_dirty_metadata(handle, bh);
 			return 0;
 		}
-		i += le16_to_cpu(de->rec_len);
+		i += ext3_rec_len_from_disk(de->rec_len);
 		pde = de;
-		de = (struct ext3_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+		de = ext3_next_entry(de);
 	}
 	return -ENOENT;
 }
@@ -1817,13 +1817,12 @@ retry:
 	de = (struct ext3_dir_entry_2 *) dir_block->b_data;
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
-	de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
+	de->rec_len = ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len));
 	strcpy (de->name, ".");
 	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
-	de = (struct ext3_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+	de = ext3_next_entry(de);
 	de->inode = cpu_to_le32(dir->i_ino);
-	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
+	de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
 	de->name_len = 2;
 	strcpy (de->name, "..");
 	ext3_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1875,8 +1874,7 @@ static int empty_dir (struct inode * inode)
 		return 1;
 	}
 	de = (struct ext3_dir_entry_2 *) bh->b_data;
-	de1 = (struct ext3_dir_entry_2 *)
-			((char *) de + le16_to_cpu(de->rec_len));
+	de1 = ext3_next_entry(de);
 	if (le32_to_cpu(de->inode) != inode->i_ino ||
 			!le32_to_cpu(de1->inode) ||
 			strcmp (".", de->name) ||
@@ -1887,9 +1885,9 @@ static int empty_dir (struct inode * inode)
 		brelse (bh);
 		return 1;
 	}
-	offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
-	de = (struct ext3_dir_entry_2 *)
-			((char *) de1 + le16_to_cpu(de1->rec_len));
+	offset = ext3_rec_len_from_disk(de->rec_len) +
+			ext3_rec_len_from_disk(de1->rec_len);
+	de = ext3_next_entry(de1);
 	while (offset < inode->i_size ) {
 		if (!bh ||
 			(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1918,9 +1916,8 @@ static int empty_dir (struct inode * inode)
 			brelse (bh);
 			return 0;
 		}
-		offset += le16_to_cpu(de->rec_len);
-		de = (struct ext3_dir_entry_2 *)
-				((char *) de + le16_to_cpu(de->rec_len));
+		offset += ext3_rec_len_from_disk(de->rec_len);
+		de = ext3_next_entry(de);
 	}
 	brelse (bh);
 	return 1;
@@ -2274,8 +2271,7 @@ retry:
 }
 
 #define PARENT_INO(buffer) \
-	((struct ext3_dir_entry_2 *) ((char *) buffer + \
-	le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode
+	(ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode)
 
 /*
  * Anybody can rename anything with this: the permission checks are left to the
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 7aa5556..d9e378d 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -660,6 +660,26 @@ struct ext3_dir_entry_2 {
 #define EXT3_DIR_ROUND			(EXT3_DIR_PAD - 1)
 #define EXT3_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT3_DIR_ROUND) & \
 					 ~EXT3_DIR_ROUND)
+#define EXT3_MAX_REC_LEN		((1<<16)-1)
+
+static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
+{
+	unsigned len = le16_to_cpu(dlen);
+
+	if (len == EXT3_MAX_REC_LEN)
+		return 1 << 16;
+	return len;
+}
+
+static inline __le16 ext3_rec_len_to_disk(unsigned len)
+{
+	if (len == (1 << 16))
+		return cpu_to_le16(EXT3_MAX_REC_LEN);
+	else if (len > (1 << 16))
+		BUG();
+	return cpu_to_le16(len);
+}
+
 /*
  * Hash Tree Directory indexing
  * (c) Daniel Phillips, 2001
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo
  2007-10-04  5:50                                 ` [PATCH] ext3: " Theodore Ts'o
@ 2007-10-04  5:50                                   ` Theodore Ts'o
  2007-10-04  5:50                                     ` [PATCH] ext4: Convert bg_inode_bitmap and bg_inode_table Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert bg_block_bitmap to bg_block_bitmap_lo
This helps in catching some BUGS due to direct
partial access of these split fields.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/super.c         |    6 +++---
 include/linux/ext4_fs.h |    2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d8bb279..02a2418 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,9 +70,9 @@ static void ext4_write_super_lockfs(struct super_block *sb);
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
 {
-	return le32_to_cpu(bg->bg_block_bitmap) |
+	return le32_to_cpu(bg->bg_block_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@@ -94,7 +94,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 void ext4_block_bitmap_set(struct super_block *sb,
 			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-	bg->bg_block_bitmap = cpu_to_le32((u32)blk);
+	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 }
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index ab608e8..c26e30e 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -105,7 +105,7 @@
  */
 struct ext4_group_desc
 {
-	__le32	bg_block_bitmap;	/* Blocks bitmap block */
+	__le32	bg_block_bitmap_lo;	/* Blocks bitmap block */
 	__le32	bg_inode_bitmap;	/* Inodes bitmap block */
 	__le32	bg_inode_table;		/* Inodes table block */
 	__le16	bg_free_blocks_count;	/* Free blocks count */
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert bg_inode_bitmap and bg_inode_table
  2007-10-04  5:50                                   ` [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo Theodore Ts'o
@ 2007-10-04  5:50                                     ` Theodore Ts'o
  2007-10-04  5:50                                       ` [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert bg_inode_bitmap and bg_inode_table to bg_inode_bitmap_lo
and bg_inode_table_lo.  This helps in finding BUGs due to
direct partial access of these split 64 bit values

Also fix one direct partial access

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/balloc.c        |    2 +-
 fs/ext4/super.c         |   12 ++++++------
 include/linux/ext4_fs.h |    4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d1a8882..7a24b3f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -103,7 +103,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		/* Set bits for block and inode bitmaps, and inode table */
 		ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
 		ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
-		for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
+		for (bit = (ext4_inode_table(sb, gdp) - start),
 		     bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
 			ext4_set_bit(bit, bh->b_data);
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 02a2418..7548408 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -78,17 +78,17 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
 {
-	return le32_to_cpu(bg->bg_inode_bitmap) |
+	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 }
 
 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 			      struct ext4_group_desc *bg)
 {
-	return le32_to_cpu(bg->bg_inode_table) |
+	return le32_to_cpu(bg->bg_inode_table_lo) |
 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 }
 
 void ext4_block_bitmap_set(struct super_block *sb,
@@ -102,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb,
 void ext4_inode_bitmap_set(struct super_block *sb,
 			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-	bg->bg_inode_bitmap  = cpu_to_le32((u32)blk);
+	bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 }
@@ -110,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb,
 void ext4_inode_table_set(struct super_block *sb,
 			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
 {
-	bg->bg_inode_table = cpu_to_le32((u32)blk);
+	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 }
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index c26e30e..5eb4953 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -106,8 +106,8 @@
 struct ext4_group_desc
 {
 	__le32	bg_block_bitmap_lo;	/* Blocks bitmap block */
-	__le32	bg_inode_bitmap;	/* Inodes bitmap block */
-	__le32	bg_inode_table;		/* Inodes table block */
+	__le32	bg_inode_bitmap_lo;	/* Inodes bitmap block */
+	__le32	bg_inode_table_lo;	/* Inodes table block */
 	__le16	bg_free_blocks_count;	/* Free blocks count */
 	__le16	bg_free_inodes_count;	/* Free inodes count */
 	__le16	bg_used_dirs_count;	/* Directories count */
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo
  2007-10-04  5:50                                     ` [PATCH] ext4: Convert bg_inode_bitmap and bg_inode_table Theodore Ts'o
@ 2007-10-04  5:50                                       ` Theodore Ts'o
  2007-10-04  5:50                                         ` [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert s_blocks_count to s_blocks_count_lo
This helps in finding BUGs due to direct partial access of
these split 64 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/super.c         |    4 ++--
 include/linux/ext4_fs.h |    6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7548408..dc7fe4c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2593,7 +2593,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
-	} else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
 		unsigned long ngroups = sbi->s_groups_count, i;
 		ext4_fsblk_t overhead = 0;
 		smp_rmb();
@@ -2628,7 +2628,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 		overhead += ngroups * (2 + sbi->s_itb_per_group);
 		sbi->s_overhead_last = overhead;
 		smp_wmb();
-		sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
+		sbi->s_blocks_last = ext4_blocks_count(es);
 	}
 
 	buf->f_type = EXT4_SUPER_MAGIC;
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 5eb4953..339412d 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -501,7 +501,7 @@ do {									       \
  */
 struct ext4_super_block {
 /*00*/	__le32	s_inodes_count;		/* Inodes count */
-	__le32	s_blocks_count;		/* Blocks count */
+	__le32	s_blocks_count_lo;	/* Blocks count */
 	__le32	s_r_blocks_count;	/* Reserved blocks count */
 	__le32	s_free_blocks_count;	/* Free blocks count */
 /*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
@@ -1001,7 +1001,7 @@ extern void ext4_inode_table_set(struct super_block *sb,
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
 	return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
-		le32_to_cpu(es->s_blocks_count);
+		le32_to_cpu(es->s_blocks_count_lo);
 }
 
 static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
@@ -1019,7 +1019,7 @@ static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
 static inline void ext4_blocks_count_set(struct ext4_super_block *es,
 					 ext4_fsblk_t blk)
 {
-	es->s_blocks_count = cpu_to_le32((u32)blk);
+	es->s_blocks_count_lo = cpu_to_le32((u32)blk);
 	es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
 }
 
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count
  2007-10-04  5:50                                       ` [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo Theodore Ts'o
@ 2007-10-04  5:50                                         ` Theodore Ts'o
  2007-10-04  5:50                                           ` [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert s_r_blocks_count and s_free_blocks_count to
s_r_blocks_count_lo and s_free_blocks_count_lo

This helps in finding BUGs due to direct partial access of
these split 64 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/super.c         |    2 +-
 include/linux/ext4_fs.h |   12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dc7fe4c..dbd114c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2635,7 +2635,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
 	buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter);
-	es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
+	ext4_free_blocks_count_set(es, buf->f_bfree);
 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
 	if (buf->f_bfree < ext4_r_blocks_count(es))
 		buf->f_bavail = 0;
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 339412d..fb31c1a 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -502,8 +502,8 @@ do {									       \
 struct ext4_super_block {
 /*00*/	__le32	s_inodes_count;		/* Inodes count */
 	__le32	s_blocks_count_lo;	/* Blocks count */
-	__le32	s_r_blocks_count;	/* Reserved blocks count */
-	__le32	s_free_blocks_count;	/* Free blocks count */
+	__le32	s_r_blocks_count_lo;	/* Reserved blocks count */
+	__le32	s_free_blocks_count_lo;	/* Free blocks count */
 /*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
 	__le32	s_first_data_block;	/* First Data Block */
 	__le32	s_log_block_size;	/* Block size */
@@ -1007,13 +1007,13 @@ static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
 {
 	return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
-		le32_to_cpu(es->s_r_blocks_count);
+		le32_to_cpu(es->s_r_blocks_count_lo);
 }
 
 static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
 {
 	return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
-		le32_to_cpu(es->s_free_blocks_count);
+		le32_to_cpu(es->s_free_blocks_count_lo);
 }
 
 static inline void ext4_blocks_count_set(struct ext4_super_block *es,
@@ -1026,14 +1026,14 @@ static inline void ext4_blocks_count_set(struct ext4_super_block *es,
 static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
 					      ext4_fsblk_t blk)
 {
-	es->s_free_blocks_count = cpu_to_le32((u32)blk);
+	es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
 	es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
 }
 
 static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
 					   ext4_fsblk_t blk)
 {
-	es->s_r_blocks_count = cpu_to_le32((u32)blk);
+	es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
 	es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
 }
 
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo
  2007-10-04  5:50                                         ` [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count Theodore Ts'o
@ 2007-10-04  5:50                                           ` Theodore Ts'o
  2007-10-04  5:50                                             ` [PATCH] ext4: Convert ext4_extent_idx.ei_leaf to ext4_extent_idx.ei_leaf_lo Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert ext4_extent.ee_start to ext4_extent.ee_start_lo
This helps in finding BUGs due to direct partial access of
these split 48 bit values

Also fix direct partial access in ext4 code

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/extents.c               |    8 +++-----
 include/linux/ext4_fs_extents.h |    2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2be404f..c03056a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
-	block = le32_to_cpu(ex->ee_start);
+	block = le32_to_cpu(ex->ee_start_lo);
 	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
 	return block;
 }
@@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
  */
 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
-	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
@@ -1409,8 +1409,7 @@ has_space:
 	eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
 	nearex = path[depth].p_ext;
 	nearex->ee_block = newext->ee_block;
-	nearex->ee_start = newext->ee_start;
-	nearex->ee_start_hi = newext->ee_start_hi;
+	ext4_ext_store_pblock(nearex, ext_pblock(newext));
 	nearex->ee_len = newext->ee_len;
 
 merge:
@@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
 	}
 	/* ex2: iblock to iblock + maxblocks-1 : initialised */
 	ex2->ee_block = cpu_to_le32(iblock);
-	ex2->ee_start = cpu_to_le32(newblock);
 	ext4_ext_store_pblock(ex2, newblock);
 	ex2->ee_len = cpu_to_le16(allocated);
 	if (ex2 != ex)
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index 81406f3..cb2dfbb 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -74,7 +74,7 @@ struct ext4_extent {
 	__le32	ee_block;	/* first logical block extent covers */
 	__le16	ee_len;		/* number of blocks covered by extent */
 	__le16	ee_start_hi;	/* high 16 bits of physical block */
-	__le32	ee_start;	/* low 32 bits of physical block */
+	__le32	ee_start_lo;	/* low 32 bits of physical block */
 };
 
 /*
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: Convert ext4_extent_idx.ei_leaf to ext4_extent_idx.ei_leaf_lo
  2007-10-04  5:50                                           ` [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo Theodore Ts'o
@ 2007-10-04  5:50                                             ` Theodore Ts'o
  2007-10-04  5:50                                               ` [PATCH] ext4: sparse fixes Theodore Ts'o
  0 siblings, 1 reply; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Convert ext4_extent_idx.ei_leaf  ext4_extent_idx.ei_leaf_lo
This helps in finding BUGs due to direct partial access of
these split 48 bit values.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/extents.c               |    4 ++--
 include/linux/ext4_fs_extents.h |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c03056a..8528774 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
 	ext4_fsblk_t block;
 
-	block = le32_to_cpu(ix->ei_leaf);
+	block = le32_to_cpu(ix->ei_leaf_lo);
 	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
 	return block;
 }
@@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
  */
 static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
-	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
 }
 
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index cb2dfbb..d2045a2 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -83,7 +83,7 @@ struct ext4_extent {
  */
 struct ext4_extent_idx {
 	__le32	ei_block;	/* index covers logical blocks from 'block' */
-	__le32	ei_leaf;	/* pointer to the physical block of the next *
+	__le32	ei_leaf_lo;	/* pointer to the physical block of the next *
 				 * level. leaf or next index could be there */
 	__le16	ei_leaf_hi;	/* high 16 bits of physical block */
 	__u16	ei_unused;
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH] ext4: sparse fixes
  2007-10-04  5:50                                             ` [PATCH] ext4: Convert ext4_extent_idx.ei_leaf to ext4_extent_idx.ei_leaf_lo Theodore Ts'o
@ 2007-10-04  5:50                                               ` Theodore Ts'o
  0 siblings, 0 replies; 34+ messages in thread
From: Theodore Ts'o @ 2007-10-04  5:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-ext4, Aneesh Kumar K.V

From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/ext4/fsync.c |    2 +-
 fs/ext4/inode.c |    2 +-
 fs/ext4/xattr.c |    4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a167d7..8d50879 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
 	struct inode *inode = dentry->d_inode;
 	int ret = 0;
 
-	J_ASSERT(ext4_journal_current_handle() == 0);
+	J_ASSERT(ext4_journal_current_handle() == NULL);
 
 	/*
 	 * data=writeback:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2e1ea4..07afd4a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 		}
 		if (buffer_new(&dummy)) {
 			J_ASSERT(create != 0);
-			J_ASSERT(handle != 0);
+			J_ASSERT(handle != NULL);
 
 			/*
 			 * Now that we do not always journal data, we should
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 12c7d65..8638730 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1120,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 	int total_ino, total_blk;
 	void *base, *start, *end;
 	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
-	int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
+	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
 
 	down_write(&EXT4_I(inode)->xattr_sem);
 retry:
@@ -1292,7 +1292,7 @@ retry:
 
 		i.name = b_entry_name;
 		i.value = buffer;
-		i.value_len = cpu_to_le32(size);
+		i.value_len = size;
 		error = ext4_xattr_block_find(inode, &i, bs);
 		if (error)
 			goto cleanup;
-- 
1.5.3.2.81.g17ed

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd/jbd2: JBD memory allocation cleanups
  2007-10-04  5:50 ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Theodore Ts'o
  2007-10-04  5:50   ` [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL Theodore Ts'o
@ 2007-10-04  6:52   ` Christoph Hellwig
  2007-10-05 22:33     ` Mingming Cao
                       ` (4 more replies)
  1 sibling, 5 replies; 34+ messages in thread
From: Christoph Hellwig @ 2007-10-04  6:52 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: linux-kernel, linux-ext4, Mingming Cao, Christoph Lameter

On Thu, Oct 04, 2007 at 01:50:36AM -0400, Theodore Ts'o wrote:
> From: Mingming Cao <cmm@us.ibm.com>
> 
> JBD: Replace slab allocations with page cache allocations

It's page allocations, not page cache allocations.

> Also this patch cleans up jbd_kmalloc and replace it with kmalloc directly

That sounds like it should be a different patch..

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd/jbd2: JBD memory allocation cleanups
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
@ 2007-10-05 22:33     ` Mingming Cao
  2007-10-05 22:35     ` [PATCH] jbd: JBD slab " Mingming Cao
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 34+ messages in thread
From: Mingming Cao @ 2007-10-05 22:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Ts'o, linux-kernel, linux-ext4, Christoph Lameter

On Thu, 2007-10-04 at 07:52 +0100, Christoph Hellwig wrote:
> On Thu, Oct 04, 2007 at 01:50:36AM -0400, Theodore Ts'o wrote:
> > From: Mingming Cao <cmm@us.ibm.com>
> > 
> > JBD: Replace slab allocations with page cache allocations
> 
> It's page allocations, not page cache allocations.
> 
> > Also this patch cleans up jbd_kmalloc and replace it with kmalloc directly
> 
> That sounds like it should be a different patch..

Okay. Will sent the patches, that also separate JBD2 changes to a
different patch.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] jbd: JBD slab allocation cleanups
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
  2007-10-05 22:33     ` Mingming Cao
@ 2007-10-05 22:35     ` Mingming Cao
  2007-10-05 22:36     ` [PATCH] jbd2: JBD2 " Mingming Cao
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 34+ messages in thread
From: Mingming Cao @ 2007-10-05 22:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Ts'o, linux-kernel, linux-ext4, Christoph Lameter

JBD: JBD slab allocation cleanups

From: Mingming Cao <cmm@us.ibm.com>

JBD: Replace slab allocations with page allocations

JBD allocate memory for committed_data and frozen_data from slab. However
JBD should not pass slab pages down to the block layer. Use page allocator pages instead. This will also prepare JBD for the large blocksize patchset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---

 fs/jbd/commit.c      |    6 +--
 fs/jbd/journal.c     |   88 ++-------------------------------------------------
 fs/jbd/transaction.c |    8 ++--
 include/linux/jbd.h  |   13 +++++--
 4 files changed, 21 insertions(+), 94 deletions(-)


Index: linux-2.6.23-rc9/fs/jbd/commit.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd/commit.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd/commit.c	2007-10-05 12:08:08.000000000 -0700
@@ -375,7 +375,7 @@ void journal_commit_transaction(journal_
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -792,14 +792,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jbd_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			jbd_slab_free(jh->b_frozen_data, bh->b_size);
+			jbd_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
Index: linux-2.6.23-rc9/fs/jbd/journal.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd/journal.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd/journal.c	2007-10-05 12:08:08.000000000 -0700
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -334,10 +333,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			jbd_slab_free(tmp, bh_in->b_size);
+			jbd_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -1095,13 +1094,6 @@ int journal_load(journal_t *journal)
 		}
 	}
 
-	/*
-	 * Create a slab for this blocksize
-	 */
-	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-	if (err)
-		return err;
-
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (journal_recover(journal))
@@ -1624,77 +1616,6 @@ void * __jbd_kmalloc (const char *where,
 }
 
 /*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
-};
-
-static void journal_destroy_jbd_slabs(void)
-{
-	int i;
-
-	for (i = 0; i < JBD_MAX_SLABS; i++) {
-		if (jbd_slab[i])
-			kmem_cache_destroy(jbd_slab[i]);
-		jbd_slab[i] = NULL;
-	}
-}
-
-static int journal_create_jbd_slab(size_t slab_size)
-{
-	int i = JBD_SLAB_INDEX(slab_size);
-
-	BUG_ON(i >= JBD_MAX_SLABS);
-
-	/*
-	 * Check if we already have a slab created for this size
-	 */
-	if (jbd_slab[i])
-		return 0;
-
-	/*
-	 * Create a slab and force alignment to be same as slabsize -
-	 * this will make sure that allocations won't cross the page
-	 * boundary.
-	 */
-	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-				slab_size, slab_size, 0, NULL);
-	if (!jbd_slab[i]) {
-		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void * jbd_slab_alloc(size_t size, gfp_t flags)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd_slab_free(void *ptr,  size_t size)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	kmem_cache_free(jbd_slab[idx], ptr);
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *journal_head_cache;
@@ -1881,13 +1802,13 @@ static void __journal_remove_journal_hea
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_frozen_data, bh->b_size);
+				jbd_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				jbd_slab_free(jh->b_committed_data, bh->b_size);
+				jbd_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -2042,7 +1963,6 @@ static void journal_destroy_caches(void)
 	journal_destroy_revoke_caches();
 	journal_destroy_journal_head_cache();
 	journal_destroy_handle_cache();
-	journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
Index: linux-2.6.23-rc9/fs/jbd/transaction.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd/transaction.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd/transaction.c	2007-10-05 12:08:08.000000000 -0700
@@ -668,7 +668,7 @@ repeat:
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
 				frozen_buffer =
-					jbd_slab_alloc(jh2bh(jh)->b_size,
+					jbd_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
@@ -728,7 +728,7 @@ done:
 
 out:
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
-		jbd_slab_free(frozen_buffer, bh->b_size);
+		jbd_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -881,7 +881,7 @@ int journal_get_undo_access(handle_t *ha
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -908,7 +908,7 @@ repeat:
 out:
 	journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		jbd_slab_free(committed_data, bh->b_size);
+		jbd_free(committed_data, bh->b_size);
 	return err;
 }
 
Index: linux-2.6.23-rc9/include/linux/jbd.h
===================================================================
--- linux-2.6.23-rc9.orig/include/linux/jbd.h	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/include/linux/jbd.h	2007-10-05 12:08:08.000000000 -0700
@@ -72,14 +72,21 @@ extern int journal_enable_debug;
 #endif
 
 extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-extern void * jbd_slab_alloc(size_t size, gfp_t flags);
-extern void jbd_slab_free(void *ptr, size_t size);
-
 #define jbd_kmalloc(size, flags) \
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
 #define jbd_rep_kmalloc(size, flags) \
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
 
+static inline void *jbd_alloc(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags, get_order(size));
+}
+
+static inline void jbd_free(void *ptr, size_t size)
+{
+	free_pages((unsigned long)ptr, get_order(size));
+};
+
 #define JFS_MIN_JOURNAL_BLOCKS 1024
 
 #ifdef __KERNEL__

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] jbd2: JBD2 slab allocation cleanups
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
  2007-10-05 22:33     ` Mingming Cao
  2007-10-05 22:35     ` [PATCH] jbd: JBD slab " Mingming Cao
@ 2007-10-05 22:36     ` Mingming Cao
  2007-10-05 22:38     ` [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc Mingming Cao
  2007-10-05 22:39     ` [PATCH] jbd2: JBD replace jbd2_kmalloc " Mingming Cao
  4 siblings, 0 replies; 34+ messages in thread
From: Mingming Cao @ 2007-10-05 22:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Ts'o, linux-kernel, linux-ext4, Christoph Lameter

JBD2: jbd2 slab allocation cleanups

From: Mingming Cao <cmm@us.ibm.com>

JBD2: Replace slab allocations with page allocations

JBD2 allocate memory for committed_data and frozen_data from slab. However
JBD2 should not pass slab pages down to the block layer. Use page allocator
pages instead. This will also prepare JBD for the large blocksize patchset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---

 fs/jbd2/commit.c      |    6 +--
 fs/jbd2/journal.c     |   88 ++------------------------------------------------
 fs/jbd2/transaction.c |   14 +++----
 include/linux/jbd2.h  |   18 +++++++---
 4 files changed, 27 insertions(+), 99 deletions(-)


Index: linux-2.6.23-rc9/fs/jbd2/commit.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd2/commit.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd2/commit.c	2007-10-05 12:08:26.000000000 -0700
@@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(jou
 			struct buffer_head *bh = jh2bh(jh);
 
 			jbd_lock_bh_state(bh);
-			jbd2_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			jbd_unlock_bh_state(bh);
 		}
@@ -801,14 +801,14 @@ restart_loop:
 		 * Otherwise, we can just throw away the frozen data now.
 		 */
 		if (jh->b_committed_data) {
-			jbd2_slab_free(jh->b_committed_data, bh->b_size);
+			jbd2_free(jh->b_committed_data, bh->b_size);
 			jh->b_committed_data = NULL;
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
 			}
 		} else if (jh->b_frozen_data) {
-			jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+			jbd2_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
 		}
 
Index: linux-2.6.23-rc9/fs/jbd2/journal.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd2/journal.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd2/journal.c	2007-10-05 12:08:26.000000000 -0700
@@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit)
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
-static int jbd2_journal_create_jbd_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -335,10 +334,10 @@ repeat:
 		char *tmp;
 
 		jbd_unlock_bh_state(bh_in);
-		tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
+		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
 		jbd_lock_bh_state(bh_in);
 		if (jh_in->b_frozen_data) {
-			jbd2_slab_free(tmp, bh_in->b_size);
+			jbd2_free(tmp, bh_in->b_size);
 			goto repeat;
 		}
 
@@ -1096,13 +1095,6 @@ int jbd2_journal_load(journal_t *journal
 		}
 	}
 
-	/*
-	 * Create a slab for this blocksize
-	 */
-	err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
-	if (err)
-		return err;
-
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (jbd2_journal_recover(journal))
@@ -1636,77 +1628,6 @@ void * __jbd2_kmalloc (const char *where
 }
 
 /*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size)  (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
-	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
-};
-
-static void jbd2_journal_destroy_jbd_slabs(void)
-{
-	int i;
-
-	for (i = 0; i < JBD_MAX_SLABS; i++) {
-		if (jbd_slab[i])
-			kmem_cache_destroy(jbd_slab[i]);
-		jbd_slab[i] = NULL;
-	}
-}
-
-static int jbd2_journal_create_jbd_slab(size_t slab_size)
-{
-	int i = JBD_SLAB_INDEX(slab_size);
-
-	BUG_ON(i >= JBD_MAX_SLABS);
-
-	/*
-	 * Check if we already have a slab created for this size
-	 */
-	if (jbd_slab[i])
-		return 0;
-
-	/*
-	 * Create a slab and force alignment to be same as slabsize -
-	 * this will make sure that allocations won't cross the page
-	 * boundary.
-	 */
-	jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
-				slab_size, slab_size, 0, NULL);
-	if (!jbd_slab[i]) {
-		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void * jbd2_slab_alloc(size_t size, gfp_t flags)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd2_slab_free(void *ptr,  size_t size)
-{
-	int idx;
-
-	idx = JBD_SLAB_INDEX(size);
-	BUG_ON(jbd_slab[idx] == NULL);
-	kmem_cache_free(jbd_slab[idx], ptr);
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *jbd2_journal_head_cache;
@@ -1893,13 +1814,13 @@ static void __journal_remove_journal_hea
 				printk(KERN_WARNING "%s: freeing "
 						"b_frozen_data\n",
 						__FUNCTION__);
-				jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+				jbd2_free(jh->b_frozen_data, bh->b_size);
 			}
 			if (jh->b_committed_data) {
 				printk(KERN_WARNING "%s: freeing "
 						"b_committed_data\n",
 						__FUNCTION__);
-				jbd2_slab_free(jh->b_committed_data, bh->b_size);
+				jbd2_free(jh->b_committed_data, bh->b_size);
 			}
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
@@ -2040,7 +1961,6 @@ static void jbd2_journal_destroy_caches(
 	jbd2_journal_destroy_revoke_caches();
 	jbd2_journal_destroy_jbd2_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
-	jbd2_journal_destroy_jbd_slabs();
 }
 
 static int __init journal_init(void)
Index: linux-2.6.23-rc9/fs/jbd2/transaction.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd2/transaction.c	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd2/transaction.c	2007-10-05 12:08:26.000000000 -0700
@@ -236,7 +236,7 @@ out:
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
-	handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
 	if (!handle)
 		return NULL;
 	memset(handle, 0, sizeof(*handle));
@@ -282,7 +282,7 @@ handle_t *jbd2_journal_start(journal_t *
 
 	err = start_this_handle(journal, handle);
 	if (err < 0) {
-		jbd_free_handle(handle);
+		jbd2_free_handle(handle);
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
@@ -668,7 +668,7 @@ repeat:
 				JBUFFER_TRACE(jh, "allocate memory for buffer");
 				jbd_unlock_bh_state(bh);
 				frozen_buffer =
-					jbd2_slab_alloc(jh2bh(jh)->b_size,
+					jbd2_alloc(jh2bh(jh)->b_size,
 							 GFP_NOFS);
 				if (!frozen_buffer) {
 					printk(KERN_EMERG
@@ -728,7 +728,7 @@ done:
 
 out:
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
-		jbd2_slab_free(frozen_buffer, bh->b_size);
+		jbd2_free(frozen_buffer, bh->b_size);
 
 	JBUFFER_TRACE(jh, "exit");
 	return error;
@@ -881,7 +881,7 @@ int jbd2_journal_get_undo_access(handle_
 
 repeat:
 	if (!jh->b_committed_data) {
-		committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
 		if (!committed_data) {
 			printk(KERN_EMERG "%s: No memory for committed data\n",
 				__FUNCTION__);
@@ -908,7 +908,7 @@ repeat:
 out:
 	jbd2_journal_put_journal_head(jh);
 	if (unlikely(committed_data))
-		jbd2_slab_free(committed_data, bh->b_size);
+		jbd2_free(committed_data, bh->b_size);
 	return err;
 }
 
@@ -1411,7 +1411,7 @@ int jbd2_journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
-	jbd_free_handle(handle);
+	jbd2_free_handle(handle);
 	return err;
 }
 
Index: linux-2.6.23-rc9/include/linux/jbd2.h
===================================================================
--- linux-2.6.23-rc9.orig/include/linux/jbd2.h	2007-10-05 12:03:43.000000000 -0700
+++ linux-2.6.23-rc9/include/linux/jbd2.h	2007-10-05 12:08:26.000000000 -0700
@@ -72,14 +72,22 @@ extern u8 jbd2_journal_enable_debug;
 #endif
 
 extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-extern void * jbd2_slab_alloc(size_t size, gfp_t flags);
-extern void jbd2_slab_free(void *ptr, size_t size);
-
 #define jbd_kmalloc(size, flags) \
 	__jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
 #define jbd_rep_kmalloc(size, flags) \
 	__jbd2_kmalloc(__FUNCTION__, (size), (flags), 1)
 
+
+static inline void *jbd2_alloc(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags, get_order(size));
+}
+
+static inline void jbd2_free(void *ptr, size_t size)
+{
+	free_pages((unsigned long)ptr, get_order(size));
+};
+
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
 
 #ifdef __KERNEL__
@@ -959,12 +967,12 @@ void jbd2_journal_put_journal_head(struc
  */
 extern struct kmem_cache *jbd2_handle_cache;
 
-static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags)
+static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
 {
 	return kmem_cache_alloc(jbd2_handle_cache, gfp_flags);
 }
 
-static inline void jbd_free_handle(handle_t *handle)
+static inline void jbd2_free_handle(handle_t *handle)
 {
 	kmem_cache_free(jbd2_handle_cache, handle);
 }

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
                       ` (2 preceding siblings ...)
  2007-10-05 22:36     ` [PATCH] jbd2: JBD2 " Mingming Cao
@ 2007-10-05 22:38     ` Mingming Cao
  2007-10-08 17:46       ` Christoph Lameter
  2007-10-05 22:39     ` [PATCH] jbd2: JBD replace jbd2_kmalloc " Mingming Cao
  4 siblings, 1 reply; 34+ messages in thread
From: Mingming Cao @ 2007-10-05 22:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Ts'o, linux-kernel, linux-ext4, Christoph Lameter

JBD: JBD replace jbd_kmalloc with kmalloc

From: Mingming Cao <cmm@us.ibm.com>

This patch cleans up jbd_kmalloc and replace it with kmalloc directly

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---

 fs/jbd/journal.c     |   11 +----------
 fs/jbd/transaction.c |    4 ++--
 include/linux/jbd.h  |    6 ------
 3 files changed, 3 insertions(+), 18 deletions(-)


Index: linux-2.6.23-rc9/fs/jbd/journal.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd/journal.c	2007-10-05 12:08:08.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd/journal.c	2007-10-05 12:08:29.000000000 -0700
@@ -653,7 +653,7 @@ static journal_t * journal_init_common (
 	journal_t *journal;
 	int err;
 
-	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
@@ -1607,15 +1607,6 @@ int journal_blocks_per_page(struct inode
 }
 
 /*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *journal_head_cache;
Index: linux-2.6.23-rc9/fs/jbd/transaction.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd/transaction.c	2007-10-05 12:08:08.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd/transaction.c	2007-10-05 12:08:29.000000000 -0700
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
-						GFP_NOFS);
+		new_transaction = kmalloc(sizeof(*new_transaction),
+						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
Index: linux-2.6.23-rc9/include/linux/jbd.h
===================================================================
--- linux-2.6.23-rc9.orig/include/linux/jbd.h	2007-10-05 12:08:08.000000000 -0700
+++ linux-2.6.23-rc9/include/linux/jbd.h	2007-10-05 12:08:29.000000000 -0700
@@ -71,12 +71,6 @@ extern int journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-#define jbd_kmalloc(size, flags) \
-	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
-#define jbd_rep_kmalloc(size, flags) \
-	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
-
 static inline void *jbd_alloc(size_t size, gfp_t flags)
 {
 	return (void *)__get_free_pages(flags, get_order(size));

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH] jbd2: JBD replace jbd2_kmalloc with kmalloc
  2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
                       ` (3 preceding siblings ...)
  2007-10-05 22:38     ` [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc Mingming Cao
@ 2007-10-05 22:39     ` Mingming Cao
  2007-10-08 18:37       ` Christoph Lameter
  4 siblings, 1 reply; 34+ messages in thread
From: Mingming Cao @ 2007-10-05 22:39 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Theodore Ts'o, linux-kernel, linux-ext4, Christoph Lameter

JBD2: JBD2 replace jbd2_kmalloc with kmalloc

From: Mingming Cao <cmm@us.ibm.com>

This patch cleans up jbd_kmalloc and replace it with kmalloc directly

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
---

 fs/jbd2/journal.c     |   11 +----------
 fs/jbd2/transaction.c |    4 ++--
 include/linux/jbd2.h  |    7 -------
 3 files changed, 3 insertions(+), 19 deletions(-)


Index: linux-2.6.23-rc9/fs/jbd2/journal.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd2/journal.c	2007-10-05 12:08:26.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd2/journal.c	2007-10-05 12:08:32.000000000 -0700
@@ -654,7 +654,7 @@ static journal_t * journal_init_common (
 	journal_t *journal;
 	int err;
 
-	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
 	memset(journal, 0, sizeof(*journal));
@@ -1619,15 +1619,6 @@ size_t journal_tag_bytes(journal_t *jour
 }
 
 /*
- * Simple support for retrying memory allocations.  Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
-	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
  * Journal_head storage management
  */
 static struct kmem_cache *jbd2_journal_head_cache;
Index: linux-2.6.23-rc9/fs/jbd2/transaction.c
===================================================================
--- linux-2.6.23-rc9.orig/fs/jbd2/transaction.c	2007-10-05 12:08:26.000000000 -0700
+++ linux-2.6.23-rc9/fs/jbd2/transaction.c	2007-10-05 12:08:32.000000000 -0700
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
-						GFP_NOFS);
+		new_transaction = kmalloc(sizeof(*new_transaction),
+						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
Index: linux-2.6.23-rc9/include/linux/jbd2.h
===================================================================
--- linux-2.6.23-rc9.orig/include/linux/jbd2.h	2007-10-05 12:08:26.000000000 -0700
+++ linux-2.6.23-rc9/include/linux/jbd2.h	2007-10-05 12:08:32.000000000 -0700
@@ -71,13 +71,6 @@ extern u8 jbd2_journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
-#define jbd_kmalloc(size, flags) \
-	__jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
-#define jbd_rep_kmalloc(size, flags) \
-	__jbd2_kmalloc(__FUNCTION__, (size), (flags), 1)
-
-
 static inline void *jbd2_alloc(size_t size, gfp_t flags)
 {
 	return (void *)__get_free_pages(flags, get_order(size));

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc
  2007-10-05 22:38     ` [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc Mingming Cao
@ 2007-10-08 17:46       ` Christoph Lameter
  2007-10-08 18:26         ` Mingming Cao
  0 siblings, 1 reply; 34+ messages in thread
From: Christoph Lameter @ 2007-10-08 17:46 UTC (permalink / raw)
  To: Mingming Cao
  Cc: Christoph Hellwig, Theodore Ts'o, linux-kernel, linux-ext4

On Fri, 5 Oct 2007, Mingming Cao wrote:

> Index: linux-2.6.23-rc9/fs/jbd/transaction.c
> ===================================================================
> --- linux-2.6.23-rc9.orig/fs/jbd/transaction.c	2007-10-05 12:08:08.000000000 -0700
> +++ linux-2.6.23-rc9/fs/jbd/transaction.c	2007-10-05 12:08:29.000000000 -0700
> @@ -96,8 +96,8 @@ static int start_this_handle(journal_t *
>  
>  alloc_transaction:
>  	if (!journal->j_running_transaction) {
> -		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
> -						GFP_NOFS);
> +		new_transaction = kmalloc(sizeof(*new_transaction),
> +						GFP_NOFS|__GFP_NOFAIL);


Why was a __GFP_NOFAIL added here? I do not see a use of jbd_rep_kmalloc?

> -#define jbd_kmalloc(size, flags) \
> -	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)

journal_oom_retry is no longer used?

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc
  2007-10-08 17:46       ` Christoph Lameter
@ 2007-10-08 18:26         ` Mingming Cao
  2007-10-08 18:34           ` Christoph Lameter
  0 siblings, 1 reply; 34+ messages in thread
From: Mingming Cao @ 2007-10-08 18:26 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Christoph Hellwig, Theodore Ts'o, linux-kernel, linux-ext4

On Mon, 2007-10-08 at 10:46 -0700, Christoph Lameter wrote:
> On Fri, 5 Oct 2007, Mingming Cao wrote:
> 
> > Index: linux-2.6.23-rc9/fs/jbd/transaction.c
> > ===================================================================
> > --- linux-2.6.23-rc9.orig/fs/jbd/transaction.c	2007-10-05 12:08:08.000000000 -0700
> > +++ linux-2.6.23-rc9/fs/jbd/transaction.c	2007-10-05 12:08:29.000000000 -0700
> > @@ -96,8 +96,8 @@ static int start_this_handle(journal_t *
> >  
> >  alloc_transaction:
> >  	if (!journal->j_running_transaction) {
> > -		new_transaction = jbd_kmalloc(sizeof(*new_transaction),
> > -						GFP_NOFS);
> > +		new_transaction = kmalloc(sizeof(*new_transaction),
> > +						GFP_NOFS|__GFP_NOFAIL);
> 
> 
> Why was a __GFP_NOFAIL added here? I do not see a use of jbd_rep_kmalloc?
> 
> > -#define jbd_kmalloc(size, flags) \
> > -	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
> 
> journal_oom_retry is no longer used?
> -

journal_oom_retry (which is defined as 1 currently) is still being used
in revoke.c, the cleanup patch doesn't remove the define of
journal_oom_retry.

Since journal_oom_retry is always 1 to __jbd_kmalloc, 

void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int
retry)
{
        return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
}

So we replace jbd_kmalloc() to kmalloc() with __GFP_NOFAIL flag on in
start_this_handle(). Other two places replacing to kmalloc() is part of
the init process, so no need for __GFP_NOFAIL flag there.

Mingming

> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc
  2007-10-08 18:26         ` Mingming Cao
@ 2007-10-08 18:34           ` Christoph Lameter
  0 siblings, 0 replies; 34+ messages in thread
From: Christoph Lameter @ 2007-10-08 18:34 UTC (permalink / raw)
  To: Mingming Cao
  Cc: Christoph Hellwig, Theodore Ts'o, linux-kernel, linux-ext4

On Mon, 8 Oct 2007, Mingming Cao wrote:

> So we replace jbd_kmalloc() to kmalloc() with __GFP_NOFAIL flag on in
> start_this_handle(). Other two places replacing to kmalloc() is part of
> the init process, so no need for __GFP_NOFAIL flag there.

Acked-by: Christoph Lameter <clameter@sgi.com>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH] jbd2: JBD replace jbd2_kmalloc with kmalloc
  2007-10-05 22:39     ` [PATCH] jbd2: JBD replace jbd2_kmalloc " Mingming Cao
@ 2007-10-08 18:37       ` Christoph Lameter
  0 siblings, 0 replies; 34+ messages in thread
From: Christoph Lameter @ 2007-10-08 18:37 UTC (permalink / raw)
  To: Mingming Cao
  Cc: Christoph Hellwig, Theodore Ts'o, linux-kernel, linux-ext4

Acked-by: Christoph Lameter <clameter@sgi.com>

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2007-10-08 18:37 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-04  5:50 [PATCH, RFC] Ext4 patches planned for submission upstream Theodore Ts'o
2007-10-04  5:50 ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Theodore Ts'o
2007-10-04  5:50   ` [PATCH] jbd/jbd2: Journal initialization doesn't need __GFP_NOFAIL Theodore Ts'o
2007-10-04  5:50     ` [PATCH] JBD2/Ext4: Convert kmalloc to kzalloc in jbd2/ext4 Theodore Ts'o
     [not found]       ` <1191477059-5357-5-git-send-email-tytso@mit.edu>
2007-10-04  5:50         ` [PATCH] jbd2: fix commit code to properly abort journal Theodore Ts'o
2007-10-04  5:50           ` [PATCH] JBD2: debug code cleanup Theodore Ts'o
2007-10-04  5:50             ` [PATCH] Once ext4 will not implement fragment, it is believed it will never be Theodore Ts'o
2007-10-04  5:50               ` [PATCH] ext4: remove #ifdef CONFIG_EXT4_INDEX Theodore Ts'o
2007-10-04  5:50                 ` [PATCH] Ext4: Uninitialized Block Groups Theodore Ts'o
2007-10-04  5:50                   ` [PATCH] ext4: Fix sparse warnings Theodore Ts'o
2007-10-04  5:50                     ` [PATCH] This feature relaxes check restrictions on where each block groups meta Theodore Ts'o
2007-10-04  5:50                       ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext4 Theodore Ts'o
2007-10-04  5:50                         ` [PATCH] ext4: Avoid rec_len overflow with 64KB block size Theodore Ts'o
2007-10-04  5:50                           ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext2 Theodore Ts'o
2007-10-04  5:50                             ` [PATCH] Support large blocksize up to PAGESIZE (max 64KB) for ext3 Theodore Ts'o
2007-10-04  5:50                               ` [PATCH] ext2: Avoid rec_len overflow with 64KB block size Theodore Ts'o
2007-10-04  5:50                                 ` [PATCH] ext3: " Theodore Ts'o
2007-10-04  5:50                                   ` [PATCH] ext4: Convert bg_block_bitmap to bg_block_bitmap_lo Theodore Ts'o
2007-10-04  5:50                                     ` [PATCH] ext4: Convert bg_inode_bitmap and bg_inode_table Theodore Ts'o
2007-10-04  5:50                                       ` [PATCH] ext4: Convert s_blocks_count to s_blocks_count_lo Theodore Ts'o
2007-10-04  5:50                                         ` [PATCH] ext4: Convert s_r_blocks_count and s_free_blocks_count Theodore Ts'o
2007-10-04  5:50                                           ` [PATCH] ext4: Convert ext4_extent.ee_start to ext4_extent.ee_start_lo Theodore Ts'o
2007-10-04  5:50                                             ` [PATCH] ext4: Convert ext4_extent_idx.ei_leaf to ext4_extent_idx.ei_leaf_lo Theodore Ts'o
2007-10-04  5:50                                               ` [PATCH] ext4: sparse fixes Theodore Ts'o
2007-10-04  6:52   ` [PATCH] jbd/jbd2: JBD memory allocation cleanups Christoph Hellwig
2007-10-05 22:33     ` Mingming Cao
2007-10-05 22:35     ` [PATCH] jbd: JBD slab " Mingming Cao
2007-10-05 22:36     ` [PATCH] jbd2: JBD2 " Mingming Cao
2007-10-05 22:38     ` [PATCH] jbd: JBD replace jbd_kmalloc with kmalloc Mingming Cao
2007-10-08 17:46       ` Christoph Lameter
2007-10-08 18:26         ` Mingming Cao
2007-10-08 18:34           ` Christoph Lameter
2007-10-05 22:39     ` [PATCH] jbd2: JBD replace jbd2_kmalloc " Mingming Cao
2007-10-08 18:37       ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).