public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RESEND][PATCH 4/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (7 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 1/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Ext3: do not use journal_release_buffer

The use of journal_release_buffer is unsafe; it can overflow the
journal: When a buffer is stolen from a transaction and later removed
from that transaction with journal_release_buffer, the buffer is not
accounted to the transaction that now "owns" the buffer, and one extra
credit appears to be available.  Don't use journal_release_buffer:

We did rely on the buffer lock to synchronize xattr block accesses, and
get write access to the buffer first to get atomicity.  Return the
mb_cache_entry from ext3_xattr_cache_find instead, and do the
check/update under its lock. Only get write access when we know we will
use the buffer.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -94,9 +94,9 @@ static int ext3_xattr_set_handle2(handle
 				  struct ext3_xattr_header *);
 
 static int ext3_xattr_cache_insert(struct buffer_head *);
-static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
+static struct buffer_head *ext3_xattr_cache_find(struct inode *,
 						 struct ext3_xattr_header *,
-						 int *);
+						 struct mb_cache_entry **);
 static void ext3_xattr_rehash(struct ext3_xattr_header *,
 			      struct ext3_xattr_entry *);
 
@@ -500,33 +500,24 @@ bad_block:		ext3_error(sb, "ext3_xattr_s
 
 	if (header) {
 		struct mb_cache_entry *ce;
-		int credits = 0;
 
 		/* assert(header == HDR(bh)); */
-		if (header->h_refcount != cpu_to_le32(1))
-			goto skip_get_write_access;
-		/* ext3_journal_get_write_access() requires an unlocked bh,
-		   which complicates things here. */
-		error = ext3_journal_get_write_access_credits(handle, bh,
-							      &credits);
-		if (error)
-			goto cleanup;
 		ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
 					bh->b_blocknr);
-		lock_buffer(bh);
 		if (header->h_refcount == cpu_to_le32(1)) {
 			if (ce)
 				mb_cache_entry_free(ce);
 			ea_bdebug(bh, "modifying in-place");
+			error = ext3_journal_get_write_access(handle, bh);
+			if (error)
+				goto cleanup;
+			lock_buffer(bh);
 			/* keep the buffer locked while modifying it. */
 		} else {
 			int offset;
 
 			if (ce)
 				mb_cache_entry_release(ce);
-			unlock_buffer(bh);
-			journal_release_buffer(handle, bh, credits);
-		skip_get_write_access:
 			ea_bdebug(bh, "cloning");
 			header = kmalloc(bh->b_size, GFP_KERNEL);
 			error = -ENOMEM;
@@ -622,17 +613,18 @@ bad_block:		ext3_error(sb, "ext3_xattr_s
 	}
 
 skip_replace:
-	if (IS_LAST_ENTRY(ENTRY(header+1))) {
-		/* This block is now empty. */
-		if (bh && header == HDR(bh))
-			unlock_buffer(bh);  /* we were modifying in-place. */
-		error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
-	} else {
+	if (!IS_LAST_ENTRY(ENTRY(header+1)))
 		ext3_xattr_rehash(header, here);
-		if (bh && header == HDR(bh))
-			unlock_buffer(bh);  /* we were modifying in-place. */
-		error = ext3_xattr_set_handle2(handle, inode, bh, header);
+	if (bh && header == HDR(bh)) {
+		/* we were modifying in-place. */
+		unlock_buffer(bh);
+		error = ext3_journal_dirty_metadata(handle, bh);
+		if (error)
+			goto cleanup;
 	}
+	error = ext3_xattr_set_handle2(handle, inode, bh,
+				       IS_LAST_ENTRY(ENTRY(header+1)) ?
+				       NULL : header);
 
 cleanup:
 	brelse(bh);
@@ -653,10 +645,11 @@ ext3_xattr_set_handle2(handle_t *handle,
 {
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *new_bh = NULL;
-	int credits = 0, error;
+	struct mb_cache_entry *ce = NULL;
+	int error;
 
 	if (header) {
-		new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
+		new_bh = ext3_xattr_cache_find(inode, header, &ce);
 		if (new_bh) {
 			/* We found an identical block in the cache. */
 			if (new_bh == old_bh)
@@ -667,19 +660,26 @@ ext3_xattr_set_handle2(handle_t *handle,
 				ea_bdebug(new_bh, "reusing block");
 
 				error = -EDQUOT;
-				if (DQUOT_ALLOC_BLOCK(inode, 1)) {
-					unlock_buffer(new_bh);
-					journal_release_buffer(handle, new_bh,
-							       credits);
+				if (DQUOT_ALLOC_BLOCK(inode, 1))
 					goto cleanup;
-				}
+				error = ext3_journal_get_write_access(handle, new_bh);
+				if (error)
+					goto cleanup;
+				lock_buffer(new_bh);
 				HDR(new_bh)->h_refcount = cpu_to_le32(1 +
 					le32_to_cpu(HDR(new_bh)->h_refcount));
 				ea_bdebug(new_bh, "refcount now=%d",
 					le32_to_cpu(HDR(new_bh)->h_refcount));
+				unlock_buffer(new_bh);
+				error = ext3_journal_dirty_metadata(handle, new_bh);
+				if (error)
+					goto cleanup;
 			}
-			unlock_buffer(new_bh);
+			mb_cache_entry_release(ce);
+			ce = NULL;
 		} else if (old_bh && header == HDR(old_bh)) {
+			/* We were modifying this block in-place. */
+
 			/* Keep this block. No need to lock the block as we
 			 * don't need to change the reference count. */
 			new_bh = old_bh;
@@ -715,10 +715,10 @@ getblk_failed:
 			ext3_xattr_cache_insert(new_bh);
 
 			ext3_xattr_update_super_block(handle, sb);
+			error = ext3_journal_dirty_metadata(handle, new_bh);
+			if (error)
+				goto cleanup;
 		}
-		error = ext3_journal_dirty_metadata(handle, new_bh);
-		if (error)
-			goto cleanup;
 	}
 
 	/* Update the inode. */
@@ -730,22 +730,14 @@ getblk_failed:
 
 	error = 0;
 	if (old_bh && old_bh != new_bh) {
-		struct mb_cache_entry *ce;
-
 		/*
 		 * If there was an old block, and we are no longer using it,
 		 * release the old block.
 		*/
-		error = ext3_journal_get_write_access(handle, old_bh);
-		if (error)
-			goto cleanup;
 		ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
 					old_bh->b_blocknr);
-		lock_buffer(old_bh);
 		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
 			/* Free the old block. */
-			if (ce)
-				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
 
@@ -754,21 +746,29 @@ getblk_failed:
 			   duplicate the handle before. */
 			get_bh(old_bh);
 			ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
+			if (ce) {
+				mb_cache_entry_free(ce);
+				ce = NULL;
+			}
 		} else {
+			error = ext3_journal_get_write_access(handle, old_bh);
+			if (error)
+				goto cleanup;
 			/* Decrement the refcount only. */
+			lock_buffer(old_bh);
 			HDR(old_bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
-			if (ce)
-				mb_cache_entry_release(ce);
 			DQUOT_FREE_BLOCK(inode, 1);
 			ext3_journal_dirty_metadata(handle, old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
+			unlock_buffer(old_bh);
 		}
-		unlock_buffer(old_bh);
 	}
 
 cleanup:
+	if (ce)
+		mb_cache_entry_release(ce);
 	brelse(new_bh);
 
 	return error;
@@ -819,7 +819,7 @@ void
 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
-	struct mb_cache_entry *ce;
+	struct mb_cache_entry *ce = NULL;
 
 	down_write(&EXT3_I(inode)->xattr_sem);
 	if (!EXT3_I(inode)->i_file_acl)
@@ -838,31 +838,33 @@ ext3_xattr_delete_inode(handle_t *handle
 			EXT3_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	if (ext3_journal_get_write_access(handle, bh) != 0)
-		goto cleanup;
 	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
-	lock_buffer(bh);
 	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		if (ce)
+		if (ce) {
 			mb_cache_entry_free(ce);
+			ce = NULL;
+		}
 		ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
 	} else {
+		if (ext3_journal_get_write_access(handle, bh) != 0)
+			goto cleanup;
+		lock_buffer(bh);
 		HDR(bh)->h_refcount = cpu_to_le32(
 			le32_to_cpu(HDR(bh)->h_refcount) - 1);
-		if (ce)
-			mb_cache_entry_release(ce);
 		ext3_journal_dirty_metadata(handle, bh);
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
 		DQUOT_FREE_BLOCK(inode, 1);
+		unlock_buffer(bh);
 	}
 	ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-	unlock_buffer(bh);
 	EXT3_I(inode)->i_file_acl = 0;
 
 cleanup:
+	if (ce)
+		mb_cache_entry_release(ce);
 	brelse(bh);
 	up_write(&EXT3_I(inode)->xattr_sem);
 }
@@ -960,8 +962,8 @@ ext3_xattr_cmp(struct ext3_xattr_header 
  * not found or an error occurred.
  */
 static struct buffer_head *
-ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
-		      struct ext3_xattr_header *header, int *credits)
+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
+		      struct mb_cache_entry **pce)
 {
 	__u32 hash = le32_to_cpu(header->h_hash);
 	struct mb_cache_entry *ce;
@@ -985,27 +987,17 @@ again:
 			ext3_error(inode->i_sb, "ext3_xattr_cache_find",
 				"inode %ld: block %ld read error",
 				inode->i_ino, (unsigned long) ce->e_block);
-		} else if (ext3_journal_get_write_access_credits(
-				handle, bh, credits) == 0) {
-			/* ext3_journal_get_write_access() requires an unlocked
-			 * bh, which complicates things here. */
-			lock_buffer(bh);
-			if (le32_to_cpu(HDR(bh)->h_refcount) >
-				   EXT3_XATTR_REFCOUNT_MAX) {
-				ea_idebug(inode, "block %ld refcount %d>%d",
-					  (unsigned long) ce->e_block,
-					  le32_to_cpu(HDR(bh)->h_refcount),
+		} else if (le32_to_cpu(HDR(bh)->h_refcount) >
+				EXT3_XATTR_REFCOUNT_MAX) {
+			ea_idebug(inode, "block %ld refcount %d>%d",
+				  (unsigned long) ce->e_block,
+				  le32_to_cpu(HDR(bh)->h_refcount),
 					  EXT3_XATTR_REFCOUNT_MAX);
-			} else if (!ext3_xattr_cmp(header, HDR(bh))) {
-				mb_cache_entry_release(ce);
-				/* buffer will be unlocked by caller */
-				return bh;
-			}
-			unlock_buffer(bh);
-			journal_release_buffer(handle, bh, *credits);
-			*credits = 0;
-			brelse(bh);
+		} else if (ext3_xattr_cmp(header, HDR(bh)) == 0) {
+			*pce = ce;
+			return bh;
 		}
+		brelse(bh);
 		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 8/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (5 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 3/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 1/9] " Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 4/9] " Andreas Gruenbacher
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Hide ext3_get_inode_loc in_mem option

The in_mem optimization in ext3_get_inode_loc avoids a disk read when
only the requested inode in the block group is allocated: In that case
ext3_get_inode_loc assumes that it can recreate the inode from the
in-memory inode. This is incorrect with in-inode extended attributes,
which don't have a shadow copy in memory. Hide the in_mem option and
clarify comments; the subsequent ea-in-inode changes the in_mem check as
required.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs.h
+++ linux-2.6.10/include/linux/ext3_fs.h
@@ -764,6 +764,7 @@ extern int  ext3_sync_inode (handle_t *,
 extern void ext3_discard_reservation (struct inode *);
 extern void ext3_dirty_inode(struct inode *);
 extern int ext3_change_inode_journal_flag(struct inode *, int);
+extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
 extern void ext3_truncate (struct inode *);
 extern void ext3_set_inode_flags(struct inode *);
 extern void ext3_set_aops(struct inode *inode);
Index: linux-2.6.10/fs/ext3/inode.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/inode.c
+++ linux-2.6.10/fs/ext3/inode.c
@@ -2269,13 +2269,13 @@ static unsigned long ext3_get_inode_bloc
 	return block;
 }
 
-/* 
+/*
  * ext3_get_inode_loc returns with an extra refcount against the inode's
- * underlying buffer_head on success.  If `in_mem' is false then we're purely
- * trying to determine the inode's location on-disk and no read need be
- * performed.
+ * underlying buffer_head on success. If 'in_mem' is true, we have all
+ * data in memory that is needed to recreate the on-disk version of this
+ * inode.
  */
-static int ext3_get_inode_loc(struct inode *inode,
+static int __ext3_get_inode_loc(struct inode *inode,
 				struct ext3_iloc *iloc, int in_mem)
 {
 	unsigned long block;
@@ -2300,7 +2300,11 @@ static int ext3_get_inode_loc(struct ino
 			goto has_buffer;
 		}
 
-		/* we can't skip I/O if inode is on a disk only */
+		/*
+		 * If we have all information of the inode in memory and this
+		 * is the only valid inode in the block, we need not read the
+		 * block.
+		 */
 		if (in_mem) {
 			struct buffer_head *bitmap_bh;
 			struct ext3_group_desc *desc;
@@ -2309,10 +2313,6 @@ static int ext3_get_inode_loc(struct ino
 			int block_group;
 			int start;
 
-			/*
-			 * If this is the only valid inode in the block we
-			 * need not read the block.
-			 */
 			block_group = (inode->i_ino - 1) /
 					EXT3_INODES_PER_GROUP(inode->i_sb);
 			inodes_per_buffer = bh->b_size /
@@ -2359,8 +2359,9 @@ static int ext3_get_inode_loc(struct ino
 
 make_io:
 		/*
-		 * There are another valid inodes in the buffer so we must
-		 * read the block from disk
+		 * There are other valid inodes in the buffer, this inode
+		 * has in-inode xattrs, or we don't have this inode in memory.
+		 * Read the block from disk.
 		 */
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
@@ -2380,6 +2381,11 @@ has_buffer:
 	return 0;
 }
 
+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
+{
+	return __ext3_get_inode_loc(inode, iloc, 1);
+}
+
 void ext3_set_inode_flags(struct inode *inode)
 {
 	unsigned int flags = EXT3_I(inode)->i_flags;
@@ -2411,7 +2417,7 @@ void ext3_read_inode(struct inode * inod
 #endif
 	ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
 
-	if (ext3_get_inode_loc(inode, &iloc, 0))
+	if (__ext3_get_inode_loc(inode, &iloc, 0))
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = ext3_raw_inode(&iloc);
@@ -2848,7 +2854,7 @@ ext3_reserve_inode_write(handle_t *handl
 {
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, iloc, 1);
+		err = ext3_get_inode_loc(inode, iloc);
 		if (!err) {
 			BUFFER_TRACE(iloc->bh, "get_write_access");
 			err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -2947,7 +2953,7 @@ ext3_pin_inode(handle_t *handle, struct 
 
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, &iloc, 1);
+		err = ext3_get_inode_loc(inode, &iloc);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 2/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (3 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 7/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 3/9] " Andreas Gruenbacher
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Mbcache cleanup

There is no need to export struct mb_cache outside mbcache.c. Move
struct mb_cache to fs/mbcache.c and remove the superfluous struct
mb_cache_entry_index declaration.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/mbcache.c
===================================================================
--- linux-2.6.10.orig/fs/mbcache.c
+++ linux-2.6.10/fs/mbcache.c
@@ -72,6 +72,20 @@ EXPORT_SYMBOL(mb_cache_entry_find_first)
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 #endif
 
+struct mb_cache {
+	struct list_head		c_cache_list;
+	const char			*c_name;
+	struct mb_cache_op		c_op;
+	atomic_t			c_entry_count;
+	int				c_bucket_bits;
+#ifndef MB_CACHE_INDEXES_COUNT
+	int				c_indexes_count;
+#endif
+	kmem_cache_t			*c_entry_cache;
+	struct list_head		*c_block_hash;
+	struct list_head		*c_indexes_hash[0];
+};
+
 
 /*
  * Global data: list of all mbcache's, lru list, and a spinlock for
@@ -229,7 +243,7 @@ mb_cache_create(const char *name, struct
 	struct mb_cache *cache = NULL;
 
 	if(entry_size < sizeof(struct mb_cache_entry) +
-	   indexes_count * sizeof(struct mb_cache_entry_index))
+	   indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
 		return NULL;
 
 	cache = kmalloc(sizeof(struct mb_cache) +
Index: linux-2.6.10/include/linux/mbcache.h
===================================================================
--- linux-2.6.10.orig/include/linux/mbcache.h
+++ linux-2.6.10/include/linux/mbcache.h
@@ -7,31 +7,6 @@
 /* Hardwire the number of additional indexes */
 #define MB_CACHE_INDEXES_COUNT 1
 
-struct mb_cache_entry;
-
-struct mb_cache_op {
-	int (*free)(struct mb_cache_entry *, int);
-};
-
-struct mb_cache {
-	struct list_head		c_cache_list;
-	const char			*c_name;
-	struct mb_cache_op		c_op;
-	atomic_t			c_entry_count;
-	int				c_bucket_bits;
-#ifndef MB_CACHE_INDEXES_COUNT
-	int				c_indexes_count;
-#endif
-	kmem_cache_t			*c_entry_cache;
-	struct list_head		*c_block_hash;
-	struct list_head		*c_indexes_hash[0];
-};
-
-struct mb_cache_entry_index {
-	struct list_head		o_list;
-	unsigned int			o_key;
-};
-
 struct mb_cache_entry {
 	struct list_head		e_lru_list;
 	struct mb_cache			*e_cache;
@@ -39,7 +14,14 @@ struct mb_cache_entry {
 	struct block_device		*e_bdev;
 	sector_t			e_block;
 	struct list_head		e_block_list;
-	struct mb_cache_entry_index	e_indexes[0];
+	struct {
+		struct list_head	o_list;
+		unsigned int		o_key;
+	} e_indexes[0];
+};
+
+struct mb_cache_op {
+	int (*free)(struct mb_cache_entry *, int);
 };
 
 /* Functions on caches */
@@ -54,7 +36,6 @@ void mb_cache_destroy(struct mb_cache *)
 struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
 int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *,
 			  sector_t, unsigned int[]);
-void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
 void mb_cache_entry_release(struct mb_cache_entry *);
 void mb_cache_entry_free(struct mb_cache_entry *);
 struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *,
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -1080,7 +1080,7 @@ init_ext3_xattr(void)
 {
 	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
 		sizeof(struct mb_cache_entry) +
-		sizeof(struct mb_cache_entry_index), 1, 6);
+		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
 	if (!ext3_xattr_cache)
 		return -ENOMEM;
 	return 0;
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -1016,7 +1016,7 @@ init_ext2_xattr(void)
 {
 	ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
 		sizeof(struct mb_cache_entry) +
-		sizeof(struct mb_cache_entry_index), 1, 6);
+		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
 	if (!ext2_xattr_cache)
 		return -ENOMEM;
 	return 0;
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs
@ 2005-01-13 10:31 Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Hello,

it seems this patchset did not make it through to LKML, so I'm trying to resend.

This set of patches contains cleanups and fixes to the mbcache and ext2/ext3 xattr code, and adds completely reworks Alex Tomas's in-inode attribute patch. I did a fair amount of review and testing on all the patches; Andrew Tridgell did some testing as well. As far as I can tell the patches are ready for direct merging. Alternatively the fixes could be merged while the in-inode attribute support could go into -mm; either way won't make much difference for me. Please comment.

In order for the patches to apply, please first revert Alex's in-inode-ea patch. The patch was not ready for merging yet. I could retrofit the rewrite on top of Alex's patch, but this would cause a lot of work, and would benefit the code quality in no way.

Alex's patch was merged in this changeset:
http://linux.bkbits.net:8080/linux-2.6/cset@41db7b23z5xP6bWuxXwEgBu1AxyStQ?nav=index.html|ChangeSet@-4w


All the patches contain decsriptions. The bugfix patches in this set are:

revert-old-ea-in-inode.diff
  A reverse diff of Alex's patch.

mbcache-cleanup.diff
  A small mbcache cleanup.

mbcache-bug.diff
  Convert the mbcache to protect cache entries readers/writer
  style. This fixes an attribute sharing race.

xattr-credits.diff
  Don't use journal_release_buffer in ext3 xattrs; it's unsafe.

ext3_xattr_release_block.diff
  Factor our common xattr code.

remove-XATTR_INDEX_MAX.diff
  Minor cleaup.


The patches for in-inode attribute support are:

xattr-cleanup.diff
  Prepare xattr code for in-inode xattrs.
  
fix-get_inode_loc.diff
  Hide ext3_get_inode_loc in_mem optimization from users;
  the optimization applies differently with in-inode xattrs.

ea-in-inode.diff
  In-inode attribute support. This is the original patch from Alex
  heavily cleaned up.


Please apply the patches in this order:
  revert-old-ea-in-inode.diff
  mbcache-cleanup.diff
  mbcache-bug.diff
  xattr-credits.diff
  ext3_xattr_release_block.diff
  remove-XATTR_INDEX_MAX.diff
  xattr-cleanup.diff
  fix-get_inode_loc.diff
  ea-in-inode.diff


Regards, 
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 3/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (4 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 2/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 8/9] " Andreas Gruenbacher
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Race in ext[23] xattr sharing code

Andrew Tridgell and Stephen C. Tweedie have reported two different
Oopses caused by a race condition in the mbcache, which is responsible
for extended attribute sharing in ext2 and ext3.  Stephen tracked down
the bug; I did the fix.

Explanation: 
The mbcache caches the locations and content hashes of xattr blocks.
There are two access strategies: [1] xattr block disposal via
mb_cache_entry_get(), [2] xattr block reuse (sharing) via
mb_cache_entry_find_{first,next}().  There is no locking between the two
methods, so between one mb_cache_entry_find_x and the next, a
mb_cache_entry_get might come in, unhash the cache entry, and change the
journaling state of the xattr buffer.  Subsequently, two things can
happen: [a] the next mb_cache_entry_find_x may try to follow the mbcache
hash chain starting from the entry that has become unhashed, which now
is a stale pointer, [b] the block may have become deallocated, and then
we try to reuse it. 

Fix this by converting the mbcache into a readers-writer style lock, and
protect all block accesses in ext2/ext3 by the mbcache entry lock.  This
ensures that destroying blocks is an exclusive operation that may not
overlap xattr block reuse, while allowing multiple "re-users".  Write
access to the xattr block's buffer is protected by the buffer lock. 

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/mbcache.c
===================================================================
--- linux-2.6.10.orig/fs/mbcache.c
+++ linux-2.6.10/fs/mbcache.c
@@ -54,6 +54,10 @@
 		printk(KERN_ERR f); \
 		printk("\n"); \
 	} while(0)
+
+#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
+
+DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
 		
 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
 MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
@@ -140,7 +144,7 @@ __mb_cache_entry_forget(struct mb_cache_
 {
 	struct mb_cache *cache = ce->e_cache;
 
-	mb_assert(atomic_read(&ce->e_used) == 0);
+	mb_assert(!(ce->e_used || ce->e_queued));
 	if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) {
 		/* free failed -- put back on the lru list
 		   for freeing later. */
@@ -157,9 +161,16 @@ __mb_cache_entry_forget(struct mb_cache_
 static inline void
 __mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
 {
-	if (atomic_dec_and_test(&ce->e_used)) {
+	/* Wake up all processes queuing for this cache entry. */
+	if (ce->e_queued)
+		wake_up_all(&mb_cache_queue);
+	if (ce->e_used >= MB_CACHE_WRITER)
+		ce->e_used -= MB_CACHE_WRITER;
+	ce->e_used--;
+	if (!(ce->e_used || ce->e_queued)) {
 		if (!__mb_cache_entry_is_hashed(ce))
 			goto forget;
+		mb_assert(list_empty(&ce->e_lru_list));
 		list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
 	}
 	spin_unlock(&mb_cache_spinlock);
@@ -396,7 +407,8 @@ mb_cache_entry_alloc(struct mb_cache *ca
 		INIT_LIST_HEAD(&ce->e_lru_list);
 		INIT_LIST_HEAD(&ce->e_block_list);
 		ce->e_cache = cache;
-		atomic_set(&ce->e_used, 1);
+		ce->e_used = 1 + MB_CACHE_WRITER;
+		ce->e_queued = 0;
 	}
 	return ce;
 }
@@ -488,7 +500,8 @@ mb_cache_entry_free(struct mb_cache_entr
  *
  * Get a cache entry  by device / block number. (There can only be one entry
  * in the cache per device and block.) Returns NULL if no such cache entry
- * exists.
+ * exists. The returned cache entry is locked for exclusive access ("single
+ * writer").
  */
 struct mb_cache_entry *
 mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
@@ -504,9 +517,27 @@ mb_cache_entry_get(struct mb_cache *cach
 	list_for_each(l, &cache->c_block_hash[bucket]) {
 		ce = list_entry(l, struct mb_cache_entry, e_block_list);
 		if (ce->e_bdev == bdev && ce->e_block == block) {
+			DEFINE_WAIT(wait);
+
 			if (!list_empty(&ce->e_lru_list))
 				list_del_init(&ce->e_lru_list);
-			atomic_inc(&ce->e_used);
+
+			while (ce->e_used > 0) {
+				ce->e_queued++;
+				prepare_to_wait(&mb_cache_queue, &wait,
+						TASK_UNINTERRUPTIBLE);
+				spin_unlock(&mb_cache_spinlock);
+				schedule();
+				spin_lock(&mb_cache_spinlock);
+				ce->e_queued--;
+			}
+			finish_wait(&mb_cache_queue, &wait);
+			ce->e_used += 1 + MB_CACHE_WRITER;
+			
+			if (!__mb_cache_entry_is_hashed(ce)) {
+				__mb_cache_entry_release_unlock(ce);
+				return NULL;
+			}
 			goto cleanup;
 		}
 	}
@@ -523,14 +554,37 @@ static struct mb_cache_entry *
 __mb_cache_entry_find(struct list_head *l, struct list_head *head,
 		      int index, struct block_device *bdev, unsigned int key)
 {
+	DEFINE_WAIT(wait);
+
 	while (l != head) {
 		struct mb_cache_entry *ce =
 			list_entry(l, struct mb_cache_entry,
 			           e_indexes[index].o_list);
 		if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
+			DEFINE_WAIT(wait);
+
 			if (!list_empty(&ce->e_lru_list))
 				list_del_init(&ce->e_lru_list);
-			atomic_inc(&ce->e_used);
+
+			/* Incrementing before holding the lock gives readers
+			   priority over writers. */
+			ce->e_used++;
+			while (ce->e_used >= MB_CACHE_WRITER) {
+				ce->e_queued++;
+				prepare_to_wait(&mb_cache_queue, &wait,
+						TASK_UNINTERRUPTIBLE);
+				spin_unlock(&mb_cache_spinlock);
+				schedule();
+				spin_lock(&mb_cache_spinlock);
+				ce->e_queued--;
+			}
+			finish_wait(&mb_cache_queue, &wait);
+			
+			if (!__mb_cache_entry_is_hashed(ce)) {
+				__mb_cache_entry_release_unlock(ce);
+				spin_lock(&mb_cache_spinlock);
+				return ERR_PTR(-EAGAIN);
+			}
 			return ce;
 		}
 		l = l->next;
@@ -544,7 +598,8 @@ __mb_cache_entry_find(struct list_head *
  *
  * Find the first cache entry on a given device with a certain key in
  * an additional index. Additonal matches can be found with
- * mb_cache_entry_find_next(). Returns NULL if no match was found.
+ * mb_cache_entry_find_next(). Returns NULL if no match was found. The
+ * returned cache entry is locked for shared access ("multiple readers").
  *
  * @cache: the cache to search
  * @index: the number of the additonal index to search (0<=index<indexes_count)
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -97,7 +97,6 @@ static int ext3_xattr_cache_insert(struc
 static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
 						 struct ext3_xattr_header *,
 						 int *);
-static void ext3_xattr_cache_remove(struct buffer_head *);
 static void ext3_xattr_rehash(struct ext3_xattr_header *,
 			      struct ext3_xattr_entry *);
 
@@ -500,6 +499,7 @@ bad_block:		ext3_error(sb, "ext3_xattr_s
 	/* Here we know that we can set the new attribute. */
 
 	if (header) {
+		struct mb_cache_entry *ce;
 		int credits = 0;
 
 		/* assert(header == HDR(bh)); */
@@ -511,14 +511,19 @@ bad_block:		ext3_error(sb, "ext3_xattr_s
 							      &credits);
 		if (error)
 			goto cleanup;
+		ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
+					bh->b_blocknr);
 		lock_buffer(bh);
 		if (header->h_refcount == cpu_to_le32(1)) {
+			if (ce)
+				mb_cache_entry_free(ce);
 			ea_bdebug(bh, "modifying in-place");
-			ext3_xattr_cache_remove(bh);
 			/* keep the buffer locked while modifying it. */
 		} else {
 			int offset;
 
+			if (ce)
+				mb_cache_entry_release(ce);
 			unlock_buffer(bh);
 			journal_release_buffer(handle, bh, credits);
 		skip_get_write_access:
@@ -725,6 +730,8 @@ getblk_failed:
 
 	error = 0;
 	if (old_bh && old_bh != new_bh) {
+		struct mb_cache_entry *ce;
+
 		/*
 		 * If there was an old block, and we are no longer using it,
 		 * release the old block.
@@ -732,9 +739,13 @@ getblk_failed:
 		error = ext3_journal_get_write_access(handle, old_bh);
 		if (error)
 			goto cleanup;
+		ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
+					old_bh->b_blocknr);
 		lock_buffer(old_bh);
 		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
 			/* Free the old block. */
+			if (ce)
+				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
 
@@ -747,6 +758,8 @@ getblk_failed:
 			/* Decrement the refcount only. */
 			HDR(old_bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
+			if (ce)
+				mb_cache_entry_release(ce);
 			DQUOT_FREE_BLOCK(inode, 1);
 			ext3_journal_dirty_metadata(handle, old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
@@ -806,6 +819,7 @@ void
 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
+	struct mb_cache_entry *ce;
 
 	down_write(&EXT3_I(inode)->xattr_sem);
 	if (!EXT3_I(inode)->i_file_acl)
@@ -826,15 +840,19 @@ ext3_xattr_delete_inode(handle_t *handle
 	}
 	if (ext3_journal_get_write_access(handle, bh) != 0)
 		goto cleanup;
+	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
 	lock_buffer(bh);
 	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		ext3_xattr_cache_remove(bh);
+		if (ce)
+			mb_cache_entry_free(ce);
 		ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
 	} else {
 		HDR(bh)->h_refcount = cpu_to_le32(
 			le32_to_cpu(HDR(bh)->h_refcount) - 1);
+		if (ce)
+			mb_cache_entry_release(ce);
 		ext3_journal_dirty_metadata(handle, bh);
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
@@ -951,11 +969,18 @@ ext3_xattr_cache_find(handle_t *handle, 
 	if (!header->h_hash)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again:
 	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
 				       inode->i_sb->s_bdev, hash);
 	while (ce) {
-		struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
+		struct buffer_head *bh;
 
+		if (IS_ERR(ce)) {
+			if (PTR_ERR(ce) == -EAGAIN)
+				goto again;
+			break;
+		}
+		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
 			ext3_error(inode->i_sb, "ext3_xattr_cache_find",
 				"inode %ld: block %ld read error",
@@ -986,27 +1011,6 @@ ext3_xattr_cache_find(handle_t *handle, 
 	return NULL;
 }
 
-/*
- * ext3_xattr_cache_remove()
- *
- * Remove the cache entry of a block from the cache. Called when a
- * block becomes invalid.
- */
-static void
-ext3_xattr_cache_remove(struct buffer_head *bh)
-{
-	struct mb_cache_entry *ce;
-
-	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
-				bh->b_blocknr);
-	if (ce) {
-		ea_bdebug(bh, "removing (%d cache entries remaining)",
-			  atomic_read(&ext3_xattr_cache->c_entry_count)-1);
-		mb_cache_entry_free(ce);
-	} else 
-		ea_bdebug(bh, "no cache entry");
-}
-
 #define NAME_HASH_SHIFT 5
 #define VALUE_HASH_SHIFT 16
 
Index: linux-2.6.10/include/linux/mbcache.h
===================================================================
--- linux-2.6.10.orig/include/linux/mbcache.h
+++ linux-2.6.10/include/linux/mbcache.h
@@ -10,7 +10,8 @@
 struct mb_cache_entry {
 	struct list_head		e_lru_list;
 	struct mb_cache			*e_cache;
-	atomic_t			e_used;
+	unsigned short			e_used;
+	unsigned short			e_queued;
 	struct block_device		*e_bdev;
 	sector_t			e_block;
 	struct list_head		e_block_list;
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -95,7 +95,6 @@ static int ext2_xattr_set2(struct inode 
 static int ext2_xattr_cache_insert(struct buffer_head *);
 static struct buffer_head *ext2_xattr_cache_find(struct inode *,
 						 struct ext2_xattr_header *);
-static void ext2_xattr_cache_remove(struct buffer_head *);
 static void ext2_xattr_rehash(struct ext2_xattr_header *,
 			      struct ext2_xattr_entry *);
 
@@ -494,15 +493,22 @@ bad_block:		ext2_error(sb, "ext2_xattr_s
 	/* Here we know that we can set the new attribute. */
 
 	if (header) {
+		struct mb_cache_entry *ce;
+
 		/* assert(header == HDR(bh)); */
+		ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
+					bh->b_blocknr);
 		lock_buffer(bh);
 		if (header->h_refcount == cpu_to_le32(1)) {
 			ea_bdebug(bh, "modifying in-place");
-			ext2_xattr_cache_remove(bh);
+			if (ce)
+				mb_cache_entry_free(ce);
 			/* keep the buffer locked while modifying it. */
 		} else {
 			int offset;
 
+			if (ce)
+				mb_cache_entry_release(ce);
 			unlock_buffer(bh);
 			ea_bdebug(bh, "cloning");
 			header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -707,13 +713,19 @@ ext2_xattr_set2(struct inode *inode, str
 
 	error = 0;
 	if (old_bh && old_bh != new_bh) {
+		struct mb_cache_entry *ce;
+
 		/*
 		 * If there was an old block and we are no longer using it,
 		 * release the old block.
 		 */
+		ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
+					old_bh->b_blocknr);
 		lock_buffer(old_bh);
 		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
 			/* Free the old block. */
+			if (ce)
+				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
 			/* We let our caller release old_bh, so we
@@ -724,6 +736,8 @@ ext2_xattr_set2(struct inode *inode, str
 			/* Decrement the refcount only. */
 			HDR(old_bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
+			if (ce)
+				mb_cache_entry_release(ce);
 			DQUOT_FREE_BLOCK(inode, 1);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
@@ -748,6 +762,7 @@ void
 ext2_xattr_delete_inode(struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
+	struct mb_cache_entry *ce;
 
 	down_write(&EXT2_I(inode)->xattr_sem);
 	if (!EXT2_I(inode)->i_file_acl)
@@ -767,15 +782,19 @@ ext2_xattr_delete_inode(struct inode *in
 			EXT2_I(inode)->i_file_acl);
 		goto cleanup;
 	}
+	ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
 	lock_buffer(bh);
 	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		ext2_xattr_cache_remove(bh);
+		if (ce)
+			mb_cache_entry_free(ce);
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		bforget(bh);
 	} else {
 		HDR(bh)->h_refcount = cpu_to_le32(
 			le32_to_cpu(HDR(bh)->h_refcount) - 1);
+		if (ce)
+			mb_cache_entry_release(ce);
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
@@ -892,11 +911,19 @@ ext2_xattr_cache_find(struct inode *inod
 	if (!header->h_hash)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again:
 	ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
 				       inode->i_sb->s_bdev, hash);
 	while (ce) {
-		struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
+		struct buffer_head *bh;
+		
+		if (IS_ERR(ce)) {
+			if (PTR_ERR(ce) == -EAGAIN)
+				goto again;
+			break;
+		}
 
+		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
 			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
 				"inode %ld: block %ld read error",
@@ -923,26 +950,6 @@ ext2_xattr_cache_find(struct inode *inod
 	return NULL;
 }
 
-/*
- * ext2_xattr_cache_remove()
- *
- * Remove the cache entry of a block from the cache. Called when a
- * block becomes invalid.
- */
-static void
-ext2_xattr_cache_remove(struct buffer_head *bh)
-{
-	struct mb_cache_entry *ce;
-
-	ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
-	if (ce) {
-		ea_bdebug(bh, "removing (%d cache entries remaining)",
-			  atomic_read(&ext2_xattr_cache->c_entry_count)-1);
-		mb_cache_entry_free(ce);
-	} else 
-		ea_bdebug(bh, "no cache entry");
-}
-
 #define NAME_HASH_SHIFT 5
 #define VALUE_HASH_SHIFT 16
 
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 6/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 5/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 7/9] " Andreas Gruenbacher
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Ext[23]: no spare xattr handler slots needed

The ext3_xattr_set_handle2 and ext3_xattr_delete_inode functions contain
duplicate code to decrease the reference count of an xattr block. Move
this to a separate function.

Also we know we have exclusive access to the inode in
ext3_xattr_delete_inode; there is no need to grab the xattr_sem lock.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -102,7 +102,7 @@ static void ext3_xattr_rehash(struct ext
 
 static struct mb_cache *ext3_xattr_cache;
 
-static struct xattr_handler *ext3_xattr_handler_map[EXT3_XATTR_INDEX_MAX] = {
+static struct xattr_handler *ext3_xattr_handler_map[] = {
 	[EXT3_XATTR_INDEX_USER]		     = &ext3_xattr_user_handler,
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 	[EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext3_xattr_acl_access_handler,
@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
 {
 	struct xattr_handler *handler = NULL;
 
-	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX)
+	if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
 		handler = ext3_xattr_handler_map[name_index];
 	return handler;
 }
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -100,7 +100,7 @@ static void ext2_xattr_rehash(struct ext
 
 static struct mb_cache *ext2_xattr_cache;
 
-static struct xattr_handler *ext2_xattr_handler_map[EXT2_XATTR_INDEX_MAX] = {
+static struct xattr_handler *ext2_xattr_handler_map[] = {
 	[EXT2_XATTR_INDEX_USER]		     = &ext2_xattr_user_handler,
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 	[EXT2_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext2_xattr_acl_access_handler,
@@ -130,7 +130,7 @@ ext2_xattr_handler(int name_index)
 {
 	struct xattr_handler *handler = NULL;
 
-	if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX)
+	if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
 		handler = ext2_xattr_handler_map[name_index];
 	return handler;
 }
Index: linux-2.6.10/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.h
+++ linux-2.6.10/fs/ext3/xattr.h
@@ -16,7 +16,6 @@
 #define EXT3_XATTR_REFCOUNT_MAX		1024
 
 /* Name indexes */
-#define EXT3_XATTR_INDEX_MAX			10
 #define EXT3_XATTR_INDEX_USER			1
 #define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS	2
 #define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT	3
Index: linux-2.6.10/fs/ext2/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.h
+++ linux-2.6.10/fs/ext2/xattr.h
@@ -17,7 +17,6 @@
 #define EXT2_XATTR_REFCOUNT_MAX		1024
 
 /* Name indexes */
-#define EXT2_XATTR_INDEX_MAX			10
 #define EXT2_XATTR_INDEX_USER			1
 #define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS	2
 #define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT	3
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 1/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (6 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 8/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 4/9] " Andreas Gruenbacher
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

This is a reverse diff of the following changeset:

# ChangeSet
#   2005/01/04 21:29:07-08:00 alex@clusterfs.com 
#   [PATCH] ext3: support for EA in inode
#   
#   1) intent of the patch is to get possibility to store EAs in the body of large
#      inode. it saves space and improves performance in some cases
#   
#   2) the patch is quite simple: it works the same way original xattr does, but
#      using other storage (inode body). body has priority over separate block.
#      original routines (ext3_xattr_get, ext3_xattr_list, ext3_xattr_set) are
#      renamed to ext3_xattr_block_*. new routines that handle inode storate are
#      added (ext3_xattr_ibody_get, ext3_xattr_ibody_list, ext3_xattr_ibody_set).
#      routines ext3_xattr_get, ext3_xattr_list and ext3_xattr_set allow user to
#      accesss both the storages transparently
#   
#   3) the change makes sense on filesystem with inode size >= 256 bytes only.
#      2.4 kernels don't support such a filesystems, AFAIK. 2.6 kernels do support
#      and ignore EAs stored in a body w/o the patch
#   
#   4) debugfs and e2fsck need to be patched to deal with EAs in inode
#      the patch will be sent later
#   
#   5) testing results:
#   	a) Andrew Samba Master (tridge) has done successful tests
#   	b) we've been using ea-in-inode feature in Lustre for many months
#   
#   Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
#   Signed-off-by: Alex Tomas <alex@clusterfs.com>
#   Signed-off-by: Andrew Morton <akpm@osdl.org>
#   Signed-off-by: Linus Torvalds <torvalds@osdl.org>
# 
# fs/ext3/ialloc.c
#   2005/01/04 20:24:30-08:00 alex@clusterfs.com +5 -0
#   ext3: support for EA in inode
# 
# fs/ext3/inode.c
#   2005/01/04 20:24:30-08:00 alex@clusterfs.com +9 -1
#   ext3: support for EA in inode
# 
# fs/ext3/xattr.c
#   2005/01/04 20:24:30-08:00 alex@clusterfs.com +603 -36
#   ext3: support for EA in inode
# 
# fs/ext3/xattr.h
#   2005/01/04 18:48:13-08:00 alex@clusterfs.com +2 -1
#   ext3: support for EA in inode
# 
# include/linux/ext3_fs.h
#   2005/01/04 20:24:22-08:00 alex@clusterfs.com +3 -0
#   ext3: support for EA in inode
# 
# include/linux/ext3_fs_i.h
#   2005/01/04 18:48:13-08:00 alex@clusterfs.com +3 -0
#   ext3: support for EA in inode
# 
Index: linux-2.6.10-bk13/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10-bk13.orig/include/linux/ext3_fs.h
+++ linux-2.6.10-bk13/include/linux/ext3_fs.h
@@ -293,8 +293,6 @@ struct ext3_inode {
 			__u32	m_i_reserved2[2];
 		} masix2;
 	} osd2;				/* OS dependent 2 */
-	__u16	i_extra_isize;
-	__u16	i_pad1;
 };
 
 #define i_size_high	i_dir_acl
@@ -757,7 +755,6 @@ extern int ext3_forget(handle_t *, int, 
 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 
-extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *, int);
 extern void ext3_read_inode (struct inode *);
 extern int  ext3_write_inode (struct inode *, int);
 extern int  ext3_setattr (struct dentry *, struct iattr *);
Index: linux-2.6.10-bk13/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.10-bk13.orig/include/linux/ext3_fs_i.h
+++ linux-2.6.10-bk13/include/linux/ext3_fs_i.h
@@ -113,9 +113,6 @@ struct ext3_inode_info {
 	 */
 	loff_t	i_disksize;
 
-	/* on-disk additional length */
-	__u16 i_extra_isize;
-
 	/*
 	 * truncate_sem is for serialising ext3_truncate() against
 	 * ext3_getblock().  In the 2.4 ext2 design, great chunks of inode's
Index: linux-2.6.10-bk13/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/xattr.c
+++ linux-2.6.10-bk13/fs/ext3/xattr.c
@@ -9,7 +9,6 @@
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  *  Red Hat Inc.
- * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
  */
 
 /*
@@ -90,9 +89,10 @@
 # define ea_bdebug(f...)
 #endif
 
-static int ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
-				       struct buffer_head *old_bh,
-				       struct ext3_xattr_header *header);
+static int ext3_xattr_set_handle2(handle_t *, struct inode *,
+				  struct buffer_head *,
+				  struct ext3_xattr_header *);
+
 static int ext3_xattr_cache_insert(struct buffer_head *);
 static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
 						 struct ext3_xattr_header *,
@@ -150,12 +150,17 @@ ext3_listxattr(struct dentry *dentry, ch
 }
 
 /*
- * ext3_xattr_block_get()
+ * ext3_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
  *
- * routine looks for attribute in EA block and returns it's value and size
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
  */
 int
-ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
 	       void *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
@@ -169,6 +174,7 @@ ext3_xattr_block_get(struct inode *inode
 
 	if (name == NULL)
 		return -EINVAL;
+	down_read(&EXT3_I(inode)->xattr_sem);
 	error = -ENODATA;
 	if (!EXT3_I(inode)->i_file_acl)
 		goto cleanup;
@@ -241,87 +247,15 @@ found:
 
 cleanup:
 	brelse(bh);
+	up_read(&EXT3_I(inode)->xattr_sem);
 
 	return error;
 }
 
 /*
- * ext3_xattr_ibody_get()
- *
- * routine looks for attribute in inode body and returns it's value and size
- */
-int
-ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
-	       void *buffer, size_t buffer_size)
-{
-	int size, name_len = strlen(name), storage_size;
-	struct ext3_xattr_entry *last;
-	struct ext3_inode *raw_inode;
-	struct ext3_iloc iloc;
-	char *start, *end;
-	int ret = -ENOENT;
-
-	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
-		return -ENOENT;
-
-	ret = ext3_get_inode_loc(inode, &iloc, 1);
-	if (ret)
-		return ret;
-	raw_inode = ext3_raw_inode(&iloc);
-
-	storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
-				EXT3_GOOD_OLD_INODE_SIZE -
-				EXT3_I(inode)->i_extra_isize -
-				sizeof(__u32);
-	start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
-			EXT3_I(inode)->i_extra_isize;
-	if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) {
-		brelse(iloc.bh);
-		return -ENOENT;
-	}
-	start += sizeof(__u32);
-	end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
-	last = (struct ext3_xattr_entry *) start;
-	while (!IS_LAST_ENTRY(last)) {
-		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-		if (le32_to_cpu(last->e_value_size) > storage_size ||
-				(char *) next >= end) {
-			ext3_error(inode->i_sb, "ext3_xattr_ibody_get",
-				"inode %ld", inode->i_ino);
-			brelse(iloc.bh);
-			return -EIO;
-		}
-		if (name_index == last->e_name_index &&
-		    name_len == last->e_name_len &&
-		    !memcmp(name, last->e_name, name_len))
-			goto found;
-		last = next;
-	}
-
-	/* can't find EA */
-	brelse(iloc.bh);
-	return -ENOENT;
-
-found:
-	size = le32_to_cpu(last->e_value_size);
-	if (buffer) {
-		ret = -ERANGE;
-		if (buffer_size >= size) {
-			memcpy(buffer, start + le16_to_cpu(last->e_value_offs),
-				size);
-			ret = size;
-		}
-	} else
-		ret = size;
-	brelse(iloc.bh);
-	return ret;
-}
-
-/*
- * ext3_xattr_get()
+ * ext3_xattr_list()
  *
- * Copy an extended attribute into the buffer
+ * Copy a list of attribute names into the buffer
  * provided, or compute the buffer size required.
  * Buffer is NULL to compute the size of the buffer required.
  *
@@ -329,31 +263,7 @@ found:
  * used / required on success.
  */
 int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-	       void *buffer, size_t buffer_size)
-{
-	int err;
-
-	down_read(&EXT3_I(inode)->xattr_sem);
-
-	/* try to find attribute in inode body */
-	err = ext3_xattr_ibody_get(inode, name_index, name,
-					buffer, buffer_size);
-	if (err < 0)
-		/* search was unsuccessful, try to find EA in dedicated block */
-		err = ext3_xattr_block_get(inode, name_index, name,
-				buffer, buffer_size);
-	up_read(&EXT3_I(inode)->xattr_sem);
-
-	return err;
-}
-
-/* ext3_xattr_block_list()
- *
- * generate list of attributes stored in EA block
- */
-int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
 	struct ext3_xattr_entry *entry;
@@ -364,6 +274,7 @@ ext3_xattr_block_list(struct inode *inod
 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 		  buffer, (long)buffer_size);
 
+	down_read(&EXT3_I(inode)->xattr_sem);
 	error = 0;
 	if (!EXT3_I(inode)->i_file_acl)
 		goto cleanup;
@@ -373,7 +284,7 @@ ext3_xattr_block_list(struct inode *inod
 	if (!bh)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
-		(int) atomic_read(&(bh->b_count)), (int) le32_to_cpu(HDR(bh)->h_refcount));
+		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
 	end = bh->b_data + bh->b_size;
 	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
 	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
@@ -420,131 +331,11 @@ bad_block:	ext3_error(inode->i_sb, "ext3
 
 cleanup:
 	brelse(bh);
+	up_read(&EXT3_I(inode)->xattr_sem);
 
 	return error;
 }
 
-/* ext3_xattr_ibody_list()
- *
- * generate list of attributes stored in inode body
- */
-int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
-{
-	struct ext3_xattr_entry *last;
-	struct ext3_inode *raw_inode;
-	size_t rest = buffer_size;
-	struct ext3_iloc iloc;
-	char *start, *end;
-	int storage_size;
-	int size = 0;
-	int ret;
-
-	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
-		return 0;
-
-	ret = ext3_get_inode_loc(inode, &iloc, 1);
-	if (ret)
-		return ret;
-	raw_inode = ext3_raw_inode(&iloc);
-
-	storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
-				EXT3_GOOD_OLD_INODE_SIZE -
-				EXT3_I(inode)->i_extra_isize -
-				sizeof(__u32);
-	start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
-			EXT3_I(inode)->i_extra_isize;
-	if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC)
-		goto cleanup;
-	start += sizeof(__u32);
-	end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
-	last = (struct ext3_xattr_entry *) start;
-	while (!IS_LAST_ENTRY(last)) {
-		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-		if ((char *) next >= end) {
-			ext3_error(inode->i_sb, "ext3_xattr_ibody_list",
-					"inode %ld", inode->i_ino);
-			ret = -EIO;
-			goto cleanup;
-		}
-		last = next;
-	}
-
-	last = (struct ext3_xattr_entry *) start;
-	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
-		struct xattr_handler *handler =
-			ext3_xattr_handler(last->e_name_index);
-
-		if (!handler)
-			continue;
-
-		size += handler->list(inode, buffer, rest, last->e_name,
-					last->e_name_len);
-		if (buffer) {
-			if (size > rest) {
-				ret = -ERANGE;
-				goto cleanup;
-			}
-			buffer += size;
-		}
-		rest -= size;
-	}
-	ret = buffer_size - rest; /* total size */
-
-cleanup:
-	brelse(iloc.bh);
-	return ret;
-}
-
-/*
- * ext3_xattr_list()
- *
- * Copy a list of attribute names into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
-{
-	int size = buffer_size;
-	int error;
-
-	down_read(&EXT3_I(inode)->xattr_sem);
-
-	/* get list of attributes stored in inode body */
-	error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
-	if (error < 0) {
-		/* some error occured while collecting
-		 * attributes in inode body */
-		size = 0;
-		goto cleanup;
-	}
-	size = error;
-
-	/* get list of attributes stored in dedicated block */
-	if (buffer) {
-		buffer_size -= error;
-		if (buffer_size <= 0) {
-			buffer = NULL;
-			buffer_size = 0;
-		} else
-			buffer += error;
-	}
-
-	error = ext3_xattr_block_list(inode, buffer, buffer_size);
-	/* listing was successful, so we return len */
-	if (error < 0)
-		size = 0;
-
-cleanup:
-	up_read(&EXT3_I(inode)->xattr_sem);
-	return error + size;
-}
-
 /*
  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
  * not set, set it.
@@ -566,286 +357,6 @@ static void ext3_xattr_update_super_bloc
 }
 
 /*
- * ext3_xattr_ibody_find()
- *
- * search attribute and calculate free space in inode body
- * NOTE: free space includes space our attribute hold
- */
-int
-ext3_xattr_ibody_find(struct inode *inode, int name_index,
-			const char *name, int *free)
-{
-	struct ext3_xattr_entry *last;
-	struct ext3_inode *raw_inode;
-	int name_len = strlen(name);
-	int err, storage_size;
-	struct ext3_iloc iloc;
-	char *start, *end;
-	int ret = -ENOENT;
-
-	*free = 0;
-	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
-		return ret;
-
-	err = ext3_get_inode_loc(inode, &iloc, 1);
-	if (err)
-		return -EIO;
-	raw_inode = ext3_raw_inode(&iloc);
-
-	storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
-				EXT3_GOOD_OLD_INODE_SIZE -
-				EXT3_I(inode)->i_extra_isize -
-				sizeof(__u32);
-	*free = storage_size - sizeof(__u32);
-	start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
-			EXT3_I(inode)->i_extra_isize;
-	if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) {
-		brelse(iloc.bh);
-		return -ENOENT;
-	}
-	start += sizeof(__u32);
-	end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
-	last = (struct ext3_xattr_entry *) start;
-	while (!IS_LAST_ENTRY(last)) {
-		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-		if (le32_to_cpu(last->e_value_size) > storage_size ||
-				(char *) next >= end) {
-			ext3_error(inode->i_sb, "ext3_xattr_ibody_find",
-				"inode %ld", inode->i_ino);
-			brelse(iloc.bh);
-			return -EIO;
-		}
-
-		if (name_index == last->e_name_index &&
-		    name_len == last->e_name_len &&
-		    !memcmp(name, last->e_name, name_len)) {
-			ret = 0;
-		} else {
-			*free -= EXT3_XATTR_LEN(last->e_name_len);
-			*free -= le32_to_cpu(last->e_value_size);
-		}
-		last = next;
-	}
-
-	brelse(iloc.bh);
-	return ret;
-}
-
-/*
- * ext3_xattr_block_find()
- *
- * search attribute and calculate free space in EA block (if it allocated)
- * NOTE: free space includes space our attribute hold
- */
-int
-ext3_xattr_block_find(struct inode *inode, int name_index,
-			const char *name, int *free)
-{
-	struct buffer_head *bh = NULL;
-	struct ext3_xattr_entry *entry;
-	char *end;
-	int name_len, error = -ENOENT;
-
-	if (!EXT3_I(inode)->i_file_acl) {
-		*free = inode->i_sb->s_blocksize -
-			sizeof(struct ext3_xattr_header) -
-			sizeof(__u32);
-		return -ENOENT;
-	}
-	ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
-	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
-	if (!bh)
-		return -EIO;
-	ea_bdebug(bh, "b_count=%d, refcount=%d",
-		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-	end = bh->b_data + bh->b_size;
-	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block:	ext3_error(inode->i_sb, "ext3_xattr_get",
-			"inode %ld: bad block %d", inode->i_ino,
-			EXT3_I(inode)->i_file_acl);
-		brelse(bh);
-		return -EIO;
-	}
-	/* find named attribute */
-	name_len = strlen(name);
-	*free = bh->b_size - sizeof(__u32);
-
-	entry = FIRST_ENTRY(bh);
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext3_xattr_entry *next =
-			EXT3_XATTR_NEXT(entry);
-		if ((char *)next >= end)
-			goto bad_block;
-		if (name_index == entry->e_name_index &&
-		    name_len == entry->e_name_len &&
-		    memcmp(name, entry->e_name, name_len) == 0) {
-			error = 0;
-		} else {
-			*free -= EXT3_XATTR_LEN(entry->e_name_len);
-			*free -= le32_to_cpu(entry->e_value_size);
-		}
-		entry = next;
-	}
-	brelse(bh);
-
-	return error;
-}
-
-/*
- * ext3_xattr_ibody_set()
- *
- * this routine add/remove/replace attribute in inode body
- */
-int
-ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index,
-		      const char *name, const void *value, size_t value_len,
-		      int flags)
-{
-	struct ext3_xattr_entry *last, *next, *here = NULL;
-	struct ext3_inode *raw_inode;
-	int name_len = strlen(name);
-	int esize = EXT3_XATTR_LEN(name_len);
-	struct buffer_head *bh;
-	int err, storage_size;
-	struct ext3_iloc iloc;
-	int free, min_offs;
-	char *start, *end;
-
-	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
-		return -ENOSPC;
-
-	err = ext3_get_inode_loc(inode, &iloc, 1);
-	if (err)
-		return err;
-	raw_inode = ext3_raw_inode(&iloc);
-	bh = iloc.bh;
-
-	storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
-				EXT3_GOOD_OLD_INODE_SIZE -
-				EXT3_I(inode)->i_extra_isize -
-				sizeof(__u32);
-	start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
-			EXT3_I(inode)->i_extra_isize;
-	if ((*(__u32*) start) != EXT3_XATTR_MAGIC) {
-		/* inode had no attributes before */
-		*((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC);
-	}
-	start += sizeof(__u32);
-	end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	min_offs = storage_size;
-	free = storage_size - sizeof(__u32);
-
-	last = (struct ext3_xattr_entry *) start;
-	while (!IS_LAST_ENTRY(last)) {
-		next = EXT3_XATTR_NEXT(last);
-		if (le32_to_cpu(last->e_value_size) > storage_size ||
-				(char *) next >= end) {
-			ext3_error(inode->i_sb, "ext3_xattr_ibody_set",
-				"inode %ld", inode->i_ino);
-			brelse(bh);
-			return -EIO;
-		}
-
-		if (last->e_value_size) {
-			int offs = le16_to_cpu(last->e_value_offs);
-			if (offs < min_offs)
-				min_offs = offs;
-		}
-		if (name_index == last->e_name_index &&
-			name_len == last->e_name_len &&
-			!memcmp(name, last->e_name, name_len))
-			here = last;
-		else {
-			/* we calculate all but our attribute
-			 * because it will be removed before changing */
-			free -= EXT3_XATTR_LEN(last->e_name_len);
-			free -= le32_to_cpu(last->e_value_size);
-		}
-		last = next;
-	}
-
-	if (value && (esize + value_len > free)) {
-		brelse(bh);
-		return -ENOSPC;
-	}
-
-	err = ext3_reserve_inode_write(handle, inode, &iloc);
-	if (err) {
-		brelse(bh);
-		return err;
-	}
-
-	/* optimization: can we simple replace old value ? */
-	if (here && value_len == le32_to_cpu(here->e_value_size)) {
-		int offs = le16_to_cpu(here->e_value_offs);
-		memcpy(start + offs, value, value_len);
-		goto done;
-	}
-
-	if (here) {
-		/* time to remove old value */
-		struct ext3_xattr_entry *e;
-		int size = le32_to_cpu(here->e_value_size);
-		int border = le16_to_cpu(here->e_value_offs);
-		char *src;
-
-		/* move tail */
-		memmove(start + min_offs + size, start + min_offs,
-				border - min_offs);
-
-		/* recalculate offsets */
-		e = (struct ext3_xattr_entry *) start;
-		while (!IS_LAST_ENTRY(e)) {
-			struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e);
-			int offs = le16_to_cpu(e->e_value_offs);
-			if (offs < border)
-				e->e_value_offs =
-					cpu_to_le16(offs + size);
-			e = next;
-		}
-		min_offs += size;
-
-		/* remove entry */
-		border = EXT3_XATTR_LEN(here->e_name_len);
-		src = (char *) here + EXT3_XATTR_LEN(here->e_name_len);
-		size = (char *) last - src;
-		if ((char *) here + size > end)
-			printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n",
-					__FILE__, __LINE__, here, size, end);
-		memmove(here, src, size);
-		last = (struct ext3_xattr_entry *) ((char *) last - border);
-		*((__u32 *) last) = 0;
-	}
-
-	if (value) {
-		int offs = min_offs - value_len;
-		/* use last to create new entry */
-		last->e_name_len = strlen(name);
-		last->e_name_index = name_index;
-		last->e_value_offs = cpu_to_le16(offs);
-		last->e_value_size = cpu_to_le32(value_len);
-		last->e_hash = last->e_value_block = 0;
-		memset(last->e_name, 0, esize);
-		memcpy(last->e_name, name, last->e_name_len);
-		if (start + offs + value_len > end)
-			printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n",
-					__FILE__, __LINE__, start, offs,
-					value_len, end);
-		memcpy(start + offs, value, value_len);
-		last = EXT3_XATTR_NEXT(last);
-		*((__u32 *) last) = 0;
-	}
-
-done:
-	ext3_mark_iloc_dirty(handle, inode, &iloc);
-	brelse(bh);
-
-	return 0;
-}
-
-/*
  * ext3_xattr_set_handle()
  *
  * Create, replace or remove an extended attribute for this inode. Buffer
@@ -859,100 +370,6 @@ done:
  */
 int
 ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-		const char *name, const void *value, size_t value_len,
-		int flags)
-{
-	int free1 = -1, free2 = -1;
-	int err, where = 0, total;
-	int name_len;
-
-	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
-		  name_index, name, value, (long)value_len);
-
-	if (IS_RDONLY(inode))
-		return -EROFS;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-	if (value == NULL)
-		value_len = 0;
-	if (name == NULL)
-		return -EINVAL;
-	name_len = strlen(name);
-	if (name_len > 255 || value_len > inode->i_sb->s_blocksize)
-		return -ERANGE;
-	down_write(&EXT3_I(inode)->xattr_sem);
-
-#define EX_FOUND_IN_IBODY	1
-#define EX_FOUND_IN_BLOCK	2
-
-	/* try to find attribute in inode body */
-	err = ext3_xattr_ibody_find(inode, name_index, name, &free1);
-	if (err == 0) {
-		/* found EA in inode */
-		where = EX_FOUND_IN_IBODY;
-	} else if (err == -ENOENT) {
-		/* there is no such attribute in inode body */
-		/* try to find attribute in dedicated block */
-		err = ext3_xattr_block_find(inode, name_index, name, &free2);
-		if (err != 0 && err != -ENOENT) {
-			/* not found EA in block */
-			goto finish;
-		} else if (err == 0) {
-			/* found EA in block */
-			where = EX_FOUND_IN_BLOCK;
-		}
-	} else
-		goto finish;
-
-	/* check flags: may replace? may create ? */
-	if (where && (flags & XATTR_CREATE)) {
-		err = -EEXIST;
-		goto finish;
-	} else if (!where && (flags & XATTR_REPLACE)) {
-		err = -ENODATA;
-		goto finish;
-	}
-
-	/* check if we have enough space to store attribute */
-	total = EXT3_XATTR_LEN(strlen(name)) + value_len;
-	if (total > free1 && free2 > 0 && total > free2) {
-		/* have no enough space */
-		err = -ENOSPC;
-		goto finish;
-	}
-
-	/* there are two cases when we want to remove EA from original storage:
-	 * a) EA is stored in the inode, but new value doesn't fit
-	 * b) EA is stored in the block, but new value fit in inode
-	 */
-	if (where == EX_FOUND_IN_IBODY && total > free1)
-		ext3_xattr_ibody_set(handle, inode, name_index, name,
-					NULL, 0, flags);
-	else if (where == EX_FOUND_IN_BLOCK && total <= free1)
-		ext3_xattr_block_set(handle, inode, name_index,
-					name, NULL, 0, flags);
-
-	/* try to store EA in inode body */
-	err = ext3_xattr_ibody_set(handle, inode, name_index, name,
-					value, value_len, flags);
-	if (err) {
-		/* can't store EA in inode body: try to store in block */
-		err = ext3_xattr_block_set(handle, inode, name_index, name,
-						value, value_len, flags);
-	}
-
-finish:
-	up_write(&EXT3_I(inode)->xattr_sem);
-	return err;
-}
-
-/*
- * ext3_xattr_block_set()
- *
- * this routine add/remove/replace attribute in EA block
- */
-int
-ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index,
 		      const char *name, const void *value, size_t value_len,
 		      int flags)
 {
@@ -975,7 +392,22 @@ ext3_xattr_block_set(handle_t *handle, s
 	 *             towards the end of the block).
 	 * end -- Points right after the block pointed to by header.
 	 */
+
+	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
+		  name_index, name, value, (long)value_len);
+
+	if (IS_RDONLY(inode))
+		return -EROFS;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		return -EPERM;
+	if (value == NULL)
+		value_len = 0;
+	if (name == NULL)
+		return -EINVAL;
 	name_len = strlen(name);
+	if (name_len > 255 || value_len > sb->s_blocksize)
+		return -ERANGE;
+	down_write(&EXT3_I(inode)->xattr_sem);
 	if (EXT3_I(inode)->i_file_acl) {
 		/* The inode already has an extended attribute block. */
 		bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
@@ -1201,6 +633,7 @@ cleanup:
 	brelse(bh);
 	if (!(bh && header == HDR(bh)))
 		kfree(header);
+	up_write(&EXT3_I(inode)->xattr_sem);
 
 	return error;
 }
Index: linux-2.6.10-bk13/fs/ext3/inode.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/inode.c
+++ linux-2.6.10-bk13/fs/ext3/inode.c
@@ -2275,7 +2275,7 @@ static unsigned long ext3_get_inode_bloc
  * trying to determine the inode's location on-disk and no read need be
  * performed.
  */
-int ext3_get_inode_loc(struct inode *inode,
+static int ext3_get_inode_loc(struct inode *inode,
 				struct ext3_iloc *iloc, int in_mem)
 {
 	unsigned long block;
@@ -2483,11 +2483,6 @@ void ext3_read_inode(struct inode * inod
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
 
-	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
-		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
-	else
-		ei->i_extra_isize = 0;
-
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext3_file_inode_operations;
 		inode->i_fop = &ext3_file_operations;
@@ -2623,9 +2618,6 @@ static int ext3_do_update_inode(handle_t
 	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
 		raw_inode->i_block[block] = ei->i_data[block];
 
-	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
-		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
-
 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
 	rc = ext3_journal_dirty_metadata(handle, bh);
 	if (!err)
Index: linux-2.6.10-bk13/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/xattr.h
+++ linux-2.6.10-bk13/fs/ext3/xattr.h
@@ -67,8 +67,7 @@ extern ssize_t ext3_listxattr(struct den
 extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext3_xattr_list(struct inode *, char *, size_t);
 extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
-extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 
 extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
 extern void ext3_xattr_put_super(struct super_block *);
Index: linux-2.6.10-bk13/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/ialloc.c
+++ linux-2.6.10-bk13/fs/ext3/ialloc.c
@@ -596,11 +596,6 @@ got:
 	spin_unlock(&sbi->s_next_gen_lock);
 
 	ei->i_state = EXT3_STATE_NEW;
-	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-		ei->i_extra_isize = sizeof(__u16)	/* i_extra_isize */
-				+ sizeof(__u16);	/* i_pad1 */
-	} else
-		ei->i_extra_isize = 0;
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 9/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 5/9] " Andreas Gruenbacher
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

In-inode extended attributes for ext3

This started of as a patch by Alex Tomas <alex@clusterfs.com> and got an
overhaul by me. The on-disk structure used is the same as in Alex's
original patch.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c	2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/xattr.c	2005-01-11 02:58:47.296493192 +0100
@@ -9,18 +9,21 @@
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  *  Red Hat Inc.
+ * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
+ *  and Andreas Gruenbacher <agruen@suse.de>.
  */
 
 /*
- * Extended attributes are stored on disk blocks allocated outside of
- * any inode. The i_file_acl field is then made to point to this allocated
- * block. If all extended attributes of an inode are identical, these
- * inodes may share the same extended attribute block. Such situations
- * are automatically detected by keeping a cache of recent attribute block
- * numbers and hashes over the block's contents in memory.
+ * Extended attributes are stored directly in inodes (on file systems with
+ * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
+ * field contains the block number if an inode uses an additional block. All
+ * attributes must fit in the inode and one additional block. Blocks that
+ * contain the identical set of attributes may be shared among several inodes.
+ * Identical blocks are detected by keeping a cache of blocks that have
+ * recently been accessed.
  *
- *
- * Extended attribute block layout:
+ * The attributes in inodes and on blocks have a different header; the entries
+ * are stored in the same format:
  *
  *   +------------------+
  *   | header           |
@@ -34,23 +37,17 @@
  *   | value 2          | |
  *   +------------------+
  *
- * The block header is followed by multiple entry descriptors. These entry
- * descriptors are variable in size, and alligned to EXT3_XATTR_PAD
- * byte boundaries. The entry descriptors are sorted by attribute name,
- * so that two extended attribute blocks can be compared efficiently.
- *
- * Attribute values are aligned to the end of the block, stored in
- * no specific order. They are also padded to EXT3_XATTR_PAD byte
- * boundaries. No additional gaps are left between them.
+ * The header is followed by multiple entry descriptors. Descriptors are
+ * kept sorted. The attribute values are aligned to the end of the block
+ * in no specific order.
  *
  * Locking strategy
  * ----------------
  * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
  * EA blocks are only changed if they are exclusive to an inode, so
  * holding xattr_sem also means that nothing but the EA block's reference
- * count will change. Multiple writers to an EA block are synchronized
- * by the bh lock. No more than a single bh lock is held at any time
- * to avoid deadlocks.
+ * count can change. Multiple writers to the same block are synchronized
+ * by the buffer lock.
  */
 
 #include <linux/init.h>
@@ -69,6 +66,13 @@
 #define BFIRST(bh) ENTRY(BHDR(bh)+1)
 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
 
+#define IHDR(inode, raw_inode) \
+	((struct ext3_xattr_ibody_header *) \
+		((void *)raw_inode + \
+		 EXT3_GOOD_OLD_INODE_SIZE + \
+		 EXT3_I(inode)->i_extra_isize))
+#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
+
 #ifdef EXT3_XATTR_DEBUG
 # define ea_idebug(inode, f...) do { \
 		printk(KERN_DEBUG "inode %s:%ld: ", \
@@ -206,19 +210,9 @@
 	return cmp ? -ENODATA : 0;
 }
 
-/*
- * ext3_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
 int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-	       void *buffer, size_t buffer_size)
+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+		     void *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
 	struct ext3_xattr_entry *entry;
@@ -228,9 +222,6 @@
 	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
 		  name_index, name, buffer, (long)buffer_size);
 
-	if (name == NULL)
-		return -EINVAL;
-	down_read(&EXT3_I(inode)->xattr_sem);
 	error = -ENODATA;
 	if (!EXT3_I(inode)->i_file_acl)
 		goto cleanup;
@@ -266,6 +257,75 @@
 
 cleanup:
 	brelse(bh);
+	return error;
+}
+
+static int
+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
+		     void *buffer, size_t buffer_size)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_xattr_entry *entry;
+	struct ext3_inode *raw_inode;
+	struct ext3_iloc iloc;
+	size_t size;
+	void *end;
+	int error;
+
+	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE ||
+	    !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+		return -ENODATA;
+	error = ext3_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+	raw_inode = ext3_raw_inode(&iloc);
+	header = IHDR(inode, raw_inode);
+	entry = IFIRST(header);
+	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	error = ext3_xattr_check_names(entry, end);
+	if (error)
+		goto cleanup;
+	error = ext3_xattr_find_entry(&entry, name_index, name,
+				      end - (void *)entry, 0);
+	if (error)
+		goto cleanup;
+	size = le32_to_cpu(entry->e_value_size);
+	if (buffer) {
+		error = -ERANGE;
+		if (size > buffer_size)
+			goto cleanup;
+		memcpy(buffer, (void *)IFIRST(header) +
+		       le16_to_cpu(entry->e_value_offs), size);
+	}
+	error = size;
+
+cleanup:
+	brelse(iloc.bh);
+	return error;
+}
+
+/*
+ * ext3_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+	       void *buffer, size_t buffer_size)
+{
+	int error;
+
+	down_read(&EXT3_I(inode)->xattr_sem);
+	error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
+				     buffer_size);
+	if (error == -ENODATA)
+		error = ext3_xattr_block_get(inode, name_index, name, buffer,
+					     buffer_size);
 	up_read(&EXT3_I(inode)->xattr_sem);
 	return error;
 }
@@ -295,18 +355,8 @@
 	return buffer_size - rest;
 }
 
-/*
- * ext3_xattr_list()
- *
- * Copy a list of attribute names into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
 int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
 	int error;
@@ -314,7 +364,6 @@
 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 		  buffer, (long)buffer_size);
 
-	down_read(&EXT3_I(inode)->xattr_sem);
 	error = 0;
 	if (!EXT3_I(inode)->i_file_acl)
 		goto cleanup;
@@ -337,11 +386,71 @@
 
 cleanup:
 	brelse(bh);
-	up_read(&EXT3_I(inode)->xattr_sem);
 
 	return error;
 }
 
+static int
+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_inode *raw_inode;
+	struct ext3_iloc iloc;
+	void *end;
+	int error;
+
+	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE ||
+	    !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+		return 0;
+	error = ext3_get_inode_loc(inode, &iloc);
+	if (error)
+		return error;
+	raw_inode = ext3_raw_inode(&iloc);
+	header = IHDR(inode, raw_inode);
+	end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	error = ext3_xattr_check_names(IFIRST(header), end);
+	if (error)
+		goto cleanup;
+	error = ext3_xattr_list_entries(inode, IFIRST(header),
+					buffer, buffer_size);
+
+cleanup:
+	brelse(iloc.bh);
+	return error;
+}
+
+/*
+ * ext3_xattr_list()
+ *
+ * Copy a list of attribute names into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+	int i_error, b_error;
+
+	down_read(&EXT3_I(inode)->xattr_sem);
+	i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+	if (i_error < 0) {
+		b_error = 0;
+	} else {
+		if (buffer) {
+			buffer += i_error;
+			buffer_size -= i_error;
+		}
+		b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+		if (b_error < 0)
+			i_error = 0;
+	}
+	up_read(&EXT3_I(inode)->xattr_sem);
+	return i_error + b_error;
+}
+
 /*
  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
  * not set, set it.
@@ -514,175 +623,146 @@
 	return 0;
 }
 
-/*
- * ext3_xattr_set_handle()
- *
- * Create, replace or remove an extended attribute for this inode. Buffer
- * is NULL to remove an existing extended attribute, and non-NULL to
- * either replace an existing extended attribute, or create a new extended
- * attribute. The flags XATTR_REPLACE and XATTR_CREATE
- * specify that an extended attribute must exist and must not exist
- * previous to the call, respectively.
- *
- * Returns 0, or a negative error number on failure.
- */
+struct ext3_xattr_block_find {
+	struct ext3_xattr_search s;
+	struct buffer_head *bh;
+};
+
 int
-ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-		      const char *name, const void *value, size_t value_len,
-		      int flags)
+ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
+		      struct ext3_xattr_block_find *bs)
 {
 	struct super_block *sb = inode->i_sb;
-	struct buffer_head *old_bh = NULL, *new_bh = NULL;
-	struct ext3_xattr_info i = {
-		.name_index = name_index,
-		.name = name,
-		.value = value,
-		.value_len = value_len,
-	};
-	struct ext3_xattr_search s = {
-		.not_found = 1,
-	};
-	struct mb_cache_entry *ce = NULL;
 	int error;
 
-#define header ((struct ext3_xattr_header *)(s.base))
-
-	/*
-	 * header -- Points either into bh, or to a temporarily
-	 *           allocated buffer.
-	 */
-
 	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
-		  name_index, name, value, (long)value_len);
+		  i->name_index, i->name, i->value, (long)i->value_len);
 
-	if (IS_RDONLY(inode))
-		return -EROFS;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return -EPERM;
-	if (i.value == NULL)
-		i.value_len = 0;
-	down_write(&EXT3_I(inode)->xattr_sem);
 	if (EXT3_I(inode)->i_file_acl) {
 		/* The inode already has an extended attribute block. */
-		old_bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+		bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
 		error = -EIO;
-		if (!old_bh)
+		if (!bs->bh)
 			goto cleanup;
-		ea_bdebug(old_bh, "b_count=%d, refcount=%d",
-			atomic_read(&(old_bh->b_count)),
-			le32_to_cpu(BHDR(old_bh)->h_refcount));
-		if (ext3_xattr_check_block(old_bh)) {
-bad_block:		ext3_error(sb, __FUNCTION__,
+		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
+			atomic_read(&(bs->bh->b_count)),
+			le32_to_cpu(BHDR(bs->bh)->h_refcount));
+		if (ext3_xattr_check_block(bs->bh)) {
+			ext3_error(sb, __FUNCTION__,
 				"inode %ld: bad block %d", inode->i_ino,
 				EXT3_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
 		/* Find the named attribute. */
-		s.base = BHDR(old_bh);
-		s.first = BFIRST(old_bh);
-		s.end = old_bh->b_data + old_bh->b_size;
-		s.here = BFIRST(old_bh);
-		error = ext3_xattr_find_entry(&s.here, name_index, name,
-					      old_bh->b_size, 1);
+		bs->s.base = BHDR(bs->bh);
+		bs->s.first = BFIRST(bs->bh);
+		bs->s.end = bs->bh->b_data + bs->bh->b_size;
+		bs->s.here = bs->s.first;
+		error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
+					      i->name, bs->bh->b_size, 1);
 		if (error && error != -ENODATA)
 			goto cleanup;
-		s.not_found = error;
+		bs->s.not_found = error;
 	}
+	error = 0;
 
-	if (s.not_found) {
-		/* Request to remove a nonexistent attribute? */
-		error = -ENODATA;
-		if (flags & XATTR_REPLACE)
-			goto cleanup;
-		error = 0;
-		if (value == NULL)
-			goto cleanup;
-	} else {
-		/* Request to create an existing attribute? */
-		error = -EEXIST;
-		if (flags & XATTR_CREATE)
-			goto cleanup;
-	}
+cleanup:
+	return error;
+}
 
-	if (header) {
-		/* assert(header == BHDR(old_bh)); */
-		ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
-					old_bh->b_blocknr);
-		if (header->h_refcount == cpu_to_le32(1)) {
+static int
+ext3_xattr_block_set(handle_t *handle, struct inode *inode,
+		     struct ext3_xattr_info *i,
+		     struct ext3_xattr_block_find *bs)
+{
+	struct super_block *sb = inode->i_sb;
+	struct buffer_head *new_bh = NULL;
+	struct ext3_xattr_search *s = &bs->s;
+	struct mb_cache_entry *ce = NULL;
+	int error;
+
+#define header(x) ((struct ext3_xattr_header *)(x))
+
+	if (i->value && i->value_len > sb->s_blocksize)
+		return -ENOSPC;
+	if (s->base) {
+		ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
+					bs->bh->b_blocknr);
+		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 			if (ce) {
 				mb_cache_entry_free(ce);
 				ce = NULL;
 			}
-			ea_bdebug(old_bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, old_bh);
+			ea_bdebug(bs->bh, "modifying in-place");
+			error = ext3_journal_get_write_access(handle, bs->bh);
 			if (error)
 				goto cleanup;
-			lock_buffer(old_bh);
-			error = ext3_xattr_set_entry(&i, &s);
+			lock_buffer(bs->bh);
+			error = ext3_xattr_set_entry(i, s);
 			if (!error) {
-				if (!IS_LAST_ENTRY(s.first))
-					ext3_xattr_rehash(header, s.here);
-				ext3_xattr_cache_insert(old_bh);
+				if (!IS_LAST_ENTRY(s->first))
+					ext3_xattr_rehash(header(s->base),
+							  s->here);
+				ext3_xattr_cache_insert(bs->bh);
 			}
-			unlock_buffer(old_bh);
+			unlock_buffer(bs->bh);
 			if (error == -EIO)
 				goto bad_block;
-			if (!error && old_bh && header == BHDR(old_bh)) {
+			if (!error)
 				error = ext3_journal_dirty_metadata(handle,
-								    old_bh);
-			}
+								    bs->bh);
 			if (error)
 				goto cleanup;
 			goto inserted;
 		} else {
-			int offset = (char *)s.here - old_bh->b_data;
+			int offset = (char *)s->here - bs->bh->b_data;
 
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
 			}
-			ea_bdebug(old_bh, "cloning");
-			s.base = kmalloc(old_bh->b_size, GFP_KERNEL);
-			/*assert(header == s.base)*/
+			ea_bdebug(bs->bh, "cloning");
+			s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
 			error = -ENOMEM;
-			if (header == NULL)
+			if (s->base == NULL)
 				goto cleanup;
-			memcpy(header, BHDR(old_bh), old_bh->b_size);
-			s.first = ENTRY(header+1);
-			header->h_refcount = cpu_to_le32(1);
-			s.here = ENTRY(s.base + offset);
-			s.end = header + old_bh->b_size;
+			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+			s->first = ENTRY(header(s->base)+1);
+			header(s->base)->h_refcount = cpu_to_le32(1);
+			s->here = ENTRY(s->base + offset);
+			s->end = s->base + bs->bh->b_size;
 		}
 	} else {
 		/* Allocate a buffer where we construct the new block. */
-		s.base = kmalloc(sb->s_blocksize, GFP_KERNEL);
-		/*assert(header == s.base)*/
+		s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		/* assert(header == s->base) */
 		error = -ENOMEM;
-		if (header == NULL)
+		if (s->base == NULL)
 			goto cleanup;
-		memset(header, 0, sb->s_blocksize);
-		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-		header->h_blocks = header->h_refcount = cpu_to_le32(1);
-		s.first = ENTRY(header+1);
-		s.here = ENTRY(header+1);
-		s.end = (void *)header + sb->s_blocksize;
+		memset(s->base, 0, sb->s_blocksize);
+		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+		header(s->base)->h_blocks = cpu_to_le32(1);
+		header(s->base)->h_refcount = cpu_to_le32(1);
+		s->first = ENTRY(header(s->base)+1);
+		s->here = ENTRY(header(s->base)+1);
+		s->end = s->base + sb->s_blocksize;
 	}
 
-	error = ext3_xattr_set_entry(&i, &s);
+	error = ext3_xattr_set_entry(i, s);
 	if (error == -EIO)
 		goto bad_block;
 	if (error)
 		goto cleanup;
-	if (!IS_LAST_ENTRY(s.first))
-		ext3_xattr_rehash(header, s.here);
+	if (!IS_LAST_ENTRY(s->first))
+		ext3_xattr_rehash(header(s->base), s->here);
 
 inserted:
-	if (!IS_LAST_ENTRY(s.first)) {
-		new_bh = ext3_xattr_cache_find(inode, header, &ce);
+	if (!IS_LAST_ENTRY(s->first)) {
+		new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
 		if (new_bh) {
 			/* We found an identical block in the cache. */
-			if (new_bh == old_bh)
+			if (new_bh == bs->bh)
 				ea_bdebug(new_bh, "keeping");
 			else {
 				/* The old block is released after updating
@@ -690,7 +770,8 @@
 				error = -EDQUOT;
 				if (DQUOT_ALLOC_BLOCK(inode, 1))
 					goto cleanup;
-				error = ext3_journal_get_write_access(handle, new_bh);
+				error = ext3_journal_get_write_access(handle,
+								      new_bh);
 				if (error)
 					goto cleanup;
 				lock_buffer(new_bh);
@@ -706,10 +787,10 @@
 			}
 			mb_cache_entry_release(ce);
 			ce = NULL;
-		} else if (old_bh && header == BHDR(old_bh)) {
+		} else if (bs->bh && s->base == bs->bh->b_data) {
 			/* We were modifying this block in-place. */
-			ea_bdebug(old_bh, "keeping this block");
-			new_bh = old_bh;
+			ea_bdebug(bs->bh, "keeping this block");
+			new_bh = bs->bh;
 			get_bh(new_bh);
 		} else {
 			/* We need to allocate a new block */
@@ -735,7 +816,7 @@
 				unlock_buffer(new_bh);
 				goto getblk_failed;
 			}
-			memcpy(new_bh->b_data, header, new_bh->b_size);
+			memcpy(new_bh->b_data, s->base, new_bh->b_size);
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
 			ext3_xattr_cache_insert(new_bh);
@@ -748,30 +829,196 @@
 
 	/* Update the inode. */
 	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-	inode->i_ctime = CURRENT_TIME_SEC;
-	ext3_mark_inode_dirty(handle, inode);
-	if (IS_SYNC(inode))
-		handle->h_sync = 1;
 
 	/* Drop the previous xattr block. */
-	if (old_bh && old_bh != new_bh)
-		ext3_xattr_release_block(handle, inode, old_bh);
+	if (bs->bh && bs->bh != new_bh)
+		ext3_xattr_release_block(handle, inode, bs->bh);
 	error = 0;
 
 cleanup:
 	if (ce)
 		mb_cache_entry_release(ce);
 	brelse(new_bh);
-	brelse(old_bh);
-	if (!(old_bh && header == BHDR(old_bh)))
-		kfree(header);
-	up_write(&EXT3_I(inode)->xattr_sem);
+	if (!(bs->bh && s->base == bs->bh->b_data))
+		kfree(s->base);
 
 	return error;
 
+bad_block:
+	ext3_error(inode->i_sb, __FUNCTION__,
+		   "inode %ld: bad block %d", inode->i_ino,
+		   EXT3_I(inode)->i_file_acl);
+	goto cleanup;
+
 #undef header
 }
 
+struct ext3_xattr_ibody_find {
+	struct ext3_xattr_search s;
+	struct ext3_iloc iloc;
+};
+
+int
+ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
+		      struct ext3_xattr_ibody_find *is)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_inode *raw_inode;
+	int error;
+
+	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+		return 0;
+	raw_inode = ext3_raw_inode(&is->iloc);
+	header = IHDR(inode, raw_inode);
+	is->s.base = is->s.first = IFIRST(header);
+	is->s.here = is->s.first;
+	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+	if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
+		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
+		if (error)
+			return error;
+		/* Find the named attribute. */
+		error = ext3_xattr_find_entry(&is->s.here, i->name_index,
+					      i->name, is->s.end -
+					      (void *)is->s.base, 0);
+		if (error && error != -ENODATA)
+			return error;
+		is->s.not_found = error;
+	}
+	return 0;
+}
+
+static int
+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
+		     struct ext3_xattr_info *i,
+		     struct ext3_xattr_ibody_find *is)
+{
+	struct ext3_xattr_ibody_header *header;
+	struct ext3_xattr_search *s = &is->s;
+	int error;
+	
+	if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+		return -ENOSPC;
+	error = ext3_xattr_set_entry(i, s);
+	if (error)
+		return error;
+	header = IHDR(inode, ext3_raw_inode(&is->iloc));
+	if (!IS_LAST_ENTRY(s->first)) {
+		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+		EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+	} else {
+		header->h_magic = cpu_to_le32(0);
+		EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+	}
+	return 0;
+}
+
+/*
+ * ext3_xattr_set_handle()
+ *
+ * Create, replace or remove an extended attribute for this inode. Buffer
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+		      const char *name, const void *value, size_t value_len,
+		      int flags)
+{
+	struct ext3_xattr_info i = {
+		.name_index = name_index,
+		.name = name,
+		.value = value,
+		.value_len = value_len,
+
+	};
+	struct ext3_xattr_ibody_find is = {
+		.s = { .not_found = -ENODATA, },
+	};
+	struct ext3_xattr_block_find bs = {
+		.s = { .not_found = -ENODATA, },
+	};
+	int error;
+
+	if (IS_RDONLY(inode))
+		return -EROFS;
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		return -EPERM;
+	if (!name)
+		return -EINVAL;
+	if (strlen(name) > 255)
+		return -ERANGE;
+	down_write(&EXT3_I(inode)->xattr_sem);
+	error = ext3_get_inode_loc(inode, &is.iloc);
+	if (error)
+		goto cleanup;
+	error = ext3_xattr_ibody_find(inode, &i, &is);
+	if (error)
+		goto cleanup;
+	if (is.s.not_found)
+		error = ext3_xattr_block_find(inode, &i, &bs);
+	if (error)
+		goto cleanup;
+	if (is.s.not_found && bs.s.not_found) {
+		error = -ENODATA;
+		if (flags & XATTR_REPLACE)
+			goto cleanup;
+		error = 0;
+		if (!value)
+			goto cleanup;
+	} else {
+		error = -EEXIST;
+		if (flags & XATTR_CREATE)
+			goto cleanup;
+	}
+	error = ext3_journal_get_write_access(handle, is.iloc.bh);
+	if (error)
+		goto cleanup;
+	if (!value) {
+		if (!is.s.not_found)
+			error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+		else if (!bs.s.not_found)
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+	} else {
+		error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+		if (!error && !bs.s.not_found) {
+			i.value = NULL;
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+		} else if (error == -ENOSPC) {
+			error = ext3_xattr_block_set(handle, inode, &i, &bs);
+			if (error)
+				goto cleanup;
+			if (!is.s.not_found) {
+				i.value = NULL;
+				error = ext3_xattr_ibody_set(handle, inode, &i,
+							     &is);
+			}
+		}
+	}
+	if (!error) {
+		inode->i_ctime = CURRENT_TIME_SEC;
+		error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+		/*
+		 * The bh is consumed by ext3_mark_iloc_dirty, even with
+		 * error != 0.
+		 */
+		is.iloc.bh = NULL;
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
+	}
+
+cleanup:
+	brelse(is.iloc.bh);
+	brelse(bs.bh);
+	up_write(&EXT3_I(inode)->xattr_sem);
+	return error;
+}
+
 /*
  * ext3_xattr_set()
  *
Index: linux-2.6.10/fs/ext3/inode.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/inode.c	2005-01-11 01:38:40.000000000 +0100
+++ linux-2.6.10/fs/ext3/inode.c	2005-01-11 01:38:40.000000000 +0100
@@ -2383,7 +2383,9 @@
 
 int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
 {
-	return __ext3_get_inode_loc(inode, iloc, 1);
+	/* We have all inode data except xattrs in memory here. */
+	return __ext3_get_inode_loc(inode, iloc,
+		!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
 }
 
 void ext3_set_inode_flags(struct inode *inode)
@@ -2489,6 +2491,16 @@
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
 
+	ei->i_extra_isize =
+		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
+		le16_to_cpu(raw_inode->i_extra_isize) : 0;
+	if (ei->i_extra_isize) {
+		__le32 *magic = (void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
+				ei->i_extra_isize;
+		if (le32_to_cpu(*magic))
+			 ei->i_state |= EXT3_STATE_XATTR;
+	}
+
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext3_file_inode_operations;
 		inode->i_fop = &ext3_file_operations;
@@ -2624,6 +2636,9 @@
 	} else for (block = 0; block < EXT3_N_BLOCKS; block++)
 		raw_inode->i_block[block] = ei->i_data[block];
 
+	if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
+		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+
 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
 	rc = ext3_journal_dirty_metadata(handle, bh);
 	if (!err)
Index: linux-2.6.10/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.h	2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/xattr.h	2005-01-11 01:38:40.000000000 +0100
@@ -31,6 +31,10 @@
 	__u32	h_reserved[4];	/* zero right now */
 };
 
+struct ext3_xattr_ibody_header {
+	__le32	h_magic;	/* magic number for identification */
+};
+
 struct ext3_xattr_entry {
 	__u8	e_name_len;	/* length of name */
 	__u8	e_name_index;	/* attribute name index */
Index: linux-2.6.10/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs.h	2005-01-11 01:38:40.000000000 +0100
+++ linux-2.6.10/include/linux/ext3_fs.h	2005-01-11 01:38:40.000000000 +0100
@@ -195,7 +195,7 @@
  */
 #define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
 #define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
-
+#define EXT3_STATE_XATTR		0x00000004 /* has in-inode xattrs */
 
 /* Used to pass group descriptor data when online resize is done */
 struct ext3_new_group_input {
@@ -293,6 +293,8 @@
 			__u32	m_i_reserved2[2];
 		} masix2;
 	} osd2;				/* OS dependent 2 */
+	__le16	i_extra_isize;
+	__le16	i_pad1;
 };
 
 #define i_size_high	i_dir_acl
Index: linux-2.6.10/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs_i.h	2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/include/linux/ext3_fs_i.h	2005-01-11 02:57:54.128575944 +0100
@@ -113,6 +113,9 @@
 	 */
 	loff_t	i_disksize;
 
+	/* on-disk additional length */
+	__u16 i_extra_isize;
+
 	/*
 	 * truncate_sem is for serialising ext3_truncate() against
 	 * ext3_getblock().  In the 2.4 ext2 design, great chunks of inode's
Index: linux-2.6.10/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/ialloc.c	2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/ialloc.c	2005-01-11 01:38:40.000000000 +0100
@@ -596,6 +596,9 @@
 	spin_unlock(&sbi->s_next_gen_lock);
 
 	ei->i_state = EXT3_STATE_NEW;
+	ei->i_extra_isize =
+		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
+		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 5/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 6/9] " Andreas Gruenbacher
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

Ext3: factor our common xattr code; unnecessary lock

The ext3_xattr_set_handle2 and ext3_xattr_delete_inode functions contain
duplicate code to decrease the reference count of an xattr block. Move
this to a separate function.

Also we know we have exclusive access to the inode in
ext3_xattr_delete_inode; there is no need to grab the xattr_sem lock.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -356,6 +356,41 @@ static void ext3_xattr_update_super_bloc
 }
 
 /*
+ * Release the xattr block BH: If the reference count is > 1, decrement
+ * it; otherwise free the block.
+ */
+static void
+ext3_xattr_release_block(handle_t *handle, struct inode *inode,
+			 struct buffer_head *bh)
+{
+	struct mb_cache_entry *ce = NULL;
+
+	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+		ea_bdebug(bh, "freeing");
+		if (ce)
+			mb_cache_entry_free(ce);
+		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
+		get_bh(bh);
+		ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+	} else {
+		if (ext3_journal_get_write_access(handle, bh) == 0) {
+			lock_buffer(bh);
+			HDR(bh)->h_refcount = cpu_to_le32(
+				le32_to_cpu(HDR(bh)->h_refcount) - 1);
+			ext3_journal_dirty_metadata(handle, bh);
+			if (IS_SYNC(inode))
+				handle->h_sync = 1;
+			DQUOT_FREE_BLOCK(inode, 1);
+			unlock_buffer(bh);
+			ea_bdebug(bh, "refcount now=%d",
+				  le32_to_cpu(HDR(bh)->h_refcount) - 1);
+		}
+		if (ce)
+			mb_cache_entry_release(ce);
+	}
+}
+/*
  * ext3_xattr_set_handle()
  *
  * Create, replace or remove an extended attribute for this inode. Buffer
@@ -733,37 +768,8 @@ getblk_failed:
 		/*
 		 * If there was an old block, and we are no longer using it,
 		 * release the old block.
-		*/
-		ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
-					old_bh->b_blocknr);
-		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
-			/* Free the old block. */
-			ea_bdebug(old_bh, "freeing");
-			ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
-
-			/* ext3_forget() calls bforget() for us, but we
-			   let our caller release old_bh, so we need to
-			   duplicate the handle before. */
-			get_bh(old_bh);
-			ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
-			if (ce) {
-				mb_cache_entry_free(ce);
-				ce = NULL;
-			}
-		} else {
-			error = ext3_journal_get_write_access(handle, old_bh);
-			if (error)
-				goto cleanup;
-			/* Decrement the refcount only. */
-			lock_buffer(old_bh);
-			HDR(old_bh)->h_refcount = cpu_to_le32(
-				le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
-			DQUOT_FREE_BLOCK(inode, 1);
-			ext3_journal_dirty_metadata(handle, old_bh);
-			ea_bdebug(old_bh, "refcount now=%d",
-				le32_to_cpu(HDR(old_bh)->h_refcount));
-			unlock_buffer(old_bh);
-		}
+		 */
+		ext3_xattr_release_block(handle, inode, old_bh);
 	}
 
 cleanup:
@@ -813,13 +819,13 @@ retry:
  * ext3_xattr_delete_inode()
  *
  * Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed.
+ * is called immediately before an inode is freed. We have exclusive
+ * access to the inode.
  */
 void
 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
-	struct mb_cache_entry *ce = NULL;
 
 	down_write(&EXT3_I(inode)->xattr_sem);
 	if (!EXT3_I(inode)->i_file_acl)
@@ -838,33 +844,10 @@ ext3_xattr_delete_inode(handle_t *handle
 			EXT3_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
-	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		if (ce) {
-			mb_cache_entry_free(ce);
-			ce = NULL;
-		}
-		ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
-		get_bh(bh);
-		ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
-	} else {
-		if (ext3_journal_get_write_access(handle, bh) != 0)
-			goto cleanup;
-		lock_buffer(bh);
-		HDR(bh)->h_refcount = cpu_to_le32(
-			le32_to_cpu(HDR(bh)->h_refcount) - 1);
-		ext3_journal_dirty_metadata(handle, bh);
-		if (IS_SYNC(inode))
-			handle->h_sync = 1;
-		DQUOT_FREE_BLOCK(inode, 1);
-		unlock_buffer(bh);
-	}
-	ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
+	ext3_xattr_release_block(handle, inode, bh);
 	EXT3_I(inode)->i_file_acl = 0;
 
 cleanup:
-	if (ce)
-		mb_cache_entry_release(ce);
 	brelse(bh);
 	up_write(&EXT3_I(inode)->xattr_sem);
 }
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RESEND][PATCH 7/9] Ext3: extended attribute sharing fixes and in-inode EAs
  2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
                   ` (2 preceding siblings ...)
  2005-01-13 10:31 ` [RESEND][PATCH 6/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
  2005-01-13 10:31 ` [RESEND][PATCH 2/9] " Andreas Gruenbacher
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
  To: Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
	Andrew Tridgell

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 30236 bytes --]

Cleanup and prepare ext3 for in-inode xattrs

Clean up several things in the xattr code, and prepare it for
in-inode attributes:

* Add the ext3_xattr_check_names, ext3_xattr_check_block, and
  ext3_xattr_check_entry functions for checking xattr data
  structures.
* Add the ext3_xattr_find_entry, ext3_xattr_list_entries, and
  ext3_xattr_set_entry functions for manipulating xattr entries.
  Switch to using these functions in ext3_xattr_get,
  ext3_xattr_list, and ext3_xattr_set_handle.
* Merge ext3_xattr_set_handle and ext3_xattr_set_handle2.
* Rename the HDR and FIRST_ENTRY macros.
* We have no way to deal with a ext3_xattr_cache_insert failure,
  so make it return void.
* Make the debug messages more useful.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>

Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -24,7 +24,7 @@
  *
  *   +------------------+
  *   | header           |
- *   ¦ entry 1          | |
+ *   | entry 1          | |
  *   | entry 2          | | growing downwards
  *   | entry 3          | v
  *   | four null bytes  |
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -24,7 +24,7 @@
  *
  *   +------------------+
  *   | header           |
- *   ¦ entry 1          | |
+ *   | entry 1          | |
  *   | entry 2          | | growing downwards
  *   | entry 3          | v
  *   | four null bytes  |
@@ -64,9 +64,9 @@
 #include "xattr.h"
 #include "acl.h"
 
-#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
+#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
-#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
+#define BFIRST(bh) ENTRY(BHDR(bh)+1)
 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
 
 #ifdef EXT3_XATTR_DEBUG
@@ -89,11 +89,7 @@
 # define ea_bdebug(f...)
 #endif
 
-static int ext3_xattr_set_handle2(handle_t *, struct inode *,
-				  struct buffer_head *,
-				  struct ext3_xattr_header *);
-
-static int ext3_xattr_cache_insert(struct buffer_head *);
+static void ext3_xattr_cache_insert(struct buffer_head *);
 static struct buffer_head *ext3_xattr_cache_find(struct inode *,
 						 struct ext3_xattr_header *,
 						 struct mb_cache_entry **);
@@ -148,6 +144,68 @@ ext3_listxattr(struct dentry *dentry, ch
 	return ext3_xattr_list(dentry->d_inode, buffer, size);
 }
 
+static int
+ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
+{
+	while (!IS_LAST_ENTRY(entry)) {
+		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
+		if ((void *)next >= end)
+			return -EIO;
+		entry = next;
+	}
+	return 0;
+}
+
+static inline int
+ext3_xattr_check_block(struct buffer_head *bh)
+{
+	int error;
+
+	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	    BHDR(bh)->h_blocks != cpu_to_le32(1))
+		return -EIO;
+	error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+	return error;
+}
+
+static inline int
+ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
+{
+	size_t value_size = le32_to_cpu(entry->e_value_size);
+
+	if (entry->e_value_block != 0 || value_size > size ||
+	    le16_to_cpu(entry->e_value_offs) + value_size > size)
+		return -EIO;
+	return 0;
+}
+
+static int
+ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
+		      const char *name, size_t size, int sorted)
+{
+	struct ext3_xattr_entry *entry;
+	size_t name_len;
+	int cmp = 1;
+
+	if (name == NULL)
+		return -EINVAL;
+	name_len = strlen(name);
+	entry = *pentry;
+	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+		cmp = name_index - entry->e_name_index;
+		if (!cmp)
+			cmp = name_len - entry->e_name_len;
+		if (!cmp)
+			cmp = memcmp(name, entry->e_name, name_len);
+		if (cmp <= 0 && (sorted || cmp == 0))
+			break;
+	}
+	*pentry = entry;
+	if (!cmp && ext3_xattr_check_entry(entry, size))
+			return -EIO;
+	return cmp ? -ENODATA : 0;
+}
+
 /*
  * ext3_xattr_get()
  *
@@ -164,8 +222,7 @@ ext3_xattr_get(struct inode *inode, int 
 {
 	struct buffer_head *bh = NULL;
 	struct ext3_xattr_entry *entry;
-	size_t name_len, size;
-	char *end;
+	size_t size;
 	int error;
 
 	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
@@ -179,78 +236,65 @@ ext3_xattr_get(struct inode *inode, int 
 		goto cleanup;
 	ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
 	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
-	error = -EIO;
 	if (!bh)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
-		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-	end = bh->b_data + bh->b_size;
-	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block:	ext3_error(inode->i_sb, "ext3_xattr_get",
-			"inode %ld: bad block %d", inode->i_ino,
-			EXT3_I(inode)->i_file_acl);
+		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+	if (ext3_xattr_check_block(bh)) {
+bad_block:	ext3_error(inode->i_sb, __FUNCTION__,
+			   "inode %ld: bad block %d", inode->i_ino,
+			   EXT3_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
-	/* find named attribute */
-	name_len = strlen(name);
-
-	error = -ERANGE;
-	if (name_len > 255)
-		goto cleanup;
-	entry = FIRST_ENTRY(bh);
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext3_xattr_entry *next =
-			EXT3_XATTR_NEXT(entry);
-		if ((char *)next >= end)
-			goto bad_block;
-		if (name_index == entry->e_name_index &&
-		    name_len == entry->e_name_len &&
-		    memcmp(name, entry->e_name, name_len) == 0)
-			goto found;
-		entry = next;
-	}
-	/* Check the remaining name entries */
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext3_xattr_entry *next =
-			EXT3_XATTR_NEXT(entry);
-		if ((char *)next >= end)
-			goto bad_block;
-		entry = next;
-	}
-	if (ext3_xattr_cache_insert(bh))
-		ea_idebug(inode, "cache insert failed");
-	error = -ENODATA;
-	goto cleanup;
-found:
-	/* check the buffer size */
-	if (entry->e_value_block != 0)
+	ext3_xattr_cache_insert(bh);
+	entry = BFIRST(bh);
+	error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+	if (error == -EIO)
 		goto bad_block;
+	if (error)
+		goto cleanup;
 	size = le32_to_cpu(entry->e_value_size);
-	if (size > inode->i_sb->s_blocksize ||
-	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
-		goto bad_block;
-
-	if (ext3_xattr_cache_insert(bh))
-		ea_idebug(inode, "cache insert failed");
 	if (buffer) {
 		error = -ERANGE;
 		if (size > buffer_size)
 			goto cleanup;
-		/* return value of attribute */
 		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-			size);
+		       size);
 	}
 	error = size;
 
 cleanup:
 	brelse(bh);
 	up_read(&EXT3_I(inode)->xattr_sem);
-
 	return error;
 }
 
+static int
+ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+			char *buffer, size_t buffer_size)
+{
+	size_t rest = buffer_size;
+
+	for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+		struct xattr_handler *handler =
+			ext3_xattr_handler(entry->e_name_index);
+
+		if (handler) {
+			size_t size = handler->list(inode, buffer, rest,
+						    entry->e_name,
+						    entry->e_name_len);
+			if (buffer) {
+				if (size > rest)
+					return -ERANGE;
+				buffer += size;
+			}
+			rest -= size;
+		}
+	}
+	return buffer_size - rest;
+}
+
 /*
  * ext3_xattr_list()
  *
@@ -265,9 +309,6 @@ int
 ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
 	struct buffer_head *bh = NULL;
-	struct ext3_xattr_entry *entry;
-	char *end;
-	size_t rest = buffer_size;
 	int error;
 
 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
@@ -283,50 +324,16 @@ ext3_xattr_list(struct inode *inode, cha
 	if (!bh)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
-		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-	end = bh->b_data + bh->b_size;
-	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block:	ext3_error(inode->i_sb, "ext3_xattr_list",
-			"inode %ld: bad block %d", inode->i_ino,
-			EXT3_I(inode)->i_file_acl);
+		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+	if (ext3_xattr_check_block(bh)) {
+		ext3_error(inode->i_sb, __FUNCTION__,
+			   "inode %ld: bad block %d", inode->i_ino,
+			   EXT3_I(inode)->i_file_acl);
 		error = -EIO;
 		goto cleanup;
 	}
-
-	/* check the on-disk data structure */
-	entry = FIRST_ENTRY(bh);
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
-
-		if ((char *)next >= end)
-			goto bad_block;
-		entry = next;
-	}
-	if (ext3_xattr_cache_insert(bh))
-		ea_idebug(inode, "cache insert failed");
-
-	/* list the attribute names */
-	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-	     entry = EXT3_XATTR_NEXT(entry)) {
-		struct xattr_handler *handler =
-			ext3_xattr_handler(entry->e_name_index);
-
-		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
-						    entry->e_name,
-						    entry->e_name_len);
-			if (buffer) {
-				if (size > rest) {
-					error = -ERANGE;
-					goto cleanup;
-				}
-				buffer += size;
-			}
-			rest -= size;
-		}
-	}
-	error = buffer_size - rest;  /* total size */
+	ext3_xattr_cache_insert(bh);
+	error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -366,8 +373,8 @@ ext3_xattr_release_block(handle_t *handl
 	struct mb_cache_entry *ce = NULL;
 
 	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
-	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		ea_bdebug(bh, "freeing");
+	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+		ea_bdebug(bh, "refcount now=0; freeing");
 		if (ce)
 			mb_cache_entry_free(ce);
 		ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
@@ -376,20 +383,137 @@ ext3_xattr_release_block(handle_t *handl
 	} else {
 		if (ext3_journal_get_write_access(handle, bh) == 0) {
 			lock_buffer(bh);
-			HDR(bh)->h_refcount = cpu_to_le32(
-				le32_to_cpu(HDR(bh)->h_refcount) - 1);
+			BHDR(bh)->h_refcount = cpu_to_le32(
+				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
 			ext3_journal_dirty_metadata(handle, bh);
 			if (IS_SYNC(inode))
 				handle->h_sync = 1;
 			DQUOT_FREE_BLOCK(inode, 1);
 			unlock_buffer(bh);
-			ea_bdebug(bh, "refcount now=%d",
-				  le32_to_cpu(HDR(bh)->h_refcount) - 1);
+			ea_bdebug(bh, "refcount now=%d; releasing",
+				  le32_to_cpu(BHDR(bh)->h_refcount));
 		}
 		if (ce)
 			mb_cache_entry_release(ce);
 	}
 }
+
+struct ext3_xattr_info {
+	int name_index;
+	const char *name;
+	const void *value;
+	size_t value_len;
+};
+
+struct ext3_xattr_search {
+	struct ext3_xattr_entry *first;
+	void *base;
+	void *end;
+	struct ext3_xattr_entry *here;
+	int not_found;
+};
+
+static int
+ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
+{
+	struct ext3_xattr_entry *last;
+	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+
+	/* Compute min_offs and last. */
+	last = s->first;
+	for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
+		if (!last->e_value_block && last->e_value_size) {
+			size_t offs = le16_to_cpu(last->e_value_offs);
+			if (offs < min_offs)
+				min_offs = offs;
+		}
+	}
+	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+	if (!s->not_found) {
+		if (!s->here->e_value_block && s->here->e_value_size) {
+			size_t size = le32_to_cpu(s->here->e_value_size);
+			free += EXT3_XATTR_SIZE(size);
+		}
+		free += EXT3_XATTR_LEN(name_len);
+	}
+	if (i->value) {
+		if (free < EXT3_XATTR_SIZE(i->value_len) ||
+		    free < EXT3_XATTR_LEN(name_len) +
+			   EXT3_XATTR_SIZE(i->value_len))
+			return -ENOSPC;
+	}
+
+	if (i->value && s->not_found) {
+		/* Insert the new name. */
+		size_t size = EXT3_XATTR_LEN(name_len);
+		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
+		memmove((void *)s->here + size, s->here, rest);
+		memset(s->here, 0, size);
+		s->here->e_name_index = i->name_index;
+		s->here->e_name_len = name_len;
+		memcpy(s->here->e_name, i->name, name_len);
+	} else {
+		if (!s->here->e_value_block && s->here->e_value_size) {
+			void *first_val = s->base + min_offs;
+			size_t offs = le16_to_cpu(s->here->e_value_offs);
+			void *val = s->base + offs;
+			size_t size = EXT3_XATTR_SIZE(
+				le32_to_cpu(s->here->e_value_size));
+
+			if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
+				/* The old and the new value have the same
+				   size. Just replace. */
+				s->here->e_value_size =
+					cpu_to_le32(i->value_len);
+				memset(val + size - EXT3_XATTR_PAD, 0,
+				       EXT3_XATTR_PAD); /* Clear pad bytes. */
+				memcpy(val, i->value, i->value_len);
+				return 0;
+			}
+
+			/* Remove the old value. */
+			memmove(first_val + size, first_val, val - first_val);
+			memset(first_val, 0, size);
+			s->here->e_value_size = 0;
+			s->here->e_value_offs = 0;
+			min_offs += size;
+
+			/* Adjust all value offsets. */
+			last = s->first;
+			while (!IS_LAST_ENTRY(last)) {
+				size_t o = le16_to_cpu(last->e_value_offs);
+				if (!last->e_value_block &&
+				    last->e_value_size && o < offs)
+					last->e_value_offs =
+						cpu_to_le16(o + size);
+				last = EXT3_XATTR_NEXT(last);
+			}
+		}
+		if (!i->value) {
+			/* Remove the old name. */
+			size_t size = EXT3_XATTR_LEN(name_len);
+			last = ENTRY((void *)last - size);
+			memmove(s->here, (void *)s->here + size,
+				(void *)last - (void *)s->here + sizeof(__u32));
+			memset(last, 0, size);
+		}
+	}
+
+	if (i->value) {
+		/* Insert the new value. */
+		s->here->e_value_size = cpu_to_le32(i->value_len);
+		if (i->value_len) {
+			size_t size = EXT3_XATTR_SIZE(i->value_len);
+			void *val = s->base + min_offs - size;
+			s->here->e_value_offs = cpu_to_le16(min_offs - size);
+			memset(val + size - EXT3_XATTR_PAD, 0,
+			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
+			memcpy(val, i->value, i->value_len);
+		}
+	}
+	return 0;
+}
+
 /*
  * ext3_xattr_set_handle()
  *
@@ -408,23 +532,24 @@ ext3_xattr_set_handle(handle_t *handle, 
 		      int flags)
 {
 	struct super_block *sb = inode->i_sb;
-	struct buffer_head *bh = NULL;
-	struct ext3_xattr_header *header = NULL;
-	struct ext3_xattr_entry *here, *last;
-	size_t name_len, free, min_offs = sb->s_blocksize;
-	int not_found = 1, error;
-	char *end;
+	struct buffer_head *old_bh = NULL, *new_bh = NULL;
+	struct ext3_xattr_info i = {
+		.name_index = name_index,
+		.name = name,
+		.value = value,
+		.value_len = value_len,
+	};
+	struct ext3_xattr_search s = {
+		.not_found = 1,
+	};
+	struct mb_cache_entry *ce = NULL;
+	int error;
+
+#define header ((struct ext3_xattr_header *)(s.base))
 
 	/*
 	 * header -- Points either into bh, or to a temporarily
 	 *           allocated buffer.
-	 * here -- The named entry found, or the place for inserting, within
-	 *         the block pointed to by header.
-	 * last -- Points right after the last named entry within the block
-	 *         pointed to by header.
-	 * min_offs -- The offset of the first value (values are aligned
-	 *             towards the end of the block).
-	 * end -- Points right after the block pointed to by header.
 	 */
 
 	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
@@ -434,77 +559,38 @@ ext3_xattr_set_handle(handle_t *handle, 
 		return -EROFS;
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		return -EPERM;
-	if (value == NULL)
-		value_len = 0;
-	if (name == NULL)
-		return -EINVAL;
-	name_len = strlen(name);
-	if (name_len > 255 || value_len > sb->s_blocksize)
-		return -ERANGE;
+	if (i.value == NULL)
+		i.value_len = 0;
 	down_write(&EXT3_I(inode)->xattr_sem);
 	if (EXT3_I(inode)->i_file_acl) {
 		/* The inode already has an extended attribute block. */
-		bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+		old_bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
 		error = -EIO;
-		if (!bh)
+		if (!old_bh)
 			goto cleanup;
-		ea_bdebug(bh, "b_count=%d, refcount=%d",
-			atomic_read(&(bh->b_count)),
-			le32_to_cpu(HDR(bh)->h_refcount));
-		header = HDR(bh);
-		end = bh->b_data + bh->b_size;
-		if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-		    header->h_blocks != cpu_to_le32(1)) {
-bad_block:		ext3_error(sb, "ext3_xattr_set",
+		ea_bdebug(old_bh, "b_count=%d, refcount=%d",
+			atomic_read(&(old_bh->b_count)),
+			le32_to_cpu(BHDR(old_bh)->h_refcount));
+		if (ext3_xattr_check_block(old_bh)) {
+bad_block:		ext3_error(sb, __FUNCTION__,
 				"inode %ld: bad block %d", inode->i_ino,
 				EXT3_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
 		/* Find the named attribute. */
-		here = FIRST_ENTRY(bh);
-		while (!IS_LAST_ENTRY(here)) {
-			struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
-			if ((char *)next >= end)
-				goto bad_block;
-			if (!here->e_value_block && here->e_value_size) {
-				size_t offs = le16_to_cpu(here->e_value_offs);
-				if (offs < min_offs)
-					min_offs = offs;
-			}
-			not_found = name_index - here->e_name_index;
-			if (!not_found)
-				not_found = name_len - here->e_name_len;
-			if (!not_found)
-				not_found = memcmp(name, here->e_name,name_len);
-			if (not_found <= 0)
-				break;
-			here = next;
-		}
-		last = here;
-		/* We still need to compute min_offs and last. */
-		while (!IS_LAST_ENTRY(last)) {
-			struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-			if ((char *)next >= end)
-				goto bad_block;
-			if (!last->e_value_block && last->e_value_size) {
-				size_t offs = le16_to_cpu(last->e_value_offs);
-				if (offs < min_offs)
-					min_offs = offs;
-			}
-			last = next;
-		}
-
-		/* Check whether we have enough space left. */
-		free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
-	} else {
-		/* We will use a new extended attribute block. */
-		free = sb->s_blocksize -
-			sizeof(struct ext3_xattr_header) - sizeof(__u32);
-		here = last = NULL;  /* avoid gcc uninitialized warning. */
+		s.base = BHDR(old_bh);
+		s.first = BFIRST(old_bh);
+		s.end = old_bh->b_data + old_bh->b_size;
+		s.here = BFIRST(old_bh);
+		error = ext3_xattr_find_entry(&s.here, name_index, name,
+					      old_bh->b_size, 1);
+		if (error && error != -ENODATA)
+			goto cleanup;
+		s.not_found = error;
 	}
 
-	if (not_found) {
+	if (s.not_found) {
 		/* Request to remove a nonexistent attribute? */
 		error = -ENODATA;
 		if (flags & XATTR_REPLACE)
@@ -517,183 +603,90 @@ bad_block:		ext3_error(sb, "ext3_xattr_s
 		error = -EEXIST;
 		if (flags & XATTR_CREATE)
 			goto cleanup;
-		if (!here->e_value_block && here->e_value_size) {
-			size_t size = le32_to_cpu(here->e_value_size);
-
-			if (le16_to_cpu(here->e_value_offs) + size > 
-			    sb->s_blocksize || size > sb->s_blocksize)
-				goto bad_block;
-			free += EXT3_XATTR_SIZE(size);
-		}
-		free += EXT3_XATTR_LEN(name_len);
 	}
-	error = -ENOSPC;
-	if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
-		goto cleanup;
-
-	/* Here we know that we can set the new attribute. */
 
 	if (header) {
-		struct mb_cache_entry *ce;
-
-		/* assert(header == HDR(bh)); */
-		ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
-					bh->b_blocknr);
+		/* assert(header == BHDR(old_bh)); */
+		ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
+					old_bh->b_blocknr);
 		if (header->h_refcount == cpu_to_le32(1)) {
-			if (ce)
+			if (ce) {
 				mb_cache_entry_free(ce);
-			ea_bdebug(bh, "modifying in-place");
-			error = ext3_journal_get_write_access(handle, bh);
+				ce = NULL;
+			}
+			ea_bdebug(old_bh, "modifying in-place");
+			error = ext3_journal_get_write_access(handle, old_bh);
 			if (error)
 				goto cleanup;
-			lock_buffer(bh);
-			/* keep the buffer locked while modifying it. */
+			lock_buffer(old_bh);
+			error = ext3_xattr_set_entry(&i, &s);
+			if (!error) {
+				if (!IS_LAST_ENTRY(s.first))
+					ext3_xattr_rehash(header, s.here);
+				ext3_xattr_cache_insert(old_bh);
+			}
+			unlock_buffer(old_bh);
+			if (error == -EIO)
+				goto bad_block;
+			if (!error && old_bh && header == BHDR(old_bh)) {
+				error = ext3_journal_dirty_metadata(handle,
+								    old_bh);
+			}
+			if (error)
+				goto cleanup;
+			goto inserted;
 		} else {
-			int offset;
+			int offset = (char *)s.here - old_bh->b_data;
 
-			if (ce)
+			if (ce) {
 				mb_cache_entry_release(ce);
-			ea_bdebug(bh, "cloning");
-			header = kmalloc(bh->b_size, GFP_KERNEL);
+				ce = NULL;
+			}
+			ea_bdebug(old_bh, "cloning");
+			s.base = kmalloc(old_bh->b_size, GFP_KERNEL);
+			/*assert(header == s.base)*/
 			error = -ENOMEM;
 			if (header == NULL)
 				goto cleanup;
-			memcpy(header, HDR(bh), bh->b_size);
+			memcpy(header, BHDR(old_bh), old_bh->b_size);
+			s.first = ENTRY(header+1);
 			header->h_refcount = cpu_to_le32(1);
-			offset = (char *)here - bh->b_data;
-			here = ENTRY((char *)header + offset);
-			offset = (char *)last - bh->b_data;
-			last = ENTRY((char *)header + offset);
+			s.here = ENTRY(s.base + offset);
+			s.end = header + old_bh->b_size;
 		}
 	} else {
 		/* Allocate a buffer where we construct the new block. */
-		header = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		s.base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		/*assert(header == s.base)*/
 		error = -ENOMEM;
 		if (header == NULL)
 			goto cleanup;
 		memset(header, 0, sb->s_blocksize);
-		end = (char *)header + sb->s_blocksize;
 		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
 		header->h_blocks = header->h_refcount = cpu_to_le32(1);
-		last = here = ENTRY(header+1);
+		s.first = ENTRY(header+1);
+		s.here = ENTRY(header+1);
+		s.end = (void *)header + sb->s_blocksize;
 	}
 
-	/* Iff we are modifying the block in-place, bh is locked here. */
-
-	if (not_found) {
-		/* Insert the new name. */
-		size_t size = EXT3_XATTR_LEN(name_len);
-		size_t rest = (char *)last - (char *)here;
-		memmove((char *)here + size, here, rest);
-		memset(here, 0, size);
-		here->e_name_index = name_index;
-		here->e_name_len = name_len;
-		memcpy(here->e_name, name, name_len);
-	} else {
-		if (!here->e_value_block && here->e_value_size) {
-			char *first_val = (char *)header + min_offs;
-			size_t offs = le16_to_cpu(here->e_value_offs);
-			char *val = (char *)header + offs;
-			size_t size = EXT3_XATTR_SIZE(
-				le32_to_cpu(here->e_value_size));
-
-			if (size == EXT3_XATTR_SIZE(value_len)) {
-				/* The old and the new value have the same
-				   size. Just replace. */
-				here->e_value_size = cpu_to_le32(value_len);
-				memset(val + size - EXT3_XATTR_PAD, 0,
-				       EXT3_XATTR_PAD); /* Clear pad bytes. */
-				memcpy(val, value, value_len);
-				goto skip_replace;
-			}
-
-			/* Remove the old value. */
-			memmove(first_val + size, first_val, val - first_val);
-			memset(first_val, 0, size);
-			here->e_value_offs = 0;
-			min_offs += size;
-
-			/* Adjust all value offsets. */
-			last = ENTRY(header+1);
-			while (!IS_LAST_ENTRY(last)) {
-				size_t o = le16_to_cpu(last->e_value_offs);
-				if (!last->e_value_block && o < offs)
-					last->e_value_offs =
-						cpu_to_le16(o + size);
-				last = EXT3_XATTR_NEXT(last);
-			}
-		}
-		if (value == NULL) {
-			/* Remove the old name. */
-			size_t size = EXT3_XATTR_LEN(name_len);
-			last = ENTRY((char *)last - size);
-			memmove(here, (char*)here + size,
-				(char*)last - (char*)here);
-			memset(last, 0, size);
-		}
-	}
-
-	if (value != NULL) {
-		/* Insert the new value. */
-		here->e_value_size = cpu_to_le32(value_len);
-		if (value_len) {
-			size_t size = EXT3_XATTR_SIZE(value_len);
-			char *val = (char *)header + min_offs - size;
-			here->e_value_offs =
-				cpu_to_le16((char *)val - (char *)header);
-			memset(val + size - EXT3_XATTR_PAD, 0,
-			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
-			memcpy(val, value, value_len);
-		}
-	}
-
-skip_replace:
-	if (!IS_LAST_ENTRY(ENTRY(header+1)))
-		ext3_xattr_rehash(header, here);
-	if (bh && header == HDR(bh)) {
-		/* we were modifying in-place. */
-		unlock_buffer(bh);
-		error = ext3_journal_dirty_metadata(handle, bh);
-		if (error)
-			goto cleanup;
-	}
-	error = ext3_xattr_set_handle2(handle, inode, bh,
-				       IS_LAST_ENTRY(ENTRY(header+1)) ?
-				       NULL : header);
-
-cleanup:
-	brelse(bh);
-	if (!(bh && header == HDR(bh)))
-		kfree(header);
-	up_write(&EXT3_I(inode)->xattr_sem);
-
-	return error;
-}
-
-/*
- * Second half of ext3_xattr_set_handle(): Update the file system.
- */
-static int
-ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
-		       struct buffer_head *old_bh,
-		       struct ext3_xattr_header *header)
-{
-	struct super_block *sb = inode->i_sb;
-	struct buffer_head *new_bh = NULL;
-	struct mb_cache_entry *ce = NULL;
-	int error;
+	error = ext3_xattr_set_entry(&i, &s);
+	if (error == -EIO)
+		goto bad_block;
+	if (error)
+		goto cleanup;
+	if (!IS_LAST_ENTRY(s.first))
+		ext3_xattr_rehash(header, s.here);
 
-	if (header) {
+inserted:
+	if (!IS_LAST_ENTRY(s.first)) {
 		new_bh = ext3_xattr_cache_find(inode, header, &ce);
 		if (new_bh) {
 			/* We found an identical block in the cache. */
 			if (new_bh == old_bh)
-				ea_bdebug(new_bh, "keeping this block");
+				ea_bdebug(new_bh, "keeping");
 			else {
 				/* The old block is released after updating
 				   the inode. */
-				ea_bdebug(new_bh, "reusing block");
-
 				error = -EDQUOT;
 				if (DQUOT_ALLOC_BLOCK(inode, 1))
 					goto cleanup;
@@ -701,25 +694,23 @@ ext3_xattr_set_handle2(handle_t *handle,
 				if (error)
 					goto cleanup;
 				lock_buffer(new_bh);
-				HDR(new_bh)->h_refcount = cpu_to_le32(1 +
-					le32_to_cpu(HDR(new_bh)->h_refcount));
-				ea_bdebug(new_bh, "refcount now=%d",
-					le32_to_cpu(HDR(new_bh)->h_refcount));
+				BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
+					le32_to_cpu(BHDR(new_bh)->h_refcount));
+				ea_bdebug(new_bh, "reusing; refcount now=%d",
+					le32_to_cpu(BHDR(new_bh)->h_refcount));
 				unlock_buffer(new_bh);
-				error = ext3_journal_dirty_metadata(handle, new_bh);
+				error = ext3_journal_dirty_metadata(handle,
+								    new_bh);
 				if (error)
 					goto cleanup;
 			}
 			mb_cache_entry_release(ce);
 			ce = NULL;
-		} else if (old_bh && header == HDR(old_bh)) {
+		} else if (old_bh && header == BHDR(old_bh)) {
 			/* We were modifying this block in-place. */
-
-			/* Keep this block. No need to lock the block as we
-			 * don't need to change the reference count. */
+			ea_bdebug(old_bh, "keeping this block");
 			new_bh = old_bh;
 			get_bh(new_bh);
-			ext3_xattr_cache_insert(new_bh);
 		} else {
 			/* We need to allocate a new block */
 			int goal = le32_to_cpu(
@@ -748,11 +739,10 @@ getblk_failed:
 			set_buffer_uptodate(new_bh);
 			unlock_buffer(new_bh);
 			ext3_xattr_cache_insert(new_bh);
-
-			ext3_xattr_update_super_block(handle, sb);
 			error = ext3_journal_dirty_metadata(handle, new_bh);
 			if (error)
 				goto cleanup;
+			ext3_xattr_update_super_block(handle, sb);
 		}
 	}
 
@@ -763,21 +753,23 @@ getblk_failed:
 	if (IS_SYNC(inode))
 		handle->h_sync = 1;
 
-	error = 0;
-	if (old_bh && old_bh != new_bh) {
-		/*
-		 * If there was an old block, and we are no longer using it,
-		 * release the old block.
-		 */
+	/* Drop the previous xattr block. */
+	if (old_bh && old_bh != new_bh)
 		ext3_xattr_release_block(handle, inode, old_bh);
-	}
+	error = 0;
 
 cleanup:
 	if (ce)
 		mb_cache_entry_release(ce);
 	brelse(new_bh);
+	brelse(old_bh);
+	if (!(old_bh && header == BHDR(old_bh)))
+		kfree(header);
+	up_write(&EXT3_I(inode)->xattr_sem);
 
 	return error;
+
+#undef header
 }
 
 /*
@@ -832,14 +824,14 @@ ext3_xattr_delete_inode(handle_t *handle
 		goto cleanup;
 	bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
 	if (!bh) {
-		ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+		ext3_error(inode->i_sb, __FUNCTION__,
 			"inode %ld: block %d read error", inode->i_ino,
 			EXT3_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
-		ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+	if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+		ext3_error(inode->i_sb, __FUNCTION__,
 			"inode %ld: bad block %d", inode->i_ino,
 			EXT3_I(inode)->i_file_acl);
 		goto cleanup;
@@ -871,30 +863,29 @@ ext3_xattr_put_super(struct super_block 
  *
  * Returns 0, or a negative error number on failure.
  */
-static int
+static void
 ext3_xattr_cache_insert(struct buffer_head *bh)
 {
-	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
 	struct mb_cache_entry *ce;
 	int error;
 
 	ce = mb_cache_entry_alloc(ext3_xattr_cache);
-	if (!ce)
-		return -ENOMEM;
+	if (!ce) {
+		ea_bdebug(bh, "out of memory");
+		return;
+	}
 	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
-			ea_bdebug(bh, "already in cache (%d cache entries)",
-				atomic_read(&ext3_xattr_cache->c_entry_count));
+			ea_bdebug(bh, "already in cache");
 			error = 0;
 		}
 	} else {
-		ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
-			  atomic_read(&ext3_xattr_cache->c_entry_count));
+		ea_bdebug(bh, "inserting [%x]", (int)hash);
 		mb_cache_entry_release(ce);
 	}
-	return error;
 }
 
 /*
@@ -967,16 +958,16 @@ again:
 		}
 		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
-			ext3_error(inode->i_sb, "ext3_xattr_cache_find",
+			ext3_error(inode->i_sb, __FUNCTION__,
 				"inode %ld: block %ld read error",
 				inode->i_ino, (unsigned long) ce->e_block);
-		} else if (le32_to_cpu(HDR(bh)->h_refcount) >
+		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
 				EXT3_XATTR_REFCOUNT_MAX) {
-			ea_idebug(inode, "block %ld refcount %d>%d",
+			ea_idebug(inode, "block %ld refcount %d>=%d",
 				  (unsigned long) ce->e_block,
-				  le32_to_cpu(HDR(bh)->h_refcount),
+				  le32_to_cpu(BHDR(bh)->h_refcount),
 					  EXT3_XATTR_REFCOUNT_MAX);
-		} else if (ext3_xattr_cmp(header, HDR(bh)) == 0) {
+		} else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
 			*pce = ce;
 			return bh;
 		}
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2005-01-13 11:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 5/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 6/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 7/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 2/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 3/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 8/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 1/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 4/9] " Andreas Gruenbacher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox