* [RESEND][PATCH 9/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 5/9] " Andreas Gruenbacher
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
In-inode extended attributes for ext3
This started off as a patch by Alex Tomas <alex@clusterfs.com> and got an
overhaul by me. The on-disk structure used is the same as in Alex's
original patch.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c 2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/xattr.c 2005-01-11 02:58:47.296493192 +0100
@@ -9,18 +9,21 @@
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
+ * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
+ * and Andreas Gruenbacher <agruen@suse.de>.
*/
/*
- * Extended attributes are stored on disk blocks allocated outside of
- * any inode. The i_file_acl field is then made to point to this allocated
- * block. If all extended attributes of an inode are identical, these
- * inodes may share the same extended attribute block. Such situations
- * are automatically detected by keeping a cache of recent attribute block
- * numbers and hashes over the block's contents in memory.
+ * Extended attributes are stored directly in inodes (on file systems with
+ * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
+ * field contains the block number if an inode uses an additional block. All
+ * attributes must fit in the inode and one additional block. Blocks that
+ * contain the identical set of attributes may be shared among several inodes.
+ * Identical blocks are detected by keeping a cache of blocks that have
+ * recently been accessed.
*
- *
- * Extended attribute block layout:
+ * The attributes in inodes and on blocks have a different header; the entries
+ * are stored in the same format:
*
* +------------------+
* | header |
@@ -34,23 +37,17 @@
* | value 2 | |
* +------------------+
*
- * The block header is followed by multiple entry descriptors. These entry
- * descriptors are variable in size, and alligned to EXT3_XATTR_PAD
- * byte boundaries. The entry descriptors are sorted by attribute name,
- * so that two extended attribute blocks can be compared efficiently.
- *
- * Attribute values are aligned to the end of the block, stored in
- * no specific order. They are also padded to EXT3_XATTR_PAD byte
- * boundaries. No additional gaps are left between them.
+ * The header is followed by multiple entry descriptors. Descriptors are
+ * kept sorted. The attribute values are aligned to the end of the block
+ * in no specific order.
*
* Locking strategy
* ----------------
* EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
* EA blocks are only changed if they are exclusive to an inode, so
* holding xattr_sem also means that nothing but the EA block's reference
- * count will change. Multiple writers to an EA block are synchronized
- * by the bh lock. No more than a single bh lock is held at any time
- * to avoid deadlocks.
+ * count can change. Multiple writers to the same block are synchronized
+ * by the buffer lock.
*/
#include <linux/init.h>
@@ -69,6 +66,13 @@
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+#define IHDR(inode, raw_inode) \
+ ((struct ext3_xattr_ibody_header *) \
+ ((void *)raw_inode + \
+ EXT3_GOOD_OLD_INODE_SIZE + \
+ EXT3_I(inode)->i_extra_isize))
+#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
+
#ifdef EXT3_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
printk(KERN_DEBUG "inode %s:%ld: ", \
@@ -206,19 +210,9 @@
return cmp ? -ENODATA : 0;
}
-/*
- * ext3_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+ void *buffer, size_t buffer_size)
{
struct buffer_head *bh = NULL;
struct ext3_xattr_entry *entry;
@@ -228,9 +222,6 @@
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
- if (name == NULL)
- return -EINVAL;
- down_read(&EXT3_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
@@ -266,6 +257,75 @@
cleanup:
brelse(bh);
+ return error;
+}
+
+static int
+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ struct ext3_xattr_ibody_header *header;
+ struct ext3_xattr_entry *entry;
+ struct ext3_inode *raw_inode;
+ struct ext3_iloc iloc;
+ size_t size;
+ void *end;
+ int error;
+
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE ||
+ !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+ return -ENODATA;
+ error = ext3_get_inode_loc(inode, &iloc);
+ if (error)
+ return error;
+ raw_inode = ext3_raw_inode(&iloc);
+ header = IHDR(inode, raw_inode);
+ entry = IFIRST(header);
+ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+ error = ext3_xattr_check_names(entry, end);
+ if (error)
+ goto cleanup;
+ error = ext3_xattr_find_entry(&entry, name_index, name,
+ end - (void *)entry, 0);
+ if (error)
+ goto cleanup;
+ size = le32_to_cpu(entry->e_value_size);
+ if (buffer) {
+ error = -ERANGE;
+ if (size > buffer_size)
+ goto cleanup;
+ memcpy(buffer, (void *)IFIRST(header) +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
+ error = size;
+
+cleanup:
+ brelse(iloc.bh);
+ return error;
+}
+
+/*
+ * ext3_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ int error;
+
+ down_read(&EXT3_I(inode)->xattr_sem);
+ error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
+ buffer_size);
+ if (error == -ENODATA)
+ error = ext3_xattr_block_get(inode, name_index, name, buffer,
+ buffer_size);
up_read(&EXT3_I(inode)->xattr_sem);
return error;
}
@@ -295,18 +355,8 @@
return buffer_size - rest;
}
-/*
- * ext3_xattr_list()
- *
- * Copy a list of attribute names into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
{
struct buffer_head *bh = NULL;
int error;
@@ -314,7 +364,6 @@
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
- down_read(&EXT3_I(inode)->xattr_sem);
error = 0;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
@@ -337,11 +386,71 @@
cleanup:
brelse(bh);
- up_read(&EXT3_I(inode)->xattr_sem);
return error;
}
+static int
+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+ struct ext3_xattr_ibody_header *header;
+ struct ext3_inode *raw_inode;
+ struct ext3_iloc iloc;
+ void *end;
+ int error;
+
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE ||
+ !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+ return 0;
+ error = ext3_get_inode_loc(inode, &iloc);
+ if (error)
+ return error;
+ raw_inode = ext3_raw_inode(&iloc);
+ header = IHDR(inode, raw_inode);
+ end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+ error = ext3_xattr_check_names(IFIRST(header), end);
+ if (error)
+ goto cleanup;
+ error = ext3_xattr_list_entries(inode, IFIRST(header),
+ buffer, buffer_size);
+
+cleanup:
+ brelse(iloc.bh);
+ return error;
+}
+
+/*
+ * ext3_xattr_list()
+ *
+ * Copy a list of attribute names into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+ int i_error, b_error;
+
+ down_read(&EXT3_I(inode)->xattr_sem);
+ i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+ if (i_error < 0) {
+ b_error = 0;
+ } else {
+ if (buffer) {
+ buffer += i_error;
+ buffer_size -= i_error;
+ }
+ b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+ if (b_error < 0)
+ i_error = 0;
+ }
+ up_read(&EXT3_I(inode)->xattr_sem);
+ return i_error + b_error;
+}
+
/*
* If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
* not set, set it.
@@ -514,175 +623,146 @@
return 0;
}
-/*
- * ext3_xattr_set_handle()
- *
- * Create, replace or remove an extended attribute for this inode. Buffer
- * is NULL to remove an existing extended attribute, and non-NULL to
- * either replace an existing extended attribute, or create a new extended
- * attribute. The flags XATTR_REPLACE and XATTR_CREATE
- * specify that an extended attribute must exist and must not exist
- * previous to the call, respectively.
- *
- * Returns 0, or a negative error number on failure.
- */
+struct ext3_xattr_block_find {
+ struct ext3_xattr_search s;
+ struct buffer_head *bh;
+};
+
int
-ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- int flags)
+ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
+ struct ext3_xattr_block_find *bs)
{
struct super_block *sb = inode->i_sb;
- struct buffer_head *old_bh = NULL, *new_bh = NULL;
- struct ext3_xattr_info i = {
- .name_index = name_index,
- .name = name,
- .value = value,
- .value_len = value_len,
- };
- struct ext3_xattr_search s = {
- .not_found = 1,
- };
- struct mb_cache_entry *ce = NULL;
int error;
-#define header ((struct ext3_xattr_header *)(s.base))
-
- /*
- * header -- Points either into bh, or to a temporarily
- * allocated buffer.
- */
-
ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
- name_index, name, value, (long)value_len);
+ i->name_index, i->name, i->value, (long)i->value_len);
- if (IS_RDONLY(inode))
- return -EROFS;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (i.value == NULL)
- i.value_len = 0;
- down_write(&EXT3_I(inode)->xattr_sem);
if (EXT3_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
- old_bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+ bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
- if (!old_bh)
+ if (!bs->bh)
goto cleanup;
- ea_bdebug(old_bh, "b_count=%d, refcount=%d",
- atomic_read(&(old_bh->b_count)),
- le32_to_cpu(BHDR(old_bh)->h_refcount));
- if (ext3_xattr_check_block(old_bh)) {
-bad_block: ext3_error(sb, __FUNCTION__,
+ ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
+ atomic_read(&(bs->bh->b_count)),
+ le32_to_cpu(BHDR(bs->bh)->h_refcount));
+ if (ext3_xattr_check_block(bs->bh)) {
+ ext3_error(sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
/* Find the named attribute. */
- s.base = BHDR(old_bh);
- s.first = BFIRST(old_bh);
- s.end = old_bh->b_data + old_bh->b_size;
- s.here = BFIRST(old_bh);
- error = ext3_xattr_find_entry(&s.here, name_index, name,
- old_bh->b_size, 1);
+ bs->s.base = BHDR(bs->bh);
+ bs->s.first = BFIRST(bs->bh);
+ bs->s.end = bs->bh->b_data + bs->bh->b_size;
+ bs->s.here = bs->s.first;
+ error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
+ i->name, bs->bh->b_size, 1);
if (error && error != -ENODATA)
goto cleanup;
- s.not_found = error;
+ bs->s.not_found = error;
}
+ error = 0;
- if (s.not_found) {
- /* Request to remove a nonexistent attribute? */
- error = -ENODATA;
- if (flags & XATTR_REPLACE)
- goto cleanup;
- error = 0;
- if (value == NULL)
- goto cleanup;
- } else {
- /* Request to create an existing attribute? */
- error = -EEXIST;
- if (flags & XATTR_CREATE)
- goto cleanup;
- }
+cleanup:
+ return error;
+}
- if (header) {
- /* assert(header == BHDR(old_bh)); */
- ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
- if (header->h_refcount == cpu_to_le32(1)) {
+static int
+ext3_xattr_block_set(handle_t *handle, struct inode *inode,
+ struct ext3_xattr_info *i,
+ struct ext3_xattr_block_find *bs)
+{
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *new_bh = NULL;
+ struct ext3_xattr_search *s = &bs->s;
+ struct mb_cache_entry *ce = NULL;
+ int error;
+
+#define header(x) ((struct ext3_xattr_header *)(x))
+
+ if (i->value && i->value_len > sb->s_blocksize)
+ return -ENOSPC;
+ if (s->base) {
+ ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
+ bs->bh->b_blocknr);
+ if (header(s->base)->h_refcount == cpu_to_le32(1)) {
if (ce) {
mb_cache_entry_free(ce);
ce = NULL;
}
- ea_bdebug(old_bh, "modifying in-place");
- error = ext3_journal_get_write_access(handle, old_bh);
+ ea_bdebug(bs->bh, "modifying in-place");
+ error = ext3_journal_get_write_access(handle, bs->bh);
if (error)
goto cleanup;
- lock_buffer(old_bh);
- error = ext3_xattr_set_entry(&i, &s);
+ lock_buffer(bs->bh);
+ error = ext3_xattr_set_entry(i, s);
if (!error) {
- if (!IS_LAST_ENTRY(s.first))
- ext3_xattr_rehash(header, s.here);
- ext3_xattr_cache_insert(old_bh);
+ if (!IS_LAST_ENTRY(s->first))
+ ext3_xattr_rehash(header(s->base),
+ s->here);
+ ext3_xattr_cache_insert(bs->bh);
}
- unlock_buffer(old_bh);
+ unlock_buffer(bs->bh);
if (error == -EIO)
goto bad_block;
- if (!error && old_bh && header == BHDR(old_bh)) {
+ if (!error)
error = ext3_journal_dirty_metadata(handle,
- old_bh);
- }
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
} else {
- int offset = (char *)s.here - old_bh->b_data;
+ int offset = (char *)s->here - bs->bh->b_data;
if (ce) {
mb_cache_entry_release(ce);
ce = NULL;
}
- ea_bdebug(old_bh, "cloning");
- s.base = kmalloc(old_bh->b_size, GFP_KERNEL);
- /*assert(header == s.base)*/
+ ea_bdebug(bs->bh, "cloning");
+ s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
error = -ENOMEM;
- if (header == NULL)
+ if (s->base == NULL)
goto cleanup;
- memcpy(header, BHDR(old_bh), old_bh->b_size);
- s.first = ENTRY(header+1);
- header->h_refcount = cpu_to_le32(1);
- s.here = ENTRY(s.base + offset);
- s.end = header + old_bh->b_size;
+ memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+ s->first = ENTRY(header(s->base)+1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->here = ENTRY(s->base + offset);
+ s->end = s->base + bs->bh->b_size;
}
} else {
/* Allocate a buffer where we construct the new block. */
- s.base = kmalloc(sb->s_blocksize, GFP_KERNEL);
- /*assert(header == s.base)*/
+ s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ /* assert(header == s->base) */
error = -ENOMEM;
- if (header == NULL)
+ if (s->base == NULL)
goto cleanup;
- memset(header, 0, sb->s_blocksize);
- header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
- header->h_blocks = header->h_refcount = cpu_to_le32(1);
- s.first = ENTRY(header+1);
- s.here = ENTRY(header+1);
- s.end = (void *)header + sb->s_blocksize;
+ memset(s->base, 0, sb->s_blocksize);
+ header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+ header(s->base)->h_blocks = cpu_to_le32(1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->first = ENTRY(header(s->base)+1);
+ s->here = ENTRY(header(s->base)+1);
+ s->end = s->base + sb->s_blocksize;
}
- error = ext3_xattr_set_entry(&i, &s);
+ error = ext3_xattr_set_entry(i, s);
if (error == -EIO)
goto bad_block;
if (error)
goto cleanup;
- if (!IS_LAST_ENTRY(s.first))
- ext3_xattr_rehash(header, s.here);
+ if (!IS_LAST_ENTRY(s->first))
+ ext3_xattr_rehash(header(s->base), s->here);
inserted:
- if (!IS_LAST_ENTRY(s.first)) {
- new_bh = ext3_xattr_cache_find(inode, header, &ce);
+ if (!IS_LAST_ENTRY(s->first)) {
+ new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
if (new_bh) {
/* We found an identical block in the cache. */
- if (new_bh == old_bh)
+ if (new_bh == bs->bh)
ea_bdebug(new_bh, "keeping");
else {
/* The old block is released after updating
@@ -690,7 +770,8 @@
error = -EDQUOT;
if (DQUOT_ALLOC_BLOCK(inode, 1))
goto cleanup;
- error = ext3_journal_get_write_access(handle, new_bh);
+ error = ext3_journal_get_write_access(handle,
+ new_bh);
if (error)
goto cleanup;
lock_buffer(new_bh);
@@ -706,10 +787,10 @@
}
mb_cache_entry_release(ce);
ce = NULL;
- } else if (old_bh && header == BHDR(old_bh)) {
+ } else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
- ea_bdebug(old_bh, "keeping this block");
- new_bh = old_bh;
+ ea_bdebug(bs->bh, "keeping this block");
+ new_bh = bs->bh;
get_bh(new_bh);
} else {
/* We need to allocate a new block */
@@ -735,7 +816,7 @@
unlock_buffer(new_bh);
goto getblk_failed;
}
- memcpy(new_bh->b_data, header, new_bh->b_size);
+ memcpy(new_bh->b_data, s->base, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext3_xattr_cache_insert(new_bh);
@@ -748,30 +829,196 @@
/* Update the inode. */
EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
- inode->i_ctime = CURRENT_TIME_SEC;
- ext3_mark_inode_dirty(handle, inode);
- if (IS_SYNC(inode))
- handle->h_sync = 1;
/* Drop the previous xattr block. */
- if (old_bh && old_bh != new_bh)
- ext3_xattr_release_block(handle, inode, old_bh);
+ if (bs->bh && bs->bh != new_bh)
+ ext3_xattr_release_block(handle, inode, bs->bh);
error = 0;
cleanup:
if (ce)
mb_cache_entry_release(ce);
brelse(new_bh);
- brelse(old_bh);
- if (!(old_bh && header == BHDR(old_bh)))
- kfree(header);
- up_write(&EXT3_I(inode)->xattr_sem);
+ if (!(bs->bh && s->base == bs->bh->b_data))
+ kfree(s->base);
return error;
+bad_block:
+ ext3_error(inode->i_sb, __FUNCTION__,
+ "inode %ld: bad block %d", inode->i_ino,
+ EXT3_I(inode)->i_file_acl);
+ goto cleanup;
+
#undef header
}
+struct ext3_xattr_ibody_find {
+ struct ext3_xattr_search s;
+ struct ext3_iloc iloc;
+};
+
+int
+ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
+ struct ext3_xattr_ibody_find *is)
+{
+ struct ext3_xattr_ibody_header *header;
+ struct ext3_inode *raw_inode;
+ int error;
+
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return 0;
+ raw_inode = ext3_raw_inode(&is->iloc);
+ header = IHDR(inode, raw_inode);
+ is->s.base = is->s.first = IFIRST(header);
+ is->s.here = is->s.first;
+ is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+ if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
+ error = ext3_xattr_check_names(IFIRST(header), is->s.end);
+ if (error)
+ return error;
+ /* Find the named attribute. */
+ error = ext3_xattr_find_entry(&is->s.here, i->name_index,
+ i->name, is->s.end -
+ (void *)is->s.base, 0);
+ if (error && error != -ENODATA)
+ return error;
+ is->s.not_found = error;
+ }
+ return 0;
+}
+
+static int
+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ struct ext3_xattr_info *i,
+ struct ext3_xattr_ibody_find *is)
+{
+ struct ext3_xattr_ibody_header *header;
+ struct ext3_xattr_search *s = &is->s;
+ int error;
+
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return -ENOSPC;
+ error = ext3_xattr_set_entry(i, s);
+ if (error)
+ return error;
+ header = IHDR(inode, ext3_raw_inode(&is->iloc));
+ if (!IS_LAST_ENTRY(s->first)) {
+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+ EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+ } else {
+ header->h_magic = cpu_to_le32(0);
+ EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+ }
+ return 0;
+}
+
+/*
+ * ext3_xattr_set_handle()
+ *
+ * Create, replace or remove an extended attribute for this inode. Buffer
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ const char *name, const void *value, size_t value_len,
+ int flags)
+{
+ struct ext3_xattr_info i = {
+ .name_index = name_index,
+ .name = name,
+ .value = value,
+ .value_len = value_len,
+
+ };
+ struct ext3_xattr_ibody_find is = {
+ .s = { .not_found = -ENODATA, },
+ };
+ struct ext3_xattr_block_find bs = {
+ .s = { .not_found = -ENODATA, },
+ };
+ int error;
+
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EPERM;
+ if (!name)
+ return -EINVAL;
+ if (strlen(name) > 255)
+ return -ERANGE;
+ down_write(&EXT3_I(inode)->xattr_sem);
+ error = ext3_get_inode_loc(inode, &is.iloc);
+ if (error)
+ goto cleanup;
+ error = ext3_xattr_ibody_find(inode, &i, &is);
+ if (error)
+ goto cleanup;
+ if (is.s.not_found)
+ error = ext3_xattr_block_find(inode, &i, &bs);
+ if (error)
+ goto cleanup;
+ if (is.s.not_found && bs.s.not_found) {
+ error = -ENODATA;
+ if (flags & XATTR_REPLACE)
+ goto cleanup;
+ error = 0;
+ if (!value)
+ goto cleanup;
+ } else {
+ error = -EEXIST;
+ if (flags & XATTR_CREATE)
+ goto cleanup;
+ }
+ error = ext3_journal_get_write_access(handle, is.iloc.bh);
+ if (error)
+ goto cleanup;
+ if (!value) {
+ if (!is.s.not_found)
+ error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+ else if (!bs.s.not_found)
+ error = ext3_xattr_block_set(handle, inode, &i, &bs);
+ } else {
+ error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+ if (!error && !bs.s.not_found) {
+ i.value = NULL;
+ error = ext3_xattr_block_set(handle, inode, &i, &bs);
+ } else if (error == -ENOSPC) {
+ error = ext3_xattr_block_set(handle, inode, &i, &bs);
+ if (error)
+ goto cleanup;
+ if (!is.s.not_found) {
+ i.value = NULL;
+ error = ext3_xattr_ibody_set(handle, inode, &i,
+ &is);
+ }
+ }
+ }
+ if (!error) {
+ inode->i_ctime = CURRENT_TIME_SEC;
+ error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+ /*
+ * The bh is consumed by ext3_mark_iloc_dirty, even with
+ * error != 0.
+ */
+ is.iloc.bh = NULL;
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+ }
+
+cleanup:
+ brelse(is.iloc.bh);
+ brelse(bs.bh);
+ up_write(&EXT3_I(inode)->xattr_sem);
+ return error;
+}
+
/*
* ext3_xattr_set()
*
Index: linux-2.6.10/fs/ext3/inode.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/inode.c 2005-01-11 01:38:40.000000000 +0100
+++ linux-2.6.10/fs/ext3/inode.c 2005-01-11 01:38:40.000000000 +0100
@@ -2383,7 +2383,9 @@
int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
{
- return __ext3_get_inode_loc(inode, iloc, 1);
+ /* We have all inode data except xattrs in memory here. */
+ return __ext3_get_inode_loc(inode, iloc,
+ !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
}
void ext3_set_inode_flags(struct inode *inode)
@@ -2489,6 +2491,16 @@
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
+ ei->i_extra_isize =
+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
+ le16_to_cpu(raw_inode->i_extra_isize) : 0;
+ if (ei->i_extra_isize) {
+ __le32 *magic = (void *)raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
+ ei->i_extra_isize;
+ if (le32_to_cpu(*magic))
+ ei->i_state |= EXT3_STATE_XATTR;
+ }
+
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
@@ -2624,6 +2636,9 @@
} else for (block = 0; block < EXT3_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
+ raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
rc = ext3_journal_dirty_metadata(handle, bh);
if (!err)
Index: linux-2.6.10/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.h 2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/xattr.h 2005-01-11 01:38:40.000000000 +0100
@@ -31,6 +31,10 @@
__u32 h_reserved[4]; /* zero right now */
};
+struct ext3_xattr_ibody_header {
+ __le32 h_magic; /* magic number for identification */
+};
+
struct ext3_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
Index: linux-2.6.10/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs.h 2005-01-11 01:38:40.000000000 +0100
+++ linux-2.6.10/include/linux/ext3_fs.h 2005-01-11 01:38:40.000000000 +0100
@@ -195,7 +195,7 @@
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
-
+#define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */
/* Used to pass group descriptor data when online resize is done */
struct ext3_new_group_input {
@@ -293,6 +293,8 @@
__u32 m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2 */
+ __le16 i_extra_isize;
+ __le16 i_pad1;
};
#define i_size_high i_dir_acl
Index: linux-2.6.10/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs_i.h 2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/include/linux/ext3_fs_i.h 2005-01-11 02:57:54.128575944 +0100
@@ -113,6 +113,9 @@
*/
loff_t i_disksize;
+ /* on-disk additional length */
+ __u16 i_extra_isize;
+
/*
* truncate_sem is for serialising ext3_truncate() against
* ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
Index: linux-2.6.10/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/ialloc.c 2005-01-11 00:54:42.000000000 +0100
+++ linux-2.6.10/fs/ext3/ialloc.c 2005-01-11 01:38:40.000000000 +0100
@@ -596,6 +596,9 @@
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT3_STATE_NEW;
+ ei->i_extra_isize =
+ (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RESEND][PATCH 5/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 7/9] " Andreas Gruenbacher
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Ext3: factor out common xattr code; unnecessary lock
The ext3_xattr_set_handle2 and ext3_xattr_delete_inode functions contain
duplicate code to decrease the reference count of an xattr block. Move
this to a separate function.
Also we know we have exclusive access to the inode in
ext3_xattr_delete_inode; there is no need to grab the xattr_sem lock.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -356,6 +356,41 @@ static void ext3_xattr_update_super_bloc
}
/*
+ * Release the xattr block BH: If the reference count is > 1, decrement
+ * it; otherwise free the block.
+ */
+static void
+ext3_xattr_release_block(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh)
+{
+ struct mb_cache_entry *ce = NULL;
+
+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ea_bdebug(bh, "freeing");
+ if (ce)
+ mb_cache_entry_free(ce);
+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+ } else {
+ if (ext3_journal_get_write_access(handle, bh) == 0) {
+ lock_buffer(bh);
+ HDR(bh)->h_refcount = cpu_to_le32(
+ le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ ext3_journal_dirty_metadata(handle, bh);
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+ DQUOT_FREE_BLOCK(inode, 1);
+ unlock_buffer(bh);
+ ea_bdebug(bh, "refcount now=%d",
+ le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ }
+ if (ce)
+ mb_cache_entry_release(ce);
+ }
+}
+/*
* ext3_xattr_set_handle()
*
* Create, replace or remove an extended attribute for this inode. Buffer
@@ -733,37 +768,8 @@ getblk_failed:
/*
* If there was an old block, and we are no longer using it,
* release the old block.
- */
- ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
- if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
- /* Free the old block. */
- ea_bdebug(old_bh, "freeing");
- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
-
- /* ext3_forget() calls bforget() for us, but we
- let our caller release old_bh, so we need to
- duplicate the handle before. */
- get_bh(old_bh);
- ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
- } else {
- error = ext3_journal_get_write_access(handle, old_bh);
- if (error)
- goto cleanup;
- /* Decrement the refcount only. */
- lock_buffer(old_bh);
- HDR(old_bh)->h_refcount = cpu_to_le32(
- le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
- DQUOT_FREE_BLOCK(inode, 1);
- ext3_journal_dirty_metadata(handle, old_bh);
- ea_bdebug(old_bh, "refcount now=%d",
- le32_to_cpu(HDR(old_bh)->h_refcount));
- unlock_buffer(old_bh);
- }
+ */
+ ext3_xattr_release_block(handle, inode, old_bh);
}
cleanup:
@@ -813,13 +819,13 @@ retry:
* ext3_xattr_delete_inode()
*
* Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed.
+ * is called immediately before an inode is freed. We have exclusive
+ * access to the inode.
*/
void
ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce = NULL;
down_write(&EXT3_I(inode)->xattr_sem);
if (!EXT3_I(inode)->i_file_acl)
@@ -838,33 +844,10 @@ ext3_xattr_delete_inode(handle_t *handle
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
- ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
- if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
- get_bh(bh);
- ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
- } else {
- if (ext3_journal_get_write_access(handle, bh) != 0)
- goto cleanup;
- lock_buffer(bh);
- HDR(bh)->h_refcount = cpu_to_le32(
- le32_to_cpu(HDR(bh)->h_refcount) - 1);
- ext3_journal_dirty_metadata(handle, bh);
- if (IS_SYNC(inode))
- handle->h_sync = 1;
- DQUOT_FREE_BLOCK(inode, 1);
- unlock_buffer(bh);
- }
- ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ ext3_xattr_release_block(handle, inode, bh);
EXT3_I(inode)->i_file_acl = 0;
cleanup:
- if (ce)
- mb_cache_entry_release(ce);
brelse(bh);
up_write(&EXT3_I(inode)->xattr_sem);
}
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RESEND][PATCH 7/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 9/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 5/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 6/9] " Andreas Gruenbacher
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 30236 bytes --]
Cleanup and prepare ext3 for in-inode xattrs
Clean up several things in the xattr code, and prepare it for
in-inode attributes:
* Add the ext3_xattr_check_names, ext3_xattr_check_block, and
ext3_xattr_check_entry functions for checking xattr data
structures.
* Add the ext3_xattr_find_entry, ext3_xattr_list_entries, and
ext3_xattr_set_entry functions for manipulating xattr entries.
Switch to using these functions in ext3_xattr_get,
ext3_xattr_list, and ext3_xattr_set_handle.
* Merge ext3_xattr_set_handle and ext3_xattr_set_handle2.
* Rename the HDR and FIRST_ENTRY macros.
* We have no way to deal with an ext3_xattr_cache_insert failure,
so make it return void.
* Make the debug messages more useful.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -24,7 +24,7 @@
*
* +------------------+
* | header |
- * ¦ entry 1 | |
+ * | entry 1 | |
* | entry 2 | | growing downwards
* | entry 3 | v
* | four null bytes |
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -24,7 +24,7 @@
*
* +------------------+
* | header |
- * ¦ entry 1 | |
+ * | entry 1 | |
* | entry 2 | | growing downwards
* | entry 3 | v
* | four null bytes |
@@ -64,9 +64,9 @@
#include "xattr.h"
#include "acl.h"
-#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
+#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
-#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
+#define BFIRST(bh) ENTRY(BHDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
#ifdef EXT3_XATTR_DEBUG
@@ -89,11 +89,7 @@
# define ea_bdebug(f...)
#endif
-static int ext3_xattr_set_handle2(handle_t *, struct inode *,
- struct buffer_head *,
- struct ext3_xattr_header *);
-
-static int ext3_xattr_cache_insert(struct buffer_head *);
+static void ext3_xattr_cache_insert(struct buffer_head *);
static struct buffer_head *ext3_xattr_cache_find(struct inode *,
struct ext3_xattr_header *,
struct mb_cache_entry **);
@@ -148,6 +144,68 @@ ext3_listxattr(struct dentry *dentry, ch
return ext3_xattr_list(dentry->d_inode, buffer, size);
}
+static int
+ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
+{
+ while (!IS_LAST_ENTRY(entry)) {
+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
+ if ((void *)next >= end)
+ return -EIO;
+ entry = next;
+ }
+ return 0;
+}
+
+static inline int
+ext3_xattr_check_block(struct buffer_head *bh)
+{
+ int error;
+
+ if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+ BHDR(bh)->h_blocks != cpu_to_le32(1))
+ return -EIO;
+ error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+ return error;
+}
+
+static inline int
+ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
+{
+ size_t value_size = le32_to_cpu(entry->e_value_size);
+
+ if (entry->e_value_block != 0 || value_size > size ||
+ le16_to_cpu(entry->e_value_offs) + value_size > size)
+ return -EIO;
+ return 0;
+}
+
+static int
+ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
+ const char *name, size_t size, int sorted)
+{
+ struct ext3_xattr_entry *entry;
+ size_t name_len;
+ int cmp = 1;
+
+ if (name == NULL)
+ return -EINVAL;
+ name_len = strlen(name);
+ entry = *pentry;
+ for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+ cmp = name_index - entry->e_name_index;
+ if (!cmp)
+ cmp = name_len - entry->e_name_len;
+ if (!cmp)
+ cmp = memcmp(name, entry->e_name, name_len);
+ if (cmp <= 0 && (sorted || cmp == 0))
+ break;
+ }
+ *pentry = entry;
+ if (!cmp && ext3_xattr_check_entry(entry, size))
+ return -EIO;
+ return cmp ? -ENODATA : 0;
+}
+
/*
* ext3_xattr_get()
*
@@ -164,8 +222,7 @@ ext3_xattr_get(struct inode *inode, int
{
struct buffer_head *bh = NULL;
struct ext3_xattr_entry *entry;
- size_t name_len, size;
- char *end;
+ size_t size;
int error;
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
@@ -179,78 +236,65 @@ ext3_xattr_get(struct inode *inode, int
goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
- error = -EIO;
if (!bh)
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
- end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext3_error(inode->i_sb, "ext3_xattr_get",
- "inode %ld: bad block %d", inode->i_ino,
- EXT3_I(inode)->i_file_acl);
+ atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+ if (ext3_xattr_check_block(bh)) {
+bad_block: ext3_error(inode->i_sb, __FUNCTION__,
+ "inode %ld: bad block %d", inode->i_ino,
+ EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
- /* find named attribute */
- name_len = strlen(name);
-
- error = -ERANGE;
- if (name_len > 255)
- goto cleanup;
- entry = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(entry)) {
- struct ext3_xattr_entry *next =
- EXT3_XATTR_NEXT(entry);
- if ((char *)next >= end)
- goto bad_block;
- if (name_index == entry->e_name_index &&
- name_len == entry->e_name_len &&
- memcmp(name, entry->e_name, name_len) == 0)
- goto found;
- entry = next;
- }
- /* Check the remaining name entries */
- while (!IS_LAST_ENTRY(entry)) {
- struct ext3_xattr_entry *next =
- EXT3_XATTR_NEXT(entry);
- if ((char *)next >= end)
- goto bad_block;
- entry = next;
- }
- if (ext3_xattr_cache_insert(bh))
- ea_idebug(inode, "cache insert failed");
- error = -ENODATA;
- goto cleanup;
-found:
- /* check the buffer size */
- if (entry->e_value_block != 0)
+ ext3_xattr_cache_insert(bh);
+ entry = BFIRST(bh);
+ error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+ if (error == -EIO)
goto bad_block;
+ if (error)
+ goto cleanup;
size = le32_to_cpu(entry->e_value_size);
- if (size > inode->i_sb->s_blocksize ||
- le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
- goto bad_block;
-
- if (ext3_xattr_cache_insert(bh))
- ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- /* return value of attribute */
memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
- size);
+ size);
}
error = size;
cleanup:
brelse(bh);
up_read(&EXT3_I(inode)->xattr_sem);
-
return error;
}
+static int
+ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+ char *buffer, size_t buffer_size)
+{
+ size_t rest = buffer_size;
+
+ for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+ struct xattr_handler *handler =
+ ext3_xattr_handler(entry->e_name_index);
+
+ if (handler) {
+ size_t size = handler->list(inode, buffer, rest,
+ entry->e_name,
+ entry->e_name_len);
+ if (buffer) {
+ if (size > rest)
+ return -ERANGE;
+ buffer += size;
+ }
+ rest -= size;
+ }
+ }
+ return buffer_size - rest;
+}
+
/*
* ext3_xattr_list()
*
@@ -265,9 +309,6 @@ int
ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
{
struct buffer_head *bh = NULL;
- struct ext3_xattr_entry *entry;
- char *end;
- size_t rest = buffer_size;
int error;
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
@@ -283,50 +324,16 @@ ext3_xattr_list(struct inode *inode, cha
if (!bh)
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
- end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext3_error(inode->i_sb, "ext3_xattr_list",
- "inode %ld: bad block %d", inode->i_ino,
- EXT3_I(inode)->i_file_acl);
+ atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+ if (ext3_xattr_check_block(bh)) {
+ ext3_error(inode->i_sb, __FUNCTION__,
+ "inode %ld: bad block %d", inode->i_ino,
+ EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
-
- /* check the on-disk data structure */
- entry = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(entry)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
-
- if ((char *)next >= end)
- goto bad_block;
- entry = next;
- }
- if (ext3_xattr_cache_insert(bh))
- ea_idebug(inode, "cache insert failed");
-
- /* list the attribute names */
- for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
- entry = EXT3_XATTR_NEXT(entry)) {
- struct xattr_handler *handler =
- ext3_xattr_handler(entry->e_name_index);
-
- if (handler) {
- size_t size = handler->list(inode, buffer, rest,
- entry->e_name,
- entry->e_name_len);
- if (buffer) {
- if (size > rest) {
- error = -ERANGE;
- goto cleanup;
- }
- buffer += size;
- }
- rest -= size;
- }
- }
- error = buffer_size - rest; /* total size */
+ ext3_xattr_cache_insert(bh);
+ error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
cleanup:
brelse(bh);
@@ -366,8 +373,8 @@ ext3_xattr_release_block(handle_t *handl
struct mb_cache_entry *ce = NULL;
ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
- if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- ea_bdebug(bh, "freeing");
+ if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
mb_cache_entry_free(ce);
ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
@@ -376,20 +383,137 @@ ext3_xattr_release_block(handle_t *handl
} else {
if (ext3_journal_get_write_access(handle, bh) == 0) {
lock_buffer(bh);
- HDR(bh)->h_refcount = cpu_to_le32(
- le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ BHDR(bh)->h_refcount = cpu_to_le32(
+ le32_to_cpu(BHDR(bh)->h_refcount) - 1);
ext3_journal_dirty_metadata(handle, bh);
if (IS_SYNC(inode))
handle->h_sync = 1;
DQUOT_FREE_BLOCK(inode, 1);
unlock_buffer(bh);
- ea_bdebug(bh, "refcount now=%d",
- le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ ea_bdebug(bh, "refcount now=%d; releasing",
+ le32_to_cpu(BHDR(bh)->h_refcount));
}
if (ce)
mb_cache_entry_release(ce);
}
}
+
+struct ext3_xattr_info {
+ int name_index;
+ const char *name;
+ const void *value;
+ size_t value_len;
+};
+
+struct ext3_xattr_search {
+ struct ext3_xattr_entry *first;
+ void *base;
+ void *end;
+ struct ext3_xattr_entry *here;
+ int not_found;
+};
+
+static int
+ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
+{
+ struct ext3_xattr_entry *last;
+ size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+
+ /* Compute min_offs and last. */
+ last = s->first;
+ for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
+ if (!last->e_value_block && last->e_value_size) {
+ size_t offs = le16_to_cpu(last->e_value_offs);
+ if (offs < min_offs)
+ min_offs = offs;
+ }
+ }
+ free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+ if (!s->not_found) {
+ if (!s->here->e_value_block && s->here->e_value_size) {
+ size_t size = le32_to_cpu(s->here->e_value_size);
+ free += EXT3_XATTR_SIZE(size);
+ }
+ free += EXT3_XATTR_LEN(name_len);
+ }
+ if (i->value) {
+ if (free < EXT3_XATTR_SIZE(i->value_len) ||
+ free < EXT3_XATTR_LEN(name_len) +
+ EXT3_XATTR_SIZE(i->value_len))
+ return -ENOSPC;
+ }
+
+ if (i->value && s->not_found) {
+ /* Insert the new name. */
+ size_t size = EXT3_XATTR_LEN(name_len);
+ size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
+ memmove((void *)s->here + size, s->here, rest);
+ memset(s->here, 0, size);
+ s->here->e_name_index = i->name_index;
+ s->here->e_name_len = name_len;
+ memcpy(s->here->e_name, i->name, name_len);
+ } else {
+ if (!s->here->e_value_block && s->here->e_value_size) {
+ void *first_val = s->base + min_offs;
+ size_t offs = le16_to_cpu(s->here->e_value_offs);
+ void *val = s->base + offs;
+ size_t size = EXT3_XATTR_SIZE(
+ le32_to_cpu(s->here->e_value_size));
+
+ if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
+ /* The old and the new value have the same
+ size. Just replace. */
+ s->here->e_value_size =
+ cpu_to_le32(i->value_len);
+ memset(val + size - EXT3_XATTR_PAD, 0,
+ EXT3_XATTR_PAD); /* Clear pad bytes. */
+ memcpy(val, i->value, i->value_len);
+ return 0;
+ }
+
+ /* Remove the old value. */
+ memmove(first_val + size, first_val, val - first_val);
+ memset(first_val, 0, size);
+ s->here->e_value_size = 0;
+ s->here->e_value_offs = 0;
+ min_offs += size;
+
+ /* Adjust all value offsets. */
+ last = s->first;
+ while (!IS_LAST_ENTRY(last)) {
+ size_t o = le16_to_cpu(last->e_value_offs);
+ if (!last->e_value_block &&
+ last->e_value_size && o < offs)
+ last->e_value_offs =
+ cpu_to_le16(o + size);
+ last = EXT3_XATTR_NEXT(last);
+ }
+ }
+ if (!i->value) {
+ /* Remove the old name. */
+ size_t size = EXT3_XATTR_LEN(name_len);
+ last = ENTRY((void *)last - size);
+ memmove(s->here, (void *)s->here + size,
+ (void *)last - (void *)s->here + sizeof(__u32));
+ memset(last, 0, size);
+ }
+ }
+
+ if (i->value) {
+ /* Insert the new value. */
+ s->here->e_value_size = cpu_to_le32(i->value_len);
+ if (i->value_len) {
+ size_t size = EXT3_XATTR_SIZE(i->value_len);
+ void *val = s->base + min_offs - size;
+ s->here->e_value_offs = cpu_to_le16(min_offs - size);
+ memset(val + size - EXT3_XATTR_PAD, 0,
+ EXT3_XATTR_PAD); /* Clear the pad bytes. */
+ memcpy(val, i->value, i->value_len);
+ }
+ }
+ return 0;
+}
+
/*
* ext3_xattr_set_handle()
*
@@ -408,23 +532,24 @@ ext3_xattr_set_handle(handle_t *handle,
int flags)
{
struct super_block *sb = inode->i_sb;
- struct buffer_head *bh = NULL;
- struct ext3_xattr_header *header = NULL;
- struct ext3_xattr_entry *here, *last;
- size_t name_len, free, min_offs = sb->s_blocksize;
- int not_found = 1, error;
- char *end;
+ struct buffer_head *old_bh = NULL, *new_bh = NULL;
+ struct ext3_xattr_info i = {
+ .name_index = name_index,
+ .name = name,
+ .value = value,
+ .value_len = value_len,
+ };
+ struct ext3_xattr_search s = {
+ .not_found = 1,
+ };
+ struct mb_cache_entry *ce = NULL;
+ int error;
+
+#define header ((struct ext3_xattr_header *)(s.base))
/*
* header -- Points either into bh, or to a temporarily
* allocated buffer.
- * here -- The named entry found, or the place for inserting, within
- * the block pointed to by header.
- * last -- Points right after the last named entry within the block
- * pointed to by header.
- * min_offs -- The offset of the first value (values are aligned
- * towards the end of the block).
- * end -- Points right after the block pointed to by header.
*/
ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
@@ -434,77 +559,38 @@ ext3_xattr_set_handle(handle_t *handle,
return -EROFS;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
- if (value == NULL)
- value_len = 0;
- if (name == NULL)
- return -EINVAL;
- name_len = strlen(name);
- if (name_len > 255 || value_len > sb->s_blocksize)
- return -ERANGE;
+ if (i.value == NULL)
+ i.value_len = 0;
down_write(&EXT3_I(inode)->xattr_sem);
if (EXT3_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
- bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+ old_bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
- if (!bh)
+ if (!old_bh)
goto cleanup;
- ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)),
- le32_to_cpu(HDR(bh)->h_refcount));
- header = HDR(bh);
- end = bh->b_data + bh->b_size;
- if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
- header->h_blocks != cpu_to_le32(1)) {
-bad_block: ext3_error(sb, "ext3_xattr_set",
+ ea_bdebug(old_bh, "b_count=%d, refcount=%d",
+ atomic_read(&(old_bh->b_count)),
+ le32_to_cpu(BHDR(old_bh)->h_refcount));
+ if (ext3_xattr_check_block(old_bh)) {
+bad_block: ext3_error(sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
}
/* Find the named attribute. */
- here = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(here)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
- if ((char *)next >= end)
- goto bad_block;
- if (!here->e_value_block && here->e_value_size) {
- size_t offs = le16_to_cpu(here->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- not_found = name_index - here->e_name_index;
- if (!not_found)
- not_found = name_len - here->e_name_len;
- if (!not_found)
- not_found = memcmp(name, here->e_name,name_len);
- if (not_found <= 0)
- break;
- here = next;
- }
- last = here;
- /* We still need to compute min_offs and last. */
- while (!IS_LAST_ENTRY(last)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
- if ((char *)next >= end)
- goto bad_block;
- if (!last->e_value_block && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- last = next;
- }
-
- /* Check whether we have enough space left. */
- free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
- } else {
- /* We will use a new extended attribute block. */
- free = sb->s_blocksize -
- sizeof(struct ext3_xattr_header) - sizeof(__u32);
- here = last = NULL; /* avoid gcc uninitialized warning. */
+ s.base = BHDR(old_bh);
+ s.first = BFIRST(old_bh);
+ s.end = old_bh->b_data + old_bh->b_size;
+ s.here = BFIRST(old_bh);
+ error = ext3_xattr_find_entry(&s.here, name_index, name,
+ old_bh->b_size, 1);
+ if (error && error != -ENODATA)
+ goto cleanup;
+ s.not_found = error;
}
- if (not_found) {
+ if (s.not_found) {
/* Request to remove a nonexistent attribute? */
error = -ENODATA;
if (flags & XATTR_REPLACE)
@@ -517,183 +603,90 @@ bad_block: ext3_error(sb, "ext3_xattr_s
error = -EEXIST;
if (flags & XATTR_CREATE)
goto cleanup;
- if (!here->e_value_block && here->e_value_size) {
- size_t size = le32_to_cpu(here->e_value_size);
-
- if (le16_to_cpu(here->e_value_offs) + size >
- sb->s_blocksize || size > sb->s_blocksize)
- goto bad_block;
- free += EXT3_XATTR_SIZE(size);
- }
- free += EXT3_XATTR_LEN(name_len);
}
- error = -ENOSPC;
- if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
- goto cleanup;
-
- /* Here we know that we can set the new attribute. */
if (header) {
- struct mb_cache_entry *ce;
-
- /* assert(header == HDR(bh)); */
- ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
+ /* assert(header == BHDR(old_bh)); */
+ ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
+ old_bh->b_blocknr);
if (header->h_refcount == cpu_to_le32(1)) {
- if (ce)
+ if (ce) {
mb_cache_entry_free(ce);
- ea_bdebug(bh, "modifying in-place");
- error = ext3_journal_get_write_access(handle, bh);
+ ce = NULL;
+ }
+ ea_bdebug(old_bh, "modifying in-place");
+ error = ext3_journal_get_write_access(handle, old_bh);
if (error)
goto cleanup;
- lock_buffer(bh);
- /* keep the buffer locked while modifying it. */
+ lock_buffer(old_bh);
+ error = ext3_xattr_set_entry(&i, &s);
+ if (!error) {
+ if (!IS_LAST_ENTRY(s.first))
+ ext3_xattr_rehash(header, s.here);
+ ext3_xattr_cache_insert(old_bh);
+ }
+ unlock_buffer(old_bh);
+ if (error == -EIO)
+ goto bad_block;
+ if (!error && old_bh && header == BHDR(old_bh)) {
+ error = ext3_journal_dirty_metadata(handle,
+ old_bh);
+ }
+ if (error)
+ goto cleanup;
+ goto inserted;
} else {
- int offset;
+ int offset = (char *)s.here - old_bh->b_data;
- if (ce)
+ if (ce) {
mb_cache_entry_release(ce);
- ea_bdebug(bh, "cloning");
- header = kmalloc(bh->b_size, GFP_KERNEL);
+ ce = NULL;
+ }
+ ea_bdebug(old_bh, "cloning");
+ s.base = kmalloc(old_bh->b_size, GFP_KERNEL);
+ /*assert(header == s.base)*/
error = -ENOMEM;
if (header == NULL)
goto cleanup;
- memcpy(header, HDR(bh), bh->b_size);
+ memcpy(header, BHDR(old_bh), old_bh->b_size);
+ s.first = ENTRY(header+1);
header->h_refcount = cpu_to_le32(1);
- offset = (char *)here - bh->b_data;
- here = ENTRY((char *)header + offset);
- offset = (char *)last - bh->b_data;
- last = ENTRY((char *)header + offset);
+ s.here = ENTRY(s.base + offset);
+ s.end = header + old_bh->b_size;
}
} else {
/* Allocate a buffer where we construct the new block. */
- header = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ s.base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ /*assert(header == s.base)*/
error = -ENOMEM;
if (header == NULL)
goto cleanup;
memset(header, 0, sb->s_blocksize);
- end = (char *)header + sb->s_blocksize;
header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
header->h_blocks = header->h_refcount = cpu_to_le32(1);
- last = here = ENTRY(header+1);
+ s.first = ENTRY(header+1);
+ s.here = ENTRY(header+1);
+ s.end = (void *)header + sb->s_blocksize;
}
- /* Iff we are modifying the block in-place, bh is locked here. */
-
- if (not_found) {
- /* Insert the new name. */
- size_t size = EXT3_XATTR_LEN(name_len);
- size_t rest = (char *)last - (char *)here;
- memmove((char *)here + size, here, rest);
- memset(here, 0, size);
- here->e_name_index = name_index;
- here->e_name_len = name_len;
- memcpy(here->e_name, name, name_len);
- } else {
- if (!here->e_value_block && here->e_value_size) {
- char *first_val = (char *)header + min_offs;
- size_t offs = le16_to_cpu(here->e_value_offs);
- char *val = (char *)header + offs;
- size_t size = EXT3_XATTR_SIZE(
- le32_to_cpu(here->e_value_size));
-
- if (size == EXT3_XATTR_SIZE(value_len)) {
- /* The old and the new value have the same
- size. Just replace. */
- here->e_value_size = cpu_to_le32(value_len);
- memset(val + size - EXT3_XATTR_PAD, 0,
- EXT3_XATTR_PAD); /* Clear pad bytes. */
- memcpy(val, value, value_len);
- goto skip_replace;
- }
-
- /* Remove the old value. */
- memmove(first_val + size, first_val, val - first_val);
- memset(first_val, 0, size);
- here->e_value_offs = 0;
- min_offs += size;
-
- /* Adjust all value offsets. */
- last = ENTRY(header+1);
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
- if (!last->e_value_block && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT3_XATTR_NEXT(last);
- }
- }
- if (value == NULL) {
- /* Remove the old name. */
- size_t size = EXT3_XATTR_LEN(name_len);
- last = ENTRY((char *)last - size);
- memmove(here, (char*)here + size,
- (char*)last - (char*)here);
- memset(last, 0, size);
- }
- }
-
- if (value != NULL) {
- /* Insert the new value. */
- here->e_value_size = cpu_to_le32(value_len);
- if (value_len) {
- size_t size = EXT3_XATTR_SIZE(value_len);
- char *val = (char *)header + min_offs - size;
- here->e_value_offs =
- cpu_to_le16((char *)val - (char *)header);
- memset(val + size - EXT3_XATTR_PAD, 0,
- EXT3_XATTR_PAD); /* Clear the pad bytes. */
- memcpy(val, value, value_len);
- }
- }
-
-skip_replace:
- if (!IS_LAST_ENTRY(ENTRY(header+1)))
- ext3_xattr_rehash(header, here);
- if (bh && header == HDR(bh)) {
- /* we were modifying in-place. */
- unlock_buffer(bh);
- error = ext3_journal_dirty_metadata(handle, bh);
- if (error)
- goto cleanup;
- }
- error = ext3_xattr_set_handle2(handle, inode, bh,
- IS_LAST_ENTRY(ENTRY(header+1)) ?
- NULL : header);
-
-cleanup:
- brelse(bh);
- if (!(bh && header == HDR(bh)))
- kfree(header);
- up_write(&EXT3_I(inode)->xattr_sem);
-
- return error;
-}
-
-/*
- * Second half of ext3_xattr_set_handle(): Update the file system.
- */
-static int
-ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
- struct buffer_head *old_bh,
- struct ext3_xattr_header *header)
-{
- struct super_block *sb = inode->i_sb;
- struct buffer_head *new_bh = NULL;
- struct mb_cache_entry *ce = NULL;
- int error;
+ error = ext3_xattr_set_entry(&i, &s);
+ if (error == -EIO)
+ goto bad_block;
+ if (error)
+ goto cleanup;
+ if (!IS_LAST_ENTRY(s.first))
+ ext3_xattr_rehash(header, s.here);
- if (header) {
+inserted:
+ if (!IS_LAST_ENTRY(s.first)) {
new_bh = ext3_xattr_cache_find(inode, header, &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == old_bh)
- ea_bdebug(new_bh, "keeping this block");
+ ea_bdebug(new_bh, "keeping");
else {
/* The old block is released after updating
the inode. */
- ea_bdebug(new_bh, "reusing block");
-
error = -EDQUOT;
if (DQUOT_ALLOC_BLOCK(inode, 1))
goto cleanup;
@@ -701,25 +694,23 @@ ext3_xattr_set_handle2(handle_t *handle,
if (error)
goto cleanup;
lock_buffer(new_bh);
- HDR(new_bh)->h_refcount = cpu_to_le32(1 +
- le32_to_cpu(HDR(new_bh)->h_refcount));
- ea_bdebug(new_bh, "refcount now=%d",
- le32_to_cpu(HDR(new_bh)->h_refcount));
+ BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
+ le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ea_bdebug(new_bh, "reusing; refcount now=%d",
+ le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
- error = ext3_journal_dirty_metadata(handle, new_bh);
+ error = ext3_journal_dirty_metadata(handle,
+ new_bh);
if (error)
goto cleanup;
}
mb_cache_entry_release(ce);
ce = NULL;
- } else if (old_bh && header == HDR(old_bh)) {
+ } else if (old_bh && header == BHDR(old_bh)) {
/* We were modifying this block in-place. */
-
- /* Keep this block. No need to lock the block as we
- * don't need to change the reference count. */
+ ea_bdebug(old_bh, "keeping this block");
new_bh = old_bh;
get_bh(new_bh);
- ext3_xattr_cache_insert(new_bh);
} else {
/* We need to allocate a new block */
int goal = le32_to_cpu(
@@ -748,11 +739,10 @@ getblk_failed:
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext3_xattr_cache_insert(new_bh);
-
- ext3_xattr_update_super_block(handle, sb);
error = ext3_journal_dirty_metadata(handle, new_bh);
if (error)
goto cleanup;
+ ext3_xattr_update_super_block(handle, sb);
}
}
@@ -763,21 +753,23 @@ getblk_failed:
if (IS_SYNC(inode))
handle->h_sync = 1;
- error = 0;
- if (old_bh && old_bh != new_bh) {
- /*
- * If there was an old block, and we are no longer using it,
- * release the old block.
- */
+ /* Drop the previous xattr block. */
+ if (old_bh && old_bh != new_bh)
ext3_xattr_release_block(handle, inode, old_bh);
- }
+ error = 0;
cleanup:
if (ce)
mb_cache_entry_release(ce);
brelse(new_bh);
+ brelse(old_bh);
+ if (!(old_bh && header == BHDR(old_bh)))
+ kfree(header);
+ up_write(&EXT3_I(inode)->xattr_sem);
return error;
+
+#undef header
}
/*
@@ -832,14 +824,14 @@ ext3_xattr_delete_inode(handle_t *handle
goto cleanup;
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh) {
- ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+ ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %d read error", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
- if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
- ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+ if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+ BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+ ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
@@ -871,30 +863,29 @@ ext3_xattr_put_super(struct super_block
*
* Returns 0, or a negative error number on failure.
*/
-static int
+static void
ext3_xattr_cache_insert(struct buffer_head *bh)
{
- __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+ __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
struct mb_cache_entry *ce;
int error;
ce = mb_cache_entry_alloc(ext3_xattr_cache);
- if (!ce)
- return -ENOMEM;
+ if (!ce) {
+ ea_bdebug(bh, "out of memory");
+ return;
+ }
error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
if (error) {
mb_cache_entry_free(ce);
if (error == -EBUSY) {
- ea_bdebug(bh, "already in cache (%d cache entries)",
- atomic_read(&ext3_xattr_cache->c_entry_count));
+ ea_bdebug(bh, "already in cache");
error = 0;
}
} else {
- ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
- atomic_read(&ext3_xattr_cache->c_entry_count));
+ ea_bdebug(bh, "inserting [%x]", (int)hash);
mb_cache_entry_release(ce);
}
- return error;
}
/*
@@ -967,16 +958,16 @@ again:
}
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
- ext3_error(inode->i_sb, "ext3_xattr_cache_find",
+ ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %ld read error",
inode->i_ino, (unsigned long) ce->e_block);
- } else if (le32_to_cpu(HDR(bh)->h_refcount) >
+ } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
EXT3_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %ld refcount %d>%d",
+ ea_idebug(inode, "block %ld refcount %d>=%d",
(unsigned long) ce->e_block,
- le32_to_cpu(HDR(bh)->h_refcount),
+ le32_to_cpu(BHDR(bh)->h_refcount),
EXT3_XATTR_REFCOUNT_MAX);
- } else if (ext3_xattr_cmp(header, HDR(bh)) == 0) {
+ } else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
}
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RESEND][PATCH 6/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (2 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 7/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 2/9] " Andreas Gruenbacher
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Ext[23]: no spare xattr handler slots needed
The ext3_xattr_set_handle2 and ext3_xattr_delete_inode functions contain
duplicate code to decrease the reference count of an xattr block. Move
this to a separate function.
Also we know we have exclusive access to the inode in
ext3_xattr_delete_inode; there is no need to grab the xattr_sem lock.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -102,7 +102,7 @@ static void ext3_xattr_rehash(struct ext
static struct mb_cache *ext3_xattr_cache;
-static struct xattr_handler *ext3_xattr_handler_map[EXT3_XATTR_INDEX_MAX] = {
+static struct xattr_handler *ext3_xattr_handler_map[] = {
[EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler,
#ifdef CONFIG_EXT3_FS_POSIX_ACL
[EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler,
@@ -132,7 +132,7 @@ ext3_xattr_handler(int name_index)
{
struct xattr_handler *handler = NULL;
- if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX)
+ if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
handler = ext3_xattr_handler_map[name_index];
return handler;
}
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -100,7 +100,7 @@ static void ext2_xattr_rehash(struct ext
static struct mb_cache *ext2_xattr_cache;
-static struct xattr_handler *ext2_xattr_handler_map[EXT2_XATTR_INDEX_MAX] = {
+static struct xattr_handler *ext2_xattr_handler_map[] = {
[EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
[EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler,
@@ -130,7 +130,7 @@ ext2_xattr_handler(int name_index)
{
struct xattr_handler *handler = NULL;
- if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX)
+ if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
handler = ext2_xattr_handler_map[name_index];
return handler;
}
Index: linux-2.6.10/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.h
+++ linux-2.6.10/fs/ext3/xattr.h
@@ -16,7 +16,6 @@
#define EXT3_XATTR_REFCOUNT_MAX 1024
/* Name indexes */
-#define EXT3_XATTR_INDEX_MAX 10
#define EXT3_XATTR_INDEX_USER 1
#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
Index: linux-2.6.10/fs/ext2/xattr.h
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.h
+++ linux-2.6.10/fs/ext2/xattr.h
@@ -17,7 +17,6 @@
#define EXT2_XATTR_REFCOUNT_MAX 1024
/* Name indexes */
-#define EXT2_XATTR_INDEX_MAX 10
#define EXT2_XATTR_INDEX_USER 1
#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread* [RESEND][PATCH 2/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (3 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 6/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 8/9] " Andreas Gruenbacher
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Mbcache cleanup
There is no need to export struct mb_cache outside mbcache.c. Move
struct mb_cache to fs/mbcache.c and remove the superfluous struct
mb_cache_entry_index declaration.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/mbcache.c
===================================================================
--- linux-2.6.10.orig/fs/mbcache.c
+++ linux-2.6.10/fs/mbcache.c
@@ -72,6 +72,20 @@ EXPORT_SYMBOL(mb_cache_entry_find_first)
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif
+struct mb_cache {
+ struct list_head c_cache_list;
+ const char *c_name;
+ struct mb_cache_op c_op;
+ atomic_t c_entry_count;
+ int c_bucket_bits;
+#ifndef MB_CACHE_INDEXES_COUNT
+ int c_indexes_count;
+#endif
+ kmem_cache_t *c_entry_cache;
+ struct list_head *c_block_hash;
+ struct list_head *c_indexes_hash[0];
+};
+
/*
* Global data: list of all mbcache's, lru list, and a spinlock for
@@ -229,7 +243,7 @@ mb_cache_create(const char *name, struct
struct mb_cache *cache = NULL;
if(entry_size < sizeof(struct mb_cache_entry) +
- indexes_count * sizeof(struct mb_cache_entry_index))
+ indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
return NULL;
cache = kmalloc(sizeof(struct mb_cache) +
Index: linux-2.6.10/include/linux/mbcache.h
===================================================================
--- linux-2.6.10.orig/include/linux/mbcache.h
+++ linux-2.6.10/include/linux/mbcache.h
@@ -7,31 +7,6 @@
/* Hardwire the number of additional indexes */
#define MB_CACHE_INDEXES_COUNT 1
-struct mb_cache_entry;
-
-struct mb_cache_op {
- int (*free)(struct mb_cache_entry *, int);
-};
-
-struct mb_cache {
- struct list_head c_cache_list;
- const char *c_name;
- struct mb_cache_op c_op;
- atomic_t c_entry_count;
- int c_bucket_bits;
-#ifndef MB_CACHE_INDEXES_COUNT
- int c_indexes_count;
-#endif
- kmem_cache_t *c_entry_cache;
- struct list_head *c_block_hash;
- struct list_head *c_indexes_hash[0];
-};
-
-struct mb_cache_entry_index {
- struct list_head o_list;
- unsigned int o_key;
-};
-
struct mb_cache_entry {
struct list_head e_lru_list;
struct mb_cache *e_cache;
@@ -39,7 +14,14 @@ struct mb_cache_entry {
struct block_device *e_bdev;
sector_t e_block;
struct list_head e_block_list;
- struct mb_cache_entry_index e_indexes[0];
+ struct {
+ struct list_head o_list;
+ unsigned int o_key;
+ } e_indexes[0];
+};
+
+struct mb_cache_op {
+ int (*free)(struct mb_cache_entry *, int);
};
/* Functions on caches */
@@ -54,7 +36,6 @@ void mb_cache_destroy(struct mb_cache *)
struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *,
sector_t, unsigned int[]);
-void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
void mb_cache_entry_release(struct mb_cache_entry *);
void mb_cache_entry_free(struct mb_cache_entry *);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *,
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -1080,7 +1080,7 @@ init_ext3_xattr(void)
{
ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
sizeof(struct mb_cache_entry) +
- sizeof(struct mb_cache_entry_index), 1, 6);
+ sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
if (!ext3_xattr_cache)
return -ENOMEM;
return 0;
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -1016,7 +1016,7 @@ init_ext2_xattr(void)
{
ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
sizeof(struct mb_cache_entry) +
- sizeof(struct mb_cache_entry_index), 1, 6);
+ sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
if (!ext2_xattr_cache)
return -ENOMEM;
return 0;
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread* [RESEND][PATCH 8/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (4 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 2/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 3/9] " Andreas Gruenbacher
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Hide ext3_get_inode_loc in_mem option
The in_mem optimization in ext3_get_inode_loc avoids a disk read when
only the requested inode in the block group is allocated: In that case
ext3_get_inode_loc assumes that it can recreate the inode from the
in-memory inode. This is incorrect with in-inode extended attributes,
which don't have a shadow copy in memory. Hide the in_mem option and
clarify comments; the subsequent ea-in-inode patch changes the in_mem check
as required.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/ext3_fs.h
+++ linux-2.6.10/include/linux/ext3_fs.h
@@ -764,6 +764,7 @@ extern int ext3_sync_inode (handle_t *,
extern void ext3_discard_reservation (struct inode *);
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
+extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_set_aops(struct inode *inode);
Index: linux-2.6.10/fs/ext3/inode.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/inode.c
+++ linux-2.6.10/fs/ext3/inode.c
@@ -2269,13 +2269,13 @@ static unsigned long ext3_get_inode_bloc
return block;
}
-/*
+/*
* ext3_get_inode_loc returns with an extra refcount against the inode's
- * underlying buffer_head on success. If `in_mem' is false then we're purely
- * trying to determine the inode's location on-disk and no read need be
- * performed.
+ * underlying buffer_head on success. If 'in_mem' is true, we have all
+ * data in memory that is needed to recreate the on-disk version of this
+ * inode.
*/
-static int ext3_get_inode_loc(struct inode *inode,
+static int __ext3_get_inode_loc(struct inode *inode,
struct ext3_iloc *iloc, int in_mem)
{
unsigned long block;
@@ -2300,7 +2300,11 @@ static int ext3_get_inode_loc(struct ino
goto has_buffer;
}
- /* we can't skip I/O if inode is on a disk only */
+ /*
+ * If we have all information of the inode in memory and this
+ * is the only valid inode in the block, we need not read the
+ * block.
+ */
if (in_mem) {
struct buffer_head *bitmap_bh;
struct ext3_group_desc *desc;
@@ -2309,10 +2313,6 @@ static int ext3_get_inode_loc(struct ino
int block_group;
int start;
- /*
- * If this is the only valid inode in the block we
- * need not read the block.
- */
block_group = (inode->i_ino - 1) /
EXT3_INODES_PER_GROUP(inode->i_sb);
inodes_per_buffer = bh->b_size /
@@ -2359,8 +2359,9 @@ static int ext3_get_inode_loc(struct ino
make_io:
/*
- * There are another valid inodes in the buffer so we must
- * read the block from disk
+ * There are other valid inodes in the buffer, this inode
+ * has in-inode xattrs, or we don't have this inode in memory.
+ * Read the block from disk.
*/
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
@@ -2380,6 +2381,11 @@ has_buffer:
return 0;
}
+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
+{
+ return __ext3_get_inode_loc(inode, iloc, 1);
+}
+
void ext3_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT3_I(inode)->i_flags;
@@ -2411,7 +2417,7 @@ void ext3_read_inode(struct inode * inod
#endif
ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
- if (ext3_get_inode_loc(inode, &iloc, 0))
+ if (__ext3_get_inode_loc(inode, &iloc, 0))
goto bad_inode;
bh = iloc.bh;
raw_inode = ext3_raw_inode(&iloc);
@@ -2848,7 +2854,7 @@ ext3_reserve_inode_write(handle_t *handl
{
int err = 0;
if (handle) {
- err = ext3_get_inode_loc(inode, iloc, 1);
+ err = ext3_get_inode_loc(inode, iloc);
if (!err) {
BUFFER_TRACE(iloc->bh, "get_write_access");
err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -2947,7 +2953,7 @@ ext3_pin_inode(handle_t *handle, struct
int err = 0;
if (handle) {
- err = ext3_get_inode_loc(inode, &iloc, 1);
+ err = ext3_get_inode_loc(inode, &iloc);
if (!err) {
BUFFER_TRACE(iloc.bh, "get_write_access");
err = journal_get_write_access(handle, iloc.bh);
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread* [RESEND][PATCH 3/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (5 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 8/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 1/9] " Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 4/9] " Andreas Gruenbacher
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Race in ext[23] xattr sharing code
Andrew Tridgell and Stephen C. Tweedie have reported two different
Oopses caused by a race condition in the mbcache, which is responsible
for extended attribute sharing in ext2 and ext3. Stephen tracked down
the bug; I did the fix.
Explanation:
The mbcache caches the locations and content hashes of xattr blocks.
There are two access strategies: [1] xattr block disposal via
mb_cache_entry_get(), [2] xattr block reuse (sharing) via
mb_cache_entry_find_{first,next}(). There is no locking between the two
methods, so between one mb_cache_entry_find_x and the next, a
mb_cache_entry_get might come in, unhash the cache entry, and change the
journaling state of the xattr buffer. Subsequently, two things can
happen: [a] the next mb_cache_entry_find_x may try to follow the mbcache
hash chain starting from the entry that has become unhashed, which now
is a stale pointer, [b] the block may have become deallocated, and then
we try to reuse it.
Fix this by converting the mbcache entry use count into a readers-writer style
lock, and protect all block accesses in ext2/ext3 by the mbcache entry lock. This
ensures that destroying blocks is an exclusive operation that may not
overlap xattr block reuse, while allowing multiple "re-users". Write
access to the xattr block's buffer is protected by the buffer lock.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/mbcache.c
===================================================================
--- linux-2.6.10.orig/fs/mbcache.c
+++ linux-2.6.10/fs/mbcache.c
@@ -54,6 +54,10 @@
printk(KERN_ERR f); \
printk("\n"); \
} while(0)
+
+#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
+
+DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
@@ -140,7 +144,7 @@ __mb_cache_entry_forget(struct mb_cache_
{
struct mb_cache *cache = ce->e_cache;
- mb_assert(atomic_read(&ce->e_used) == 0);
+ mb_assert(!(ce->e_used || ce->e_queued));
if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) {
/* free failed -- put back on the lru list
for freeing later. */
@@ -157,9 +161,16 @@ __mb_cache_entry_forget(struct mb_cache_
static inline void
__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
{
- if (atomic_dec_and_test(&ce->e_used)) {
+ /* Wake up all processes queuing for this cache entry. */
+ if (ce->e_queued)
+ wake_up_all(&mb_cache_queue);
+ if (ce->e_used >= MB_CACHE_WRITER)
+ ce->e_used -= MB_CACHE_WRITER;
+ ce->e_used--;
+ if (!(ce->e_used || ce->e_queued)) {
if (!__mb_cache_entry_is_hashed(ce))
goto forget;
+ mb_assert(list_empty(&ce->e_lru_list));
list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
}
spin_unlock(&mb_cache_spinlock);
@@ -396,7 +407,8 @@ mb_cache_entry_alloc(struct mb_cache *ca
INIT_LIST_HEAD(&ce->e_lru_list);
INIT_LIST_HEAD(&ce->e_block_list);
ce->e_cache = cache;
- atomic_set(&ce->e_used, 1);
+ ce->e_used = 1 + MB_CACHE_WRITER;
+ ce->e_queued = 0;
}
return ce;
}
@@ -488,7 +500,8 @@ mb_cache_entry_free(struct mb_cache_entr
*
* Get a cache entry by device / block number. (There can only be one entry
* in the cache per device and block.) Returns NULL if no such cache entry
- * exists.
+ * exists. The returned cache entry is locked for exclusive access ("single
+ * writer").
*/
struct mb_cache_entry *
mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
@@ -504,9 +517,27 @@ mb_cache_entry_get(struct mb_cache *cach
list_for_each(l, &cache->c_block_hash[bucket]) {
ce = list_entry(l, struct mb_cache_entry, e_block_list);
if (ce->e_bdev == bdev && ce->e_block == block) {
+ DEFINE_WAIT(wait);
+
if (!list_empty(&ce->e_lru_list))
list_del_init(&ce->e_lru_list);
- atomic_inc(&ce->e_used);
+
+ while (ce->e_used > 0) {
+ ce->e_queued++;
+ prepare_to_wait(&mb_cache_queue, &wait,
+ TASK_UNINTERRUPTIBLE);
+ spin_unlock(&mb_cache_spinlock);
+ schedule();
+ spin_lock(&mb_cache_spinlock);
+ ce->e_queued--;
+ }
+ finish_wait(&mb_cache_queue, &wait);
+ ce->e_used += 1 + MB_CACHE_WRITER;
+
+ if (!__mb_cache_entry_is_hashed(ce)) {
+ __mb_cache_entry_release_unlock(ce);
+ return NULL;
+ }
goto cleanup;
}
}
@@ -523,14 +554,37 @@ static struct mb_cache_entry *
__mb_cache_entry_find(struct list_head *l, struct list_head *head,
int index, struct block_device *bdev, unsigned int key)
{
+ DEFINE_WAIT(wait);
+
while (l != head) {
struct mb_cache_entry *ce =
list_entry(l, struct mb_cache_entry,
e_indexes[index].o_list);
if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
+ DEFINE_WAIT(wait);
+
if (!list_empty(&ce->e_lru_list))
list_del_init(&ce->e_lru_list);
- atomic_inc(&ce->e_used);
+
+ /* Incrementing before holding the lock gives readers
+ priority over writers. */
+ ce->e_used++;
+ while (ce->e_used >= MB_CACHE_WRITER) {
+ ce->e_queued++;
+ prepare_to_wait(&mb_cache_queue, &wait,
+ TASK_UNINTERRUPTIBLE);
+ spin_unlock(&mb_cache_spinlock);
+ schedule();
+ spin_lock(&mb_cache_spinlock);
+ ce->e_queued--;
+ }
+ finish_wait(&mb_cache_queue, &wait);
+
+ if (!__mb_cache_entry_is_hashed(ce)) {
+ __mb_cache_entry_release_unlock(ce);
+ spin_lock(&mb_cache_spinlock);
+ return ERR_PTR(-EAGAIN);
+ }
return ce;
}
l = l->next;
@@ -544,7 +598,8 @@ __mb_cache_entry_find(struct list_head *
*
* Find the first cache entry on a given device with a certain key in
* an additional index. Additonal matches can be found with
- * mb_cache_entry_find_next(). Returns NULL if no match was found.
+ * mb_cache_entry_find_next(). Returns NULL if no match was found. The
+ * returned cache entry is locked for shared access ("multiple readers").
*
* @cache: the cache to search
* @index: the number of the additonal index to search (0<=index<indexes_count)
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -97,7 +97,6 @@ static int ext3_xattr_cache_insert(struc
static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
struct ext3_xattr_header *,
int *);
-static void ext3_xattr_cache_remove(struct buffer_head *);
static void ext3_xattr_rehash(struct ext3_xattr_header *,
struct ext3_xattr_entry *);
@@ -500,6 +499,7 @@ bad_block: ext3_error(sb, "ext3_xattr_s
/* Here we know that we can set the new attribute. */
if (header) {
+ struct mb_cache_entry *ce;
int credits = 0;
/* assert(header == HDR(bh)); */
@@ -511,14 +511,19 @@ bad_block: ext3_error(sb, "ext3_xattr_s
&credits);
if (error)
goto cleanup;
+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
+ bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
+ if (ce)
+ mb_cache_entry_free(ce);
ea_bdebug(bh, "modifying in-place");
- ext3_xattr_cache_remove(bh);
/* keep the buffer locked while modifying it. */
} else {
int offset;
+ if (ce)
+ mb_cache_entry_release(ce);
unlock_buffer(bh);
journal_release_buffer(handle, bh, credits);
skip_get_write_access:
@@ -725,6 +730,8 @@ getblk_failed:
error = 0;
if (old_bh && old_bh != new_bh) {
+ struct mb_cache_entry *ce;
+
/*
* If there was an old block, and we are no longer using it,
* release the old block.
@@ -732,9 +739,13 @@ getblk_failed:
error = ext3_journal_get_write_access(handle, old_bh);
if (error)
goto cleanup;
+ ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
+ old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
+ if (ce)
+ mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
@@ -747,6 +758,8 @@ getblk_failed:
/* Decrement the refcount only. */
HDR(old_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
+ if (ce)
+ mb_cache_entry_release(ce);
DQUOT_FREE_BLOCK(inode, 1);
ext3_journal_dirty_metadata(handle, old_bh);
ea_bdebug(old_bh, "refcount now=%d",
@@ -806,6 +819,7 @@ void
ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
{
struct buffer_head *bh = NULL;
+ struct mb_cache_entry *ce;
down_write(&EXT3_I(inode)->xattr_sem);
if (!EXT3_I(inode)->i_file_acl)
@@ -826,15 +840,19 @@ ext3_xattr_delete_inode(handle_t *handle
}
if (ext3_journal_get_write_access(handle, bh) != 0)
goto cleanup;
+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- ext3_xattr_cache_remove(bh);
+ if (ce)
+ mb_cache_entry_free(ce);
ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
HDR(bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ if (ce)
+ mb_cache_entry_release(ce);
ext3_journal_dirty_metadata(handle, bh);
if (IS_SYNC(inode))
handle->h_sync = 1;
@@ -951,11 +969,18 @@ ext3_xattr_cache_find(handle_t *handle,
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again:
ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
inode->i_sb->s_bdev, hash);
while (ce) {
- struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
+ struct buffer_head *bh;
+ if (IS_ERR(ce)) {
+ if (PTR_ERR(ce) == -EAGAIN)
+ goto again;
+ break;
+ }
+ bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext3_error(inode->i_sb, "ext3_xattr_cache_find",
"inode %ld: block %ld read error",
@@ -986,27 +1011,6 @@ ext3_xattr_cache_find(handle_t *handle,
return NULL;
}
-/*
- * ext3_xattr_cache_remove()
- *
- * Remove the cache entry of a block from the cache. Called when a
- * block becomes invalid.
- */
-static void
-ext3_xattr_cache_remove(struct buffer_head *bh)
-{
- struct mb_cache_entry *ce;
-
- ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
- if (ce) {
- ea_bdebug(bh, "removing (%d cache entries remaining)",
- atomic_read(&ext3_xattr_cache->c_entry_count)-1);
- mb_cache_entry_free(ce);
- } else
- ea_bdebug(bh, "no cache entry");
-}
-
#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16
Index: linux-2.6.10/include/linux/mbcache.h
===================================================================
--- linux-2.6.10.orig/include/linux/mbcache.h
+++ linux-2.6.10/include/linux/mbcache.h
@@ -10,7 +10,8 @@
struct mb_cache_entry {
struct list_head e_lru_list;
struct mb_cache *e_cache;
- atomic_t e_used;
+ unsigned short e_used;
+ unsigned short e_queued;
struct block_device *e_bdev;
sector_t e_block;
struct list_head e_block_list;
Index: linux-2.6.10/fs/ext2/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext2/xattr.c
+++ linux-2.6.10/fs/ext2/xattr.c
@@ -95,7 +95,6 @@ static int ext2_xattr_set2(struct inode
static int ext2_xattr_cache_insert(struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
struct ext2_xattr_header *);
-static void ext2_xattr_cache_remove(struct buffer_head *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
@@ -494,15 +493,22 @@ bad_block: ext2_error(sb, "ext2_xattr_s
/* Here we know that we can set the new attribute. */
if (header) {
+ struct mb_cache_entry *ce;
+
/* assert(header == HDR(bh)); */
+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
+ bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
ea_bdebug(bh, "modifying in-place");
- ext2_xattr_cache_remove(bh);
+ if (ce)
+ mb_cache_entry_free(ce);
/* keep the buffer locked while modifying it. */
} else {
int offset;
+ if (ce)
+ mb_cache_entry_release(ce);
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -707,13 +713,19 @@ ext2_xattr_set2(struct inode *inode, str
error = 0;
if (old_bh && old_bh != new_bh) {
+ struct mb_cache_entry *ce;
+
/*
* If there was an old block and we are no longer using it,
* release the old block.
*/
+ ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
+ old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
+ if (ce)
+ mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
/* We let our caller release old_bh, so we
@@ -724,6 +736,8 @@ ext2_xattr_set2(struct inode *inode, str
/* Decrement the refcount only. */
HDR(old_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
+ if (ce)
+ mb_cache_entry_release(ce);
DQUOT_FREE_BLOCK(inode, 1);
mark_buffer_dirty(old_bh);
ea_bdebug(old_bh, "refcount now=%d",
@@ -748,6 +762,7 @@ void
ext2_xattr_delete_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
+ struct mb_cache_entry *ce;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
@@ -767,15 +782,19 @@ ext2_xattr_delete_inode(struct inode *in
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- ext2_xattr_cache_remove(bh);
+ if (ce)
+ mb_cache_entry_free(ce);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
} else {
HDR(bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(bh)->h_refcount) - 1);
+ if (ce)
+ mb_cache_entry_release(ce);
mark_buffer_dirty(bh);
if (IS_SYNC(inode))
sync_dirty_buffer(bh);
@@ -892,11 +911,19 @@ ext2_xattr_cache_find(struct inode *inod
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again:
ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
inode->i_sb->s_bdev, hash);
while (ce) {
- struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
+ struct buffer_head *bh;
+
+ if (IS_ERR(ce)) {
+ if (PTR_ERR(ce) == -EAGAIN)
+ goto again;
+ break;
+ }
+ bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
"inode %ld: block %ld read error",
@@ -923,26 +950,6 @@ ext2_xattr_cache_find(struct inode *inod
return NULL;
}
-/*
- * ext2_xattr_cache_remove()
- *
- * Remove the cache entry of a block from the cache. Called when a
- * block becomes invalid.
- */
-static void
-ext2_xattr_cache_remove(struct buffer_head *bh)
-{
- struct mb_cache_entry *ce;
-
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
- if (ce) {
- ea_bdebug(bh, "removing (%d cache entries remaining)",
- atomic_read(&ext2_xattr_cache->c_entry_count)-1);
- mb_cache_entry_free(ce);
- } else
- ea_bdebug(bh, "no cache entry");
-}
-
#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread* [RESEND][PATCH 1/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (6 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 3/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
2005-01-13 10:31 ` [RESEND][PATCH 4/9] " Andreas Gruenbacher
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
This is a reverse diff of the following changeset:
# ChangeSet
# 2005/01/04 21:29:07-08:00 alex@clusterfs.com
# [PATCH] ext3: support for EA in inode
#
# 1) intent of the patch is to get possibility to store EAs in the body of large
# inode. it saves space and improves performance in some cases
#
# 2) the patch is quite simple: it works the same way original xattr does, but
# using other storage (inode body). body has priority over separate block.
# original routines (ext3_xattr_get, ext3_xattr_list, ext3_xattr_set) are
# renamed to ext3_xattr_block_*. new routines that handle inode storage are
# added (ext3_xattr_ibody_get, ext3_xattr_ibody_list, ext3_xattr_ibody_set).
# routines ext3_xattr_get, ext3_xattr_list and ext3_xattr_set allow user to
# access both the storages transparently
#
# 3) the change makes sense on filesystem with inode size >= 256 bytes only.
# 2.4 kernels don't support such filesystems, AFAIK. 2.6 kernels do support
# and ignore EAs stored in a body w/o the patch
#
# 4) debugfs and e2fsck need to be patched to deal with EAs in inode
# the patch will be sent later
#
# 5) testing results:
# a) Andrew Samba Master (tridge) has done successful tests
# b) we've been using ea-in-inode feature in Lustre for many months
#
# Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
# Signed-off-by: Alex Tomas <alex@clusterfs.com>
# Signed-off-by: Andrew Morton <akpm@osdl.org>
# Signed-off-by: Linus Torvalds <torvalds@osdl.org>
#
# fs/ext3/ialloc.c
# 2005/01/04 20:24:30-08:00 alex@clusterfs.com +5 -0
# ext3: support for EA in inode
#
# fs/ext3/inode.c
# 2005/01/04 20:24:30-08:00 alex@clusterfs.com +9 -1
# ext3: support for EA in inode
#
# fs/ext3/xattr.c
# 2005/01/04 20:24:30-08:00 alex@clusterfs.com +603 -36
# ext3: support for EA in inode
#
# fs/ext3/xattr.h
# 2005/01/04 18:48:13-08:00 alex@clusterfs.com +2 -1
# ext3: support for EA in inode
#
# include/linux/ext3_fs.h
# 2005/01/04 20:24:22-08:00 alex@clusterfs.com +3 -0
# ext3: support for EA in inode
#
# include/linux/ext3_fs_i.h
# 2005/01/04 18:48:13-08:00 alex@clusterfs.com +3 -0
# ext3: support for EA in inode
#
Index: linux-2.6.10-bk13/include/linux/ext3_fs.h
===================================================================
--- linux-2.6.10-bk13.orig/include/linux/ext3_fs.h
+++ linux-2.6.10-bk13/include/linux/ext3_fs.h
@@ -293,8 +293,6 @@ struct ext3_inode {
__u32 m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2 */
- __u16 i_extra_isize;
- __u16 i_pad1;
};
#define i_size_high i_dir_acl
@@ -757,7 +755,6 @@ extern int ext3_forget(handle_t *, int,
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *, int);
extern void ext3_read_inode (struct inode *);
extern int ext3_write_inode (struct inode *, int);
extern int ext3_setattr (struct dentry *, struct iattr *);
Index: linux-2.6.10-bk13/include/linux/ext3_fs_i.h
===================================================================
--- linux-2.6.10-bk13.orig/include/linux/ext3_fs_i.h
+++ linux-2.6.10-bk13/include/linux/ext3_fs_i.h
@@ -113,9 +113,6 @@ struct ext3_inode_info {
*/
loff_t i_disksize;
- /* on-disk additional length */
- __u16 i_extra_isize;
-
/*
* truncate_sem is for serialising ext3_truncate() against
* ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
Index: linux-2.6.10-bk13/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/xattr.c
+++ linux-2.6.10-bk13/fs/ext3/xattr.c
@@ -9,7 +9,6 @@
* suggestion of Luka Renko <luka.renko@hermes.si>.
* xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
* Red Hat Inc.
- * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
*/
/*
@@ -90,9 +89,10 @@
# define ea_bdebug(f...)
#endif
-static int ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
- struct buffer_head *old_bh,
- struct ext3_xattr_header *header);
+static int ext3_xattr_set_handle2(handle_t *, struct inode *,
+ struct buffer_head *,
+ struct ext3_xattr_header *);
+
static int ext3_xattr_cache_insert(struct buffer_head *);
static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
struct ext3_xattr_header *,
@@ -150,12 +150,17 @@ ext3_listxattr(struct dentry *dentry, ch
}
/*
- * ext3_xattr_block_get()
+ * ext3_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
*
- * routine looks for attribute in EA block and returns it's value and size
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
*/
int
-ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
{
struct buffer_head *bh = NULL;
@@ -169,6 +174,7 @@ ext3_xattr_block_get(struct inode *inode
if (name == NULL)
return -EINVAL;
+ down_read(&EXT3_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
@@ -241,87 +247,15 @@ found:
cleanup:
brelse(bh);
+ up_read(&EXT3_I(inode)->xattr_sem);
return error;
}
/*
- * ext3_xattr_ibody_get()
- *
- * routine looks for attribute in inode body and returns it's value and size
- */
-int
-ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-{
- int size, name_len = strlen(name), storage_size;
- struct ext3_xattr_entry *last;
- struct ext3_inode *raw_inode;
- struct ext3_iloc iloc;
- char *start, *end;
- int ret = -ENOENT;
-
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return -ENOENT;
-
- ret = ext3_get_inode_loc(inode, &iloc, 1);
- if (ret)
- return ret;
- raw_inode = ext3_raw_inode(&iloc);
-
- storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
- EXT3_GOOD_OLD_INODE_SIZE -
- EXT3_I(inode)->i_extra_isize -
- sizeof(__u32);
- start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
- EXT3_I(inode)->i_extra_isize;
- if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) {
- brelse(iloc.bh);
- return -ENOENT;
- }
- start += sizeof(__u32);
- end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
- last = (struct ext3_xattr_entry *) start;
- while (!IS_LAST_ENTRY(last)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
- if (le32_to_cpu(last->e_value_size) > storage_size ||
- (char *) next >= end) {
- ext3_error(inode->i_sb, "ext3_xattr_ibody_get",
- "inode %ld", inode->i_ino);
- brelse(iloc.bh);
- return -EIO;
- }
- if (name_index == last->e_name_index &&
- name_len == last->e_name_len &&
- !memcmp(name, last->e_name, name_len))
- goto found;
- last = next;
- }
-
- /* can't find EA */
- brelse(iloc.bh);
- return -ENOENT;
-
-found:
- size = le32_to_cpu(last->e_value_size);
- if (buffer) {
- ret = -ERANGE;
- if (buffer_size >= size) {
- memcpy(buffer, start + le16_to_cpu(last->e_value_offs),
- size);
- ret = size;
- }
- } else
- ret = size;
- brelse(iloc.bh);
- return ret;
-}
-
-/*
- * ext3_xattr_get()
+ * ext3_xattr_list()
*
- * Copy an extended attribute into the buffer
+ * Copy a list of attribute names into the buffer
* provided, or compute the buffer size required.
* Buffer is NULL to compute the size of the buffer required.
*
@@ -329,31 +263,7 @@ found:
* used / required on success.
*/
int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-{
- int err;
-
- down_read(&EXT3_I(inode)->xattr_sem);
-
- /* try to find attribute in inode body */
- err = ext3_xattr_ibody_get(inode, name_index, name,
- buffer, buffer_size);
- if (err < 0)
- /* search was unsuccessful, try to find EA in dedicated block */
- err = ext3_xattr_block_get(inode, name_index, name,
- buffer, buffer_size);
- up_read(&EXT3_I(inode)->xattr_sem);
-
- return err;
-}
-
-/* ext3_xattr_block_list()
- *
- * generate list of attributes stored in EA block
- */
-int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
{
struct buffer_head *bh = NULL;
struct ext3_xattr_entry *entry;
@@ -364,6 +274,7 @@ ext3_xattr_block_list(struct inode *inod
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
+ down_read(&EXT3_I(inode)->xattr_sem);
error = 0;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
@@ -373,7 +284,7 @@ ext3_xattr_block_list(struct inode *inod
if (!bh)
goto cleanup;
ea_bdebug(bh, "b_count=%d, refcount=%d",
- (int) atomic_read(&(bh->b_count)), (int) le32_to_cpu(HDR(bh)->h_refcount));
+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
end = bh->b_data + bh->b_size;
if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
HDR(bh)->h_blocks != cpu_to_le32(1)) {
@@ -420,131 +331,11 @@ bad_block: ext3_error(inode->i_sb, "ext3
cleanup:
brelse(bh);
+ up_read(&EXT3_I(inode)->xattr_sem);
return error;
}
-/* ext3_xattr_ibody_list()
- *
- * generate list of attributes stored in inode body
- */
-int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
-{
- struct ext3_xattr_entry *last;
- struct ext3_inode *raw_inode;
- size_t rest = buffer_size;
- struct ext3_iloc iloc;
- char *start, *end;
- int storage_size;
- int size = 0;
- int ret;
-
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return 0;
-
- ret = ext3_get_inode_loc(inode, &iloc, 1);
- if (ret)
- return ret;
- raw_inode = ext3_raw_inode(&iloc);
-
- storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
- EXT3_GOOD_OLD_INODE_SIZE -
- EXT3_I(inode)->i_extra_isize -
- sizeof(__u32);
- start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
- EXT3_I(inode)->i_extra_isize;
- if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC)
- goto cleanup;
- start += sizeof(__u32);
- end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
- last = (struct ext3_xattr_entry *) start;
- while (!IS_LAST_ENTRY(last)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
- if ((char *) next >= end) {
- ext3_error(inode->i_sb, "ext3_xattr_ibody_list",
- "inode %ld", inode->i_ino);
- ret = -EIO;
- goto cleanup;
- }
- last = next;
- }
-
- last = (struct ext3_xattr_entry *) start;
- for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
- struct xattr_handler *handler =
- ext3_xattr_handler(last->e_name_index);
-
- if (!handler)
- continue;
-
- size += handler->list(inode, buffer, rest, last->e_name,
- last->e_name_len);
- if (buffer) {
- if (size > rest) {
- ret = -ERANGE;
- goto cleanup;
- }
- buffer += size;
- }
- rest -= size;
- }
- ret = buffer_size - rest; /* total size */
-
-cleanup:
- brelse(iloc.bh);
- return ret;
-}
-
-/*
- * ext3_xattr_list()
- *
- * Copy a list of attribute names into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
-{
- int size = buffer_size;
- int error;
-
- down_read(&EXT3_I(inode)->xattr_sem);
-
- /* get list of attributes stored in inode body */
- error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
- if (error < 0) {
- /* some error occured while collecting
- * attributes in inode body */
- size = 0;
- goto cleanup;
- }
- size = error;
-
- /* get list of attributes stored in dedicated block */
- if (buffer) {
- buffer_size -= error;
- if (buffer_size <= 0) {
- buffer = NULL;
- buffer_size = 0;
- } else
- buffer += error;
- }
-
- error = ext3_xattr_block_list(inode, buffer, buffer_size);
- /* listing was successful, so we return len */
- if (error < 0)
- size = 0;
-
-cleanup:
- up_read(&EXT3_I(inode)->xattr_sem);
- return error + size;
-}
-
/*
* If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
* not set, set it.
@@ -566,286 +357,6 @@ static void ext3_xattr_update_super_bloc
}
/*
- * ext3_xattr_ibody_find()
- *
- * search attribute and calculate free space in inode body
- * NOTE: free space includes space our attribute hold
- */
-int
-ext3_xattr_ibody_find(struct inode *inode, int name_index,
- const char *name, int *free)
-{
- struct ext3_xattr_entry *last;
- struct ext3_inode *raw_inode;
- int name_len = strlen(name);
- int err, storage_size;
- struct ext3_iloc iloc;
- char *start, *end;
- int ret = -ENOENT;
-
- *free = 0;
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return ret;
-
- err = ext3_get_inode_loc(inode, &iloc, 1);
- if (err)
- return -EIO;
- raw_inode = ext3_raw_inode(&iloc);
-
- storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
- EXT3_GOOD_OLD_INODE_SIZE -
- EXT3_I(inode)->i_extra_isize -
- sizeof(__u32);
- *free = storage_size - sizeof(__u32);
- start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
- EXT3_I(inode)->i_extra_isize;
- if (le32_to_cpu((*(__u32*) start)) != EXT3_XATTR_MAGIC) {
- brelse(iloc.bh);
- return -ENOENT;
- }
- start += sizeof(__u32);
- end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-
- last = (struct ext3_xattr_entry *) start;
- while (!IS_LAST_ENTRY(last)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
- if (le32_to_cpu(last->e_value_size) > storage_size ||
- (char *) next >= end) {
- ext3_error(inode->i_sb, "ext3_xattr_ibody_find",
- "inode %ld", inode->i_ino);
- brelse(iloc.bh);
- return -EIO;
- }
-
- if (name_index == last->e_name_index &&
- name_len == last->e_name_len &&
- !memcmp(name, last->e_name, name_len)) {
- ret = 0;
- } else {
- *free -= EXT3_XATTR_LEN(last->e_name_len);
- *free -= le32_to_cpu(last->e_value_size);
- }
- last = next;
- }
-
- brelse(iloc.bh);
- return ret;
-}
-
-/*
- * ext3_xattr_block_find()
- *
- * search attribute and calculate free space in EA block (if it allocated)
- * NOTE: free space includes space our attribute hold
- */
-int
-ext3_xattr_block_find(struct inode *inode, int name_index,
- const char *name, int *free)
-{
- struct buffer_head *bh = NULL;
- struct ext3_xattr_entry *entry;
- char *end;
- int name_len, error = -ENOENT;
-
- if (!EXT3_I(inode)->i_file_acl) {
- *free = inode->i_sb->s_blocksize -
- sizeof(struct ext3_xattr_header) -
- sizeof(__u32);
- return -ENOENT;
- }
- ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
- bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
- if (!bh)
- return -EIO;
- ea_bdebug(bh, "b_count=%d, refcount=%d",
- atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
- end = bh->b_data + bh->b_size;
- if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
- HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block: ext3_error(inode->i_sb, "ext3_xattr_get",
- "inode %ld: bad block %d", inode->i_ino,
- EXT3_I(inode)->i_file_acl);
- brelse(bh);
- return -EIO;
- }
- /* find named attribute */
- name_len = strlen(name);
- *free = bh->b_size - sizeof(__u32);
-
- entry = FIRST_ENTRY(bh);
- while (!IS_LAST_ENTRY(entry)) {
- struct ext3_xattr_entry *next =
- EXT3_XATTR_NEXT(entry);
- if ((char *)next >= end)
- goto bad_block;
- if (name_index == entry->e_name_index &&
- name_len == entry->e_name_len &&
- memcmp(name, entry->e_name, name_len) == 0) {
- error = 0;
- } else {
- *free -= EXT3_XATTR_LEN(entry->e_name_len);
- *free -= le32_to_cpu(entry->e_value_size);
- }
- entry = next;
- }
- brelse(bh);
-
- return error;
-}
-
-/*
- * ext3_xattr_ibody_set()
- *
- * this routine add/remove/replace attribute in inode body
- */
-int
-ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- int flags)
-{
- struct ext3_xattr_entry *last, *next, *here = NULL;
- struct ext3_inode *raw_inode;
- int name_len = strlen(name);
- int esize = EXT3_XATTR_LEN(name_len);
- struct buffer_head *bh;
- int err, storage_size;
- struct ext3_iloc iloc;
- int free, min_offs;
- char *start, *end;
-
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return -ENOSPC;
-
- err = ext3_get_inode_loc(inode, &iloc, 1);
- if (err)
- return err;
- raw_inode = ext3_raw_inode(&iloc);
- bh = iloc.bh;
-
- storage_size = EXT3_SB(inode->i_sb)->s_inode_size -
- EXT3_GOOD_OLD_INODE_SIZE -
- EXT3_I(inode)->i_extra_isize -
- sizeof(__u32);
- start = (char *) raw_inode + EXT3_GOOD_OLD_INODE_SIZE +
- EXT3_I(inode)->i_extra_isize;
- if ((*(__u32*) start) != EXT3_XATTR_MAGIC) {
- /* inode had no attributes before */
- *((__u32*) start) = cpu_to_le32(EXT3_XATTR_MAGIC);
- }
- start += sizeof(__u32);
- end = (char *) raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
- min_offs = storage_size;
- free = storage_size - sizeof(__u32);
-
- last = (struct ext3_xattr_entry *) start;
- while (!IS_LAST_ENTRY(last)) {
- next = EXT3_XATTR_NEXT(last);
- if (le32_to_cpu(last->e_value_size) > storage_size ||
- (char *) next >= end) {
- ext3_error(inode->i_sb, "ext3_xattr_ibody_set",
- "inode %ld", inode->i_ino);
- brelse(bh);
- return -EIO;
- }
-
- if (last->e_value_size) {
- int offs = le16_to_cpu(last->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
- }
- if (name_index == last->e_name_index &&
- name_len == last->e_name_len &&
- !memcmp(name, last->e_name, name_len))
- here = last;
- else {
- /* we calculate all but our attribute
- * because it will be removed before changing */
- free -= EXT3_XATTR_LEN(last->e_name_len);
- free -= le32_to_cpu(last->e_value_size);
- }
- last = next;
- }
-
- if (value && (esize + value_len > free)) {
- brelse(bh);
- return -ENOSPC;
- }
-
- err = ext3_reserve_inode_write(handle, inode, &iloc);
- if (err) {
- brelse(bh);
- return err;
- }
-
- /* optimization: can we simple replace old value ? */
- if (here && value_len == le32_to_cpu(here->e_value_size)) {
- int offs = le16_to_cpu(here->e_value_offs);
- memcpy(start + offs, value, value_len);
- goto done;
- }
-
- if (here) {
- /* time to remove old value */
- struct ext3_xattr_entry *e;
- int size = le32_to_cpu(here->e_value_size);
- int border = le16_to_cpu(here->e_value_offs);
- char *src;
-
- /* move tail */
- memmove(start + min_offs + size, start + min_offs,
- border - min_offs);
-
- /* recalculate offsets */
- e = (struct ext3_xattr_entry *) start;
- while (!IS_LAST_ENTRY(e)) {
- struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e);
- int offs = le16_to_cpu(e->e_value_offs);
- if (offs < border)
- e->e_value_offs =
- cpu_to_le16(offs + size);
- e = next;
- }
- min_offs += size;
-
- /* remove entry */
- border = EXT3_XATTR_LEN(here->e_name_len);
- src = (char *) here + EXT3_XATTR_LEN(here->e_name_len);
- size = (char *) last - src;
- if ((char *) here + size > end)
- printk("ALERT at %s:%d: 0x%p + %d > 0x%p\n",
- __FILE__, __LINE__, here, size, end);
- memmove(here, src, size);
- last = (struct ext3_xattr_entry *) ((char *) last - border);
- *((__u32 *) last) = 0;
- }
-
- if (value) {
- int offs = min_offs - value_len;
- /* use last to create new entry */
- last->e_name_len = strlen(name);
- last->e_name_index = name_index;
- last->e_value_offs = cpu_to_le16(offs);
- last->e_value_size = cpu_to_le32(value_len);
- last->e_hash = last->e_value_block = 0;
- memset(last->e_name, 0, esize);
- memcpy(last->e_name, name, last->e_name_len);
- if (start + offs + value_len > end)
- printk("ALERT at %s:%d: 0x%p + %d + %zd > 0x%p\n",
- __FILE__, __LINE__, start, offs,
- value_len, end);
- memcpy(start + offs, value, value_len);
- last = EXT3_XATTR_NEXT(last);
- *((__u32 *) last) = 0;
- }
-
-done:
- ext3_mark_iloc_dirty(handle, inode, &iloc);
- brelse(bh);
-
- return 0;
-}
-
-/*
* ext3_xattr_set_handle()
*
* Create, replace or remove an extended attribute for this inode. Buffer
@@ -859,100 +370,6 @@ done:
*/
int
ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- int flags)
-{
- int free1 = -1, free2 = -1;
- int err, where = 0, total;
- int name_len;
-
- ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
- name_index, name, value, (long)value_len);
-
- if (IS_RDONLY(inode))
- return -EROFS;
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return -EPERM;
- if (value == NULL)
- value_len = 0;
- if (name == NULL)
- return -EINVAL;
- name_len = strlen(name);
- if (name_len > 255 || value_len > inode->i_sb->s_blocksize)
- return -ERANGE;
- down_write(&EXT3_I(inode)->xattr_sem);
-
-#define EX_FOUND_IN_IBODY 1
-#define EX_FOUND_IN_BLOCK 2
-
- /* try to find attribute in inode body */
- err = ext3_xattr_ibody_find(inode, name_index, name, &free1);
- if (err == 0) {
- /* found EA in inode */
- where = EX_FOUND_IN_IBODY;
- } else if (err == -ENOENT) {
- /* there is no such attribute in inode body */
- /* try to find attribute in dedicated block */
- err = ext3_xattr_block_find(inode, name_index, name, &free2);
- if (err != 0 && err != -ENOENT) {
- /* not found EA in block */
- goto finish;
- } else if (err == 0) {
- /* found EA in block */
- where = EX_FOUND_IN_BLOCK;
- }
- } else
- goto finish;
-
- /* check flags: may replace? may create ? */
- if (where && (flags & XATTR_CREATE)) {
- err = -EEXIST;
- goto finish;
- } else if (!where && (flags & XATTR_REPLACE)) {
- err = -ENODATA;
- goto finish;
- }
-
- /* check if we have enough space to store attribute */
- total = EXT3_XATTR_LEN(strlen(name)) + value_len;
- if (total > free1 && free2 > 0 && total > free2) {
- /* have no enough space */
- err = -ENOSPC;
- goto finish;
- }
-
- /* there are two cases when we want to remove EA from original storage:
- * a) EA is stored in the inode, but new value doesn't fit
- * b) EA is stored in the block, but new value fit in inode
- */
- if (where == EX_FOUND_IN_IBODY && total > free1)
- ext3_xattr_ibody_set(handle, inode, name_index, name,
- NULL, 0, flags);
- else if (where == EX_FOUND_IN_BLOCK && total <= free1)
- ext3_xattr_block_set(handle, inode, name_index,
- name, NULL, 0, flags);
-
- /* try to store EA in inode body */
- err = ext3_xattr_ibody_set(handle, inode, name_index, name,
- value, value_len, flags);
- if (err) {
- /* can't store EA in inode body: try to store in block */
- err = ext3_xattr_block_set(handle, inode, name_index, name,
- value, value_len, flags);
- }
-
-finish:
- up_write(&EXT3_I(inode)->xattr_sem);
- return err;
-}
-
-/*
- * ext3_xattr_block_set()
- *
- * this routine add/remove/replace attribute in EA block
- */
-int
-ext3_xattr_block_set(handle_t *handle, struct inode *inode, int name_index,
const char *name, const void *value, size_t value_len,
int flags)
{
@@ -975,7 +392,22 @@ ext3_xattr_block_set(handle_t *handle, s
* towards the end of the block).
* end -- Points right after the block pointed to by header.
*/
+
+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
+ name_index, name, value, (long)value_len);
+
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EPERM;
+ if (value == NULL)
+ value_len = 0;
+ if (name == NULL)
+ return -EINVAL;
name_len = strlen(name);
+ if (name_len > 255 || value_len > sb->s_blocksize)
+ return -ERANGE;
+ down_write(&EXT3_I(inode)->xattr_sem);
if (EXT3_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
@@ -1201,6 +633,7 @@ cleanup:
brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
+ up_write(&EXT3_I(inode)->xattr_sem);
return error;
}
Index: linux-2.6.10-bk13/fs/ext3/inode.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/inode.c
+++ linux-2.6.10-bk13/fs/ext3/inode.c
@@ -2275,7 +2275,7 @@ static unsigned long ext3_get_inode_bloc
* trying to determine the inode's location on-disk and no read need be
* performed.
*/
-int ext3_get_inode_loc(struct inode *inode,
+static int ext3_get_inode_loc(struct inode *inode,
struct ext3_iloc *iloc, int in_mem)
{
unsigned long block;
@@ -2483,11 +2483,6 @@ void ext3_read_inode(struct inode * inod
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
- if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
- ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
- else
- ei->i_extra_isize = 0;
-
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
@@ -2623,9 +2618,6 @@ static int ext3_do_update_inode(handle_t
} else for (block = 0; block < EXT3_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
- if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE)
- raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
-
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
rc = ext3_journal_dirty_metadata(handle, bh);
if (!err)
Index: linux-2.6.10-bk13/fs/ext3/xattr.h
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/xattr.h
+++ linux-2.6.10-bk13/fs/ext3/xattr.h
@@ -67,8 +67,7 @@ extern ssize_t ext3_listxattr(struct den
extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext3_xattr_list(struct inode *, char *, size_t);
extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
-extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
extern void ext3_xattr_put_super(struct super_block *);
Index: linux-2.6.10-bk13/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.10-bk13.orig/fs/ext3/ialloc.c
+++ linux-2.6.10-bk13/fs/ext3/ialloc.c
@@ -596,11 +596,6 @@ got:
spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT3_STATE_NEW;
- if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
- ei->i_extra_isize = sizeof(__u16) /* i_extra_isize */
- + sizeof(__u16); /* i_pad1 */
- } else
- ei->i_extra_isize = 0;
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RESEND][PATCH 4/9] Ext3: extended attribute sharing fixes and in-inode EAs
2005-01-13 10:31 [RESEND][PATCH 0/9] Ext3: extended attribute sharing fixes and in-inode EAs Andreas Gruenbacher
` (7 preceding siblings ...)
2005-01-13 10:31 ` [RESEND][PATCH 1/9] " Andreas Gruenbacher
@ 2005-01-13 10:31 ` Andreas Gruenbacher
8 siblings, 0 replies; 10+ messages in thread
From: Andreas Gruenbacher @ 2005-01-13 10:31 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton
Cc: linux-kernel, Alex Tomas, Andreas Dilger, Stephen C. Tweedie,
Andrew Tridgell
Ext3: do not use journal_release_buffer
The use of journal_release_buffer is unsafe; it can overflow the
journal: When a buffer is stolen from a transaction and later removed
from that transaction with journal_release_buffer, the buffer is not
accounted to the transaction that now "owns" the buffer, and one extra
credit appears to be available. Don't use journal_release_buffer:
We used to rely on the buffer lock to synchronize xattr block accesses, and
had to get write access to the buffer first to get atomicity. Return the
mb_cache_entry from ext3_xattr_cache_find instead, and do the
check/update under its lock. Only get write access when we know we will
use the buffer.
Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Index: linux-2.6.10/fs/ext3/xattr.c
===================================================================
--- linux-2.6.10.orig/fs/ext3/xattr.c
+++ linux-2.6.10/fs/ext3/xattr.c
@@ -94,9 +94,9 @@ static int ext3_xattr_set_handle2(handle
struct ext3_xattr_header *);
static int ext3_xattr_cache_insert(struct buffer_head *);
-static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
+static struct buffer_head *ext3_xattr_cache_find(struct inode *,
struct ext3_xattr_header *,
- int *);
+ struct mb_cache_entry **);
static void ext3_xattr_rehash(struct ext3_xattr_header *,
struct ext3_xattr_entry *);
@@ -500,33 +500,24 @@ bad_block: ext3_error(sb, "ext3_xattr_s
if (header) {
struct mb_cache_entry *ce;
- int credits = 0;
/* assert(header == HDR(bh)); */
- if (header->h_refcount != cpu_to_le32(1))
- goto skip_get_write_access;
- /* ext3_journal_get_write_access() requires an unlocked bh,
- which complicates things here. */
- error = ext3_journal_get_write_access_credits(handle, bh,
- &credits);
- if (error)
- goto cleanup;
ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
bh->b_blocknr);
- lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
if (ce)
mb_cache_entry_free(ce);
ea_bdebug(bh, "modifying in-place");
+ error = ext3_journal_get_write_access(handle, bh);
+ if (error)
+ goto cleanup;
+ lock_buffer(bh);
/* keep the buffer locked while modifying it. */
} else {
int offset;
if (ce)
mb_cache_entry_release(ce);
- unlock_buffer(bh);
- journal_release_buffer(handle, bh, credits);
- skip_get_write_access:
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
error = -ENOMEM;
@@ -622,17 +613,18 @@ bad_block: ext3_error(sb, "ext3_xattr_s
}
skip_replace:
- if (IS_LAST_ENTRY(ENTRY(header+1))) {
- /* This block is now empty. */
- if (bh && header == HDR(bh))
- unlock_buffer(bh); /* we were modifying in-place. */
- error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
- } else {
+ if (!IS_LAST_ENTRY(ENTRY(header+1)))
ext3_xattr_rehash(header, here);
- if (bh && header == HDR(bh))
- unlock_buffer(bh); /* we were modifying in-place. */
- error = ext3_xattr_set_handle2(handle, inode, bh, header);
+ if (bh && header == HDR(bh)) {
+ /* we were modifying in-place. */
+ unlock_buffer(bh);
+ error = ext3_journal_dirty_metadata(handle, bh);
+ if (error)
+ goto cleanup;
}
+ error = ext3_xattr_set_handle2(handle, inode, bh,
+ IS_LAST_ENTRY(ENTRY(header+1)) ?
+ NULL : header);
cleanup:
brelse(bh);
@@ -653,10 +645,11 @@ ext3_xattr_set_handle2(handle_t *handle,
{
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
- int credits = 0, error;
+ struct mb_cache_entry *ce = NULL;
+ int error;
if (header) {
- new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
+ new_bh = ext3_xattr_cache_find(inode, header, &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == old_bh)
@@ -667,19 +660,26 @@ ext3_xattr_set_handle2(handle_t *handle,
ea_bdebug(new_bh, "reusing block");
error = -EDQUOT;
- if (DQUOT_ALLOC_BLOCK(inode, 1)) {
- unlock_buffer(new_bh);
- journal_release_buffer(handle, new_bh,
- credits);
+ if (DQUOT_ALLOC_BLOCK(inode, 1))
goto cleanup;
- }
+ error = ext3_journal_get_write_access(handle, new_bh);
+ if (error)
+ goto cleanup;
+ lock_buffer(new_bh);
HDR(new_bh)->h_refcount = cpu_to_le32(1 +
le32_to_cpu(HDR(new_bh)->h_refcount));
ea_bdebug(new_bh, "refcount now=%d",
le32_to_cpu(HDR(new_bh)->h_refcount));
+ unlock_buffer(new_bh);
+ error = ext3_journal_dirty_metadata(handle, new_bh);
+ if (error)
+ goto cleanup;
}
- unlock_buffer(new_bh);
+ mb_cache_entry_release(ce);
+ ce = NULL;
} else if (old_bh && header == HDR(old_bh)) {
+ /* We were modifying this block in-place. */
+
/* Keep this block. No need to lock the block as we
* don't need to change the reference count. */
new_bh = old_bh;
@@ -715,10 +715,10 @@ getblk_failed:
ext3_xattr_cache_insert(new_bh);
ext3_xattr_update_super_block(handle, sb);
+ error = ext3_journal_dirty_metadata(handle, new_bh);
+ if (error)
+ goto cleanup;
}
- error = ext3_journal_dirty_metadata(handle, new_bh);
- if (error)
- goto cleanup;
}
/* Update the inode. */
@@ -730,22 +730,14 @@ getblk_failed:
error = 0;
if (old_bh && old_bh != new_bh) {
- struct mb_cache_entry *ce;
-
/*
* If there was an old block, and we are no longer using it,
* release the old block.
*/
- error = ext3_journal_get_write_access(handle, old_bh);
- if (error)
- goto cleanup;
ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
old_bh->b_blocknr);
- lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
/* Free the old block. */
- if (ce)
- mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
@@ -754,21 +746,29 @@ getblk_failed:
duplicate the handle before. */
get_bh(old_bh);
ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
+ if (ce) {
+ mb_cache_entry_free(ce);
+ ce = NULL;
+ }
} else {
+ error = ext3_journal_get_write_access(handle, old_bh);
+ if (error)
+ goto cleanup;
/* Decrement the refcount only. */
+ lock_buffer(old_bh);
HDR(old_bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
- if (ce)
- mb_cache_entry_release(ce);
DQUOT_FREE_BLOCK(inode, 1);
ext3_journal_dirty_metadata(handle, old_bh);
ea_bdebug(old_bh, "refcount now=%d",
le32_to_cpu(HDR(old_bh)->h_refcount));
+ unlock_buffer(old_bh);
}
- unlock_buffer(old_bh);
}
cleanup:
+ if (ce)
+ mb_cache_entry_release(ce);
brelse(new_bh);
return error;
@@ -819,7 +819,7 @@ void
ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce;
+ struct mb_cache_entry *ce = NULL;
down_write(&EXT3_I(inode)->xattr_sem);
if (!EXT3_I(inode)->i_file_acl)
@@ -838,31 +838,33 @@ ext3_xattr_delete_inode(handle_t *handle
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
- if (ext3_journal_get_write_access(handle, bh) != 0)
- goto cleanup;
ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
- lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce)
+ if (ce) {
mb_cache_entry_free(ce);
+ ce = NULL;
+ }
ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
get_bh(bh);
ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
} else {
+ if (ext3_journal_get_write_access(handle, bh) != 0)
+ goto cleanup;
+ lock_buffer(bh);
HDR(bh)->h_refcount = cpu_to_le32(
le32_to_cpu(HDR(bh)->h_refcount) - 1);
- if (ce)
- mb_cache_entry_release(ce);
ext3_journal_dirty_metadata(handle, bh);
if (IS_SYNC(inode))
handle->h_sync = 1;
DQUOT_FREE_BLOCK(inode, 1);
+ unlock_buffer(bh);
}
ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
- unlock_buffer(bh);
EXT3_I(inode)->i_file_acl = 0;
cleanup:
+ if (ce)
+ mb_cache_entry_release(ce);
brelse(bh);
up_write(&EXT3_I(inode)->xattr_sem);
}
@@ -960,8 +962,8 @@ ext3_xattr_cmp(struct ext3_xattr_header
* not found or an error occurred.
*/
static struct buffer_head *
-ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
- struct ext3_xattr_header *header, int *credits)
+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
+ struct mb_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
@@ -985,27 +987,17 @@ again:
ext3_error(inode->i_sb, "ext3_xattr_cache_find",
"inode %ld: block %ld read error",
inode->i_ino, (unsigned long) ce->e_block);
- } else if (ext3_journal_get_write_access_credits(
- handle, bh, credits) == 0) {
- /* ext3_journal_get_write_access() requires an unlocked
- * bh, which complicates things here. */
- lock_buffer(bh);
- if (le32_to_cpu(HDR(bh)->h_refcount) >
- EXT3_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %ld refcount %d>%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(HDR(bh)->h_refcount),
+ } else if (le32_to_cpu(HDR(bh)->h_refcount) >
+ EXT3_XATTR_REFCOUNT_MAX) {
+ ea_idebug(inode, "block %ld refcount %d>%d",
+ (unsigned long) ce->e_block,
+ le32_to_cpu(HDR(bh)->h_refcount),
EXT3_XATTR_REFCOUNT_MAX);
- } else if (!ext3_xattr_cmp(header, HDR(bh))) {
- mb_cache_entry_release(ce);
- /* buffer will be unlocked by caller */
- return bh;
- }
- unlock_buffer(bh);
- journal_release_buffer(handle, bh, *credits);
- *credits = 0;
- brelse(bh);
+ } else if (ext3_xattr_cmp(header, HDR(bh)) == 0) {
+ *pce = ce;
+ return bh;
}
+ brelse(bh);
ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
}
return NULL;
--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH
^ permalink raw reply [flat|nested] 10+ messages in thread