* [PATCH 32/32] ext4: add nombcache mount option
@ 2017-06-20 9:14 Tahsin Erdogan
2017-06-21 21:02 ` Andreas Dilger
0 siblings, 1 reply; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-20 9:14 UTC (permalink / raw)
To: Andreas Dilger, linux-ext4; +Cc: linux-kernel, Tahsin Erdogan
The main purpose of mb cache is to achieve deduplication in
extended attributes. In use cases where opportunity for deduplication
is unlikely, it only adds overhead.
Add a mount option to explicitly turn off mb cache.
Suggested-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
fs/ext4/ext4.h | 1 +
fs/ext4/super.c | 33 ++++++++++++++++++++++-----------
fs/ext4/xattr.c | 55 ++++++++++++++++++++++++++++++++++++-------------------
3 files changed, 59 insertions(+), 30 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 59e9488c4876..21aca1f87187 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1149,6 +1149,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
+#define EXT4_MOUNT_NO_MBCACHE 0x2000000 /* No mbcache usage */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4b15bf674d45..4262a1b9b5b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1336,7 +1336,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
};
static const match_table_t tokens = {
@@ -1419,6 +1419,7 @@ static const match_table_t tokens = {
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_nombcache, "nombcache"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1626,6 +1627,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -4080,19 +4082,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
- }
-
- if (ext4_has_feature_ea_inode(sb)) {
- sbi->s_ea_inode_cache = ext4_xattr_create_cache();
- if (!sbi->s_ea_inode_cache) {
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_mb_cache = ext4_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
ext4_msg(sb, KERN_ERR,
- "Failed to create an s_ea_inode_cache");
+ "Failed to create xattr block mb_cache");
goto failed_mount_wq;
}
+
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode mb_cache");
+ goto failed_mount_wq;
+ }
+ }
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4989,6 +4994,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 9d1b600dc827..65ead540c684 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -636,7 +636,6 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -658,7 +657,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_block_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(EXT4_GET_MB_CACHE(inode), bh);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
@@ -977,10 +976,13 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache, GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_create(ea_inode_cache,
+ GFP_NOFS, hash,
+ ea_inode->i_ino,
+ true /* reusable */);
+ }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -994,9 +996,11 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_delete(ea_inode_cache, hash,
+ ea_inode->i_ino);
+ }
}
}
@@ -1180,7 +1184,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete(ext4_mb_cache, hash, bh->b_blocknr);
+ if (ext4_mb_cache)
+ mb_cache_entry_delete(ext4_mb_cache, hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
@@ -1200,11 +1206,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(ext4_mb_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
- mb_cache_entry_put(ext4_mb_cache, ce);
+ if (ext4_mb_cache) {
+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ }
}
}
@@ -1383,6 +1391,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
void *ea_data;
+ if (!ea_inode_cache)
+ return NULL;
+
ce = mb_cache_entry_find_first(ea_inode_cache, hash);
if (!ce)
return NULL;
@@ -1453,8 +1464,9 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
return err;
}
- mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
- ea_inode->i_ino, true /* reusable */);
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
*ret_inode = ea_inode;
return 0;
@@ -1781,8 +1793,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
+ if (ext4_mb_cache)
+ mb_cache_entry_delete(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -2871,6 +2884,8 @@ ext4_xattr_block_cache_insert(struct mb_cache *ext4_mb_cache,
EXT4_XATTR_REFCOUNT_MAX;
int error;
+ if (!ext4_mb_cache)
+ return;
error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
@@ -2937,6 +2952,8 @@ ext4_xattr_block_cache_find(struct inode *inode,
struct mb_cache_entry *ce;
struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ if (!ext4_mb_cache)
+ return NULL;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
--
2.13.1.518.g3df882009-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 32/32] ext4: add nombcache mount option
2017-06-20 9:14 [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
@ 2017-06-21 21:02 ` Andreas Dilger
2017-06-21 23:46 ` Tahsin Erdogan
0 siblings, 1 reply; 7+ messages in thread
From: Andreas Dilger @ 2017-06-21 21:02 UTC (permalink / raw)
To: Tahsin Erdogan; +Cc: linux-ext4, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 5992 bytes --]
On Jun 20, 2017, at 3:14 AM, Tahsin Erdogan <tahsin@google.com> wrote:
>
> The main purpose of mb cache is to achieve deduplication in
> extended attributes. In use cases where opportunity for deduplication
> is unlikely, it only adds overhead.
>
> Add a mount option to explicitly turn off mb cache.
>
> Suggested-by: Andreas Dilger <adilger@dilger.ca>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/super.c | 33 ++++++++++++++++++++++-----------
> fs/ext4/xattr.c | 55 ++++++++++++++++++++++++++++++++++++-------------------
> 3 files changed, 59 insertions(+), 30 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 59e9488c4876..21aca1f87187 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1149,6 +1149,7 @@ struct ext4_inode_info {
> #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
> #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
> #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
> +#define EXT4_MOUNT_NO_MBCACHE 0x2000000 /* No mbcache usage */
We've been using 0x00001 for EXT4_MOUNT_NO_MBCACHE.
> #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
> #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
> #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 4b15bf674d45..4262a1b9b5b2 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1336,7 +1336,7 @@ enum {
> Opt_inode_readahead_blks, Opt_journal_ioprio,
> Opt_dioread_nolock, Opt_dioread_lock,
> Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
> - Opt_max_dir_size_kb, Opt_nojournal_checksum,
> + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
> };
>
> static const match_table_t tokens = {
> @@ -1419,6 +1419,7 @@ static const match_table_t tokens = {
> {Opt_noinit_itable, "noinit_itable"},
> {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
> {Opt_test_dummy_encryption, "test_dummy_encryption"},
> + {Opt_nombcache, "nombcache"},
For compatibility, please also add:
{Opt_nombcache, "no_mbcache"},
> {Opt_removed, "check=none"}, /* mount option from ext2/3 */
> {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
> {Opt_removed, "reservation"}, /* mount option from ext2/3 */
> @@ -1626,6 +1627,7 @@ static const struct mount_opts {
> {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
> {Opt_max_dir_size_kb, 0, MOPT_GTE0},
> {Opt_test_dummy_encryption, 0, MOPT_GTE0},
> + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
> {Opt_err, 0, 0}
> };
>
> @@ -4080,19 +4082,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
>
> no_journal:
> - sbi->s_mb_cache = ext4_xattr_create_cache();
> - if (!sbi->s_mb_cache) {
> - ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
> - goto failed_mount_wq;
> - }
> -
> - if (ext4_has_feature_ea_inode(sb)) {
> - sbi->s_ea_inode_cache = ext4_xattr_create_cache();
> - if (!sbi->s_ea_inode_cache) {
> + if (!test_opt(sb, NO_MBCACHE)) {
> + sbi->s_mb_cache = ext4_xattr_create_cache();
Should be named "s_mb_ea_block_cache" or similar?
> + if (!sbi->s_mb_cache) {
> ext4_msg(sb, KERN_ERR,
> - "Failed to create an s_ea_inode_cache");
> + "Failed to create xattr block mb_cache");
> goto failed_mount_wq;
> }
> +
> + if (ext4_has_feature_ea_inode(sb)) {
> + sbi->s_ea_inode_cache = ext4_xattr_create_cache();
Should be named "s_mb_ea_inode_cache", or alternately use "s_ea_block_cache"
above?
> + if (!sbi->s_ea_inode_cache) {
> + ext4_msg(sb, KERN_ERR,
> + "Failed to create ea_inode mb_cache");
> + goto failed_mount_wq;
> + }
> + }
> }
>
> if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
> @@ -4989,6 +4994,12 @@ static int ext4_remount(struct super_block *sb, int *flags,
> }
> }
>
> + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
> + ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
> + err = -EINVAL;
> + goto restore_opts;
> + }
It appears that this restriction also applies to enabling mbcache during
remount as well, so the message should be updated? In particular, if
NO_MBCACHE is currently set but isn't passed during the remount then this
will always cause the remount to fail. That makes it harder for users to
just run something like "mount -o remount,ro".
It seems we are a bit inconsistent with how we handle changing mount options.
In some cases, if an option is set to be enabled but cannot be, it is actually
ignored (e.g. DAX, JOURNAL_CHECKSUM). In other cases this returns an error.
In this case it is probably easiest to just ignore the change to this option,
as is done with JOURNAL_CHECKSUM and DAX.
> if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
> ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
> "dax flag with busy inodes while remounting");
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 9d1b600dc827..65ead540c684 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -1200,11 +1206,13 @@ ext4_xattr_release_block(handle_t *handle,
> if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
> struct mb_cache_entry *ce;
This could go inside the "if (ext4_mb_cache)" block.
> - ce = mb_cache_entry_get(ext4_mb_cache, hash,
> - bh->b_blocknr);
> - if (ce) {
> - ce->e_reusable = 1;
> - mb_cache_entry_put(ext4_mb_cache, ce);
> + if (ext4_mb_cache) {
> + ce = mb_cache_entry_get(ext4_mb_cache, hash,
> + bh->b_blocknr);
> + if (ce) {
> + ce->e_reusable = 1;
> + mb_cache_entry_put(ext4_mb_cache, ce);
> + }
> }
> }
Cheers, Andreas
[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 32/32] ext4: add nombcache mount option
2017-06-21 21:02 ` Andreas Dilger
@ 2017-06-21 23:46 ` Tahsin Erdogan
2017-06-22 4:42 ` Theodore Ts'o
0 siblings, 1 reply; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-21 23:46 UTC (permalink / raw)
To: Andreas Dilger; +Cc: linux-ext4, linux-kernel
>> + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
>> + ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
>> + err = -EINVAL;
>> + goto restore_opts;
>> + }
>
> It appears that this restriction also applies to enabling mbcache during
> remount as well, so the message should be updated? In particular, if
> NO_MBCACHE is currently set but isn't passed during the remount then this
> will always cause the remount to fail. That makes it harder for users to
> just run something like "mount -o remount,ro".
Remount option handling is a bit strange. If an option is not
specified, it is not automatically cleared in sbi->s_mount_opt. So,
once nombcache option is set in the original mount, it is not possible
to clear it. I don't know whether this option handling behavior in
ext4 is deliberate but because of that, the mount -o remount,ro does
not result in a mount failure. The check above is effectively only to
protect against going from not having nombcache option to having
nombcache option, because the opposite is not possible.
Following shows how "debug" option remains set even though it wasn't
specified during remount:
# mount -o debug /dev/sdb /mnt/sdb
# mount | grep sdb
/dev/sdb on /mnt/sdb type ext4 (rw,debug)
# cat /proc/mounts | grep sdb
/dev/sdb /mnt/sdb ext4 rw,relatime,debug,data=ordered 0 0
# mount -o remount,ro /dev/sdb /mnt/sdb
# mount | grep sdb
/dev/sdb on /mnt/sdb type ext4 (ro)
# cat /proc/mounts | grep sdb
/dev/sdb /mnt/sdb ext4 ro,relatime,debug,data=ordered 0 0
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 32/32] ext4: add nombcache mount option
2017-06-21 23:46 ` Tahsin Erdogan
@ 2017-06-22 4:42 ` Theodore Ts'o
0 siblings, 0 replies; 7+ messages in thread
From: Theodore Ts'o @ 2017-06-22 4:42 UTC (permalink / raw)
To: Tahsin Erdogan; +Cc: Andreas Dilger, linux-ext4, linux-kernel
On Wed, Jun 21, 2017 at 04:46:48PM -0700, Tahsin Erdogan wrote:
> Remount option handling is a bit strange. If an option is not
> specified, it is not automatically cleared in sbi->s_mount_opt. So,
> once nombcache option is set in the original mount, it is not possible
> to clear it. I don't know whether this option handling behavior in
> ext4 is deliberate but because of that, the mount -o remount,ro does
> not result in a mount failure. The check above is effectively only to
> protect against going from not having nombcache option to having
> nombcache option, because the opposite is not possible.
The option handling behavior is deliberate, yes. This is why we have
the mount options "lazytime" and "nolazytime", "discard" and
"nodiscard", "barrier" and "nobarrier", etc.
-Ted
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 01/32] ext4: xattr-in-inode support
@ 2017-06-21 21:21 Tahsin Erdogan
2017-06-21 21:21 ` [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
0 siblings, 1 reply; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-21 21:21 UTC (permalink / raw)
To: Andreas Dilger, Darrick J . Wong, Jan Kara, Theodore Ts'o,
linux-ext4
Cc: linux-kernel, Andreas Dilger, Kalpak Shah, James Simmons,
Tahsin Erdogan
From: Andreas Dilger <andreas.dilger@intel.com>
Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.
If the size of an xattr value is larger than will fit in a single
external block, then the xattr value will be saved into the body
of an external xattr inode.
This also helps support a larger number of xattrs, since only the headers
will be stored in the in-inode space or the single external block.
The inode is referenced from the xattr header via "e_value_inum",
which was formerly "e_value_block", but that field was never used.
The e_value_size still contains the xattr size so that listing
xattrs does not need to look up the inode if the data is not accessed.
struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
__le32 e_value_inum; /* inode in which value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
};
The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
holds a back-reference to the owning inode in its i_mtime field,
allowing ext4/e2fsck to verify that the correct inode is accessed.
Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424
Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
fs/ext4/ext4.h | 12 ++
fs/ext4/ialloc.c | 1 -
fs/ext4/inline.c | 2 +-
fs/ext4/inode.c | 49 ++++-
fs/ext4/xattr.c | 565 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
fs/ext4/xattr.h | 33 +++-
6 files changed, 606 insertions(+), 56 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 32191548abed..24ef56b4572f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1797,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
@@ -2219,6 +2220,12 @@ struct mmpd_data {
*/
#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL
+/*
+ * Maximum size of an xattr value for FEATURE_INCOMPAT_EA_INODE: 1 MiB.
+ * This limit is arbitrary, but is reasonable for the xattr API.
+ */
+#define EXT4_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024)
+
/*
* Function prototypes
*/
@@ -2231,6 +2238,10 @@ struct mmpd_data {
# define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn,
+struct ext4_xattr_ino_array {
+ unsigned int xia_count; /* # of used item in the array */
+ unsigned int xia_inodes[0];
+};
/* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
@@ -2478,6 +2489,7 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 98ac2f1f23b3..e2eb3cc06820 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
- ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8d141c0c8ff9..28c5c3abddb3 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
- if (!entry->e_value_block && entry->e_value_size) {
+ if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cf82d03968c..e5535e5b3dc5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -139,8 +139,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
-static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
- int pextents);
/*
* Test whether an inode is a fast symlink.
@@ -189,6 +187,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
+ int extra_credits = 3;
+ struct ext4_xattr_ino_array *lea_ino_array = NULL;
trace_ext4_evict_inode(inode);
@@ -238,8 +238,8 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
- ext4_blocks_for_truncate(inode)+3);
+
+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -251,9 +251,36 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
goto no_delete;
}
-
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+
+ /*
+ * Delete xattr inode before deleting the main inode.
+ */
+ err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
+ if (err) {
+ ext4_warning(inode->i_sb,
+ "couldn't delete inode's xattr (err %d)", err);
+ goto stop_handle;
+ }
+
+ if (!IS_NOQUOTA(inode))
+ extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
+
+ if (!ext4_handle_has_enough_credits(handle,
+ ext4_blocks_for_truncate(inode) + extra_credits)) {
+ err = ext4_journal_extend(handle,
+ ext4_blocks_for_truncate(inode) + extra_credits);
+ if (err > 0)
+ err = ext4_journal_restart(handle,
+ ext4_blocks_for_truncate(inode) + extra_credits);
+ if (err != 0) {
+ ext4_warning(inode->i_sb,
+ "couldn't extend journal (err %d)", err);
+ goto stop_handle;
+ }
+ }
+
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -277,10 +304,10 @@ void ext4_evict_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
- if (!ext4_handle_has_enough_credits(handle, 3)) {
- err = ext4_journal_extend(handle, 3);
+ if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
+ err = ext4_journal_extend(handle, extra_credits);
if (err > 0)
- err = ext4_journal_restart(handle, 3);
+ err = ext4_journal_restart(handle, extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
@@ -315,8 +342,12 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode);
else
ext4_free_inode(handle, inode);
+
ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);
+
+ if (lea_ino_array != NULL)
+ ext4_xattr_inode_array_free(inode, lea_ino_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -5504,7 +5535,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
*
* Also account for superblock, inode, quota and xattr blocks
*/
-static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
+int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5d3c2536641c..444be5c7a1d5 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -177,9 +177,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
/* Check the values */
while (!IS_LAST_ENTRY(entry)) {
- if (entry->e_value_block != 0)
- return -EFSCORRUPTED;
- if (entry->e_value_size != 0) {
+ if (entry->e_value_size != 0 &&
+ entry->e_value_inum == 0) {
u16 offs = le16_to_cpu(entry->e_value_offs);
u32 size = le32_to_cpu(entry->e_value_size);
void *value;
@@ -269,6 +268,99 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
return cmp ? -ENODATA : 0;
}
+/*
+ * Read the EA value from an inode.
+ */
+static int
+ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size)
+{
+ unsigned long block = 0;
+ struct buffer_head *bh = NULL;
+ int blocksize;
+ size_t csize, ret_size = 0;
+
+ if (*size == 0)
+ return 0;
+
+ blocksize = ea_inode->i_sb->s_blocksize;
+
+ while (ret_size < *size) {
+ csize = (*size - ret_size) > blocksize ? blocksize :
+ *size - ret_size;
+ bh = ext4_bread(NULL, ea_inode, block, 0);
+ if (IS_ERR(bh)) {
+ *size = ret_size;
+ return PTR_ERR(bh);
+ }
+ memcpy(buf, bh->b_data, csize);
+ brelse(bh);
+
+ buf += csize;
+ block += 1;
+ ret_size += csize;
+ }
+
+ *size = ret_size;
+
+ return 0;
+}
+
+struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, int *err)
+{
+ struct inode *ea_inode = NULL;
+
+ ea_inode = ext4_iget(parent->i_sb, ea_ino);
+ if (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) {
+ int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0;
+ ext4_error(parent->i_sb, "error while reading EA inode %lu "
+ "/ %d %d", ea_ino, rc, is_bad_inode(ea_inode));
+ *err = rc != 0 ? rc : -EIO;
+ return NULL;
+ }
+
+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != parent->i_ino ||
+ ea_inode->i_generation != parent->i_generation) {
+ ext4_error(parent->i_sb, "Backpointer from EA inode %lu "
+ "to parent invalid.", ea_ino);
+ *err = -EINVAL;
+ goto error;
+ }
+
+ if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) {
+ ext4_error(parent->i_sb, "EA inode %lu does not have "
+ "EXT4_EA_INODE_FL flag set.\n", ea_ino);
+ *err = -EINVAL;
+ goto error;
+ }
+
+ *err = 0;
+ return ea_inode;
+
+error:
+ iput(ea_inode);
+ return NULL;
+}
+
+/*
+ * Read the value from the EA inode.
+ */
+static int
+ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer,
+ size_t *size)
+{
+ struct inode *ea_inode = NULL;
+ int err;
+
+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
+ if (err)
+ return err;
+
+ err = ext4_xattr_inode_read(ea_inode, buffer, size);
+ iput(ea_inode);
+
+ return err;
+}
+
static int
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
void *buffer, size_t buffer_size)
@@ -308,8 +400,16 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
- size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode,
+ le32_to_cpu(entry->e_value_inum),
+ buffer, &size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, bh->b_data +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -350,8 +450,16 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
error = -ERANGE;
if (size > buffer_size)
goto cleanup;
- memcpy(buffer, (void *)IFIRST(header) +
- le16_to_cpu(entry->e_value_offs), size);
+ if (entry->e_value_inum) {
+ error = ext4_xattr_inode_get(inode,
+ le32_to_cpu(entry->e_value_inum),
+ buffer, &size);
+ if (error)
+ goto cleanup;
+ } else {
+ memcpy(buffer, (void *)IFIRST(header) +
+ le16_to_cpu(entry->e_value_offs), size);
+ }
}
error = size;
@@ -620,7 +728,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
size_t *min_offs, void *base, int *total)
{
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < *min_offs)
*min_offs = offs;
@@ -631,16 +739,173 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
return (*min_offs - ((void *)last - base) - sizeof(__u32));
}
-static int
-ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
+/*
+ * Write the value of the EA in an inode.
+ */
+static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
+ const void *buf, int bufsize)
+{
+ struct buffer_head *bh = NULL;
+ unsigned long block = 0;
+ unsigned blocksize = ea_inode->i_sb->s_blocksize;
+ unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
+ int csize, wsize = 0;
+ int ret = 0;
+ int retries = 0;
+
+retry:
+ while (ret >= 0 && ret < max_blocks) {
+ struct ext4_map_blocks map;
+ map.m_lblk = block += ret;
+ map.m_len = max_blocks -= ret;
+
+ ret = ext4_map_blocks(handle, ea_inode, &map,
+ EXT4_GET_BLOCKS_CREATE);
+ if (ret <= 0) {
+ ext4_mark_inode_dirty(handle, ea_inode);
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
+ ret = 0;
+ goto retry;
+ }
+ break;
+ }
+ }
+
+ if (ret < 0)
+ return ret;
+
+ block = 0;
+ while (wsize < bufsize) {
+ if (bh != NULL)
+ brelse(bh);
+ csize = (bufsize - wsize) > blocksize ? blocksize :
+ bufsize - wsize;
+ bh = ext4_getblk(handle, ea_inode, block, 0);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
+ goto out;
+ }
+ ret = ext4_journal_get_write_access(handle, bh);
+ if (ret)
+ goto out;
+
+ memcpy(bh->b_data, buf, csize);
+ set_buffer_uptodate(bh);
+ ext4_handle_dirty_metadata(handle, ea_inode, bh);
+
+ buf += csize;
+ wsize += csize;
+ block += 1;
+ }
+
+ inode_lock(ea_inode);
+ i_size_write(ea_inode, wsize);
+ ext4_update_i_disksize(ea_inode, wsize);
+ inode_unlock(ea_inode);
+
+ ext4_mark_inode_dirty(handle, ea_inode);
+
+out:
+ brelse(bh);
+
+ return ret;
+}
+
+/*
+ * Create an inode to store the value of a large EA.
+ */
+static struct inode *ext4_xattr_inode_create(handle_t *handle,
+ struct inode *inode)
+{
+ struct inode *ea_inode = NULL;
+
+ /*
+ * Let the next inode be the goal, so we try and allocate the EA inode
+ * in the same group, or nearby one.
+ */
+ ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+ S_IFREG | 0600, NULL, inode->i_ino + 1, NULL);
+ if (!IS_ERR(ea_inode)) {
+ ea_inode->i_op = &ext4_file_inode_operations;
+ ea_inode->i_fop = &ext4_file_operations;
+ ext4_set_aops(ea_inode);
+ ea_inode->i_generation = inode->i_generation;
+ EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL;
+
+ /*
+ * A back-pointer from EA inode to parent inode will be useful
+ * for e2fsck.
+ */
+ EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino);
+ unlock_new_inode(ea_inode);
+ }
+
+ return ea_inode;
+}
+
+/*
+ * Unlink the inode storing the value of the EA.
+ */
+int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
+{
+ struct inode *ea_inode = NULL;
+ int err;
+
+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
+ if (err)
+ return err;
+
+ clear_nlink(ea_inode);
+ iput(ea_inode);
+
+ return 0;
+}
+
+/*
+ * Add value of the EA in an inode.
+ */
+static int ext4_xattr_inode_set(handle_t *handle, struct inode *inode,
+ unsigned long *ea_ino, const void *value,
+ size_t value_len)
+{
+ struct inode *ea_inode;
+ int err;
+
+ /* Create an inode for the EA value */
+ ea_inode = ext4_xattr_inode_create(handle, inode);
+ if (IS_ERR(ea_inode))
+ return PTR_ERR(ea_inode);
+
+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
+ if (err)
+ clear_nlink(ea_inode);
+ else
+ *ea_ino = ea_inode->i_ino;
+
+ iput(ea_inode);
+
+ return err;
+}
+
+static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ struct ext4_xattr_search *s,
+ handle_t *handle, struct inode *inode)
{
struct ext4_xattr_entry *last;
size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+ int in_inode = i->in_inode;
+ int rc;
+
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ (EXT4_XATTR_SIZE(i->value_len) >
+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
+ in_inode = 1;
/* Compute min_offs and last. */
last = s->first;
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
size_t offs = le16_to_cpu(last->e_value_offs);
if (offs < min_offs)
min_offs = offs;
@@ -648,15 +913,20 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
}
free = min_offs - ((void *)last - s->base) - sizeof(__u32);
if (!s->not_found) {
- if (s->here->e_value_size) {
+ if (!in_inode &&
+ !s->here->e_value_inum && s->here->e_value_size) {
size_t size = le32_to_cpu(s->here->e_value_size);
free += EXT4_XATTR_SIZE(size);
}
free += EXT4_XATTR_LEN(name_len);
}
if (i->value) {
- if (free < EXT4_XATTR_LEN(name_len) +
- EXT4_XATTR_SIZE(i->value_len))
+ size_t value_len = EXT4_XATTR_SIZE(i->value_len);
+
+ if (in_inode)
+ value_len = 0;
+
+ if (free < EXT4_XATTR_LEN(name_len) + value_len)
return -ENOSPC;
}
@@ -670,7 +940,8 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
s->here->e_name_len = name_len;
memcpy(s->here->e_name, i->name, name_len);
} else {
- if (s->here->e_value_size) {
+ if (!s->here->e_value_inum && s->here->e_value_size &&
+ s->here->e_value_offs > 0) {
void *first_val = s->base + min_offs;
size_t offs = le16_to_cpu(s->here->e_value_offs);
void *val = s->base + offs;
@@ -704,12 +975,18 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
last = s->first;
while (!IS_LAST_ENTRY(last)) {
size_t o = le16_to_cpu(last->e_value_offs);
- if (last->e_value_size && o < offs)
+ if (!last->e_value_inum &&
+ last->e_value_size && o < offs)
last->e_value_offs =
cpu_to_le16(o + size);
last = EXT4_XATTR_NEXT(last);
}
}
+ if (s->here->e_value_inum) {
+ ext4_xattr_inode_unlink(inode,
+ le32_to_cpu(s->here->e_value_inum));
+ s->here->e_value_inum = 0;
+ }
if (!i->value) {
/* Remove the old name. */
size_t size = EXT4_XATTR_LEN(name_len);
@@ -722,11 +999,20 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
if (i->value) {
/* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
- if (i->value_len) {
+ if (in_inode) {
+ unsigned long ea_ino =
+ le32_to_cpu(s->here->e_value_inum);
+ rc = ext4_xattr_inode_set(handle, inode, &ea_ino,
+ i->value, i->value_len);
+ if (rc)
+ goto out;
+ s->here->e_value_inum = cpu_to_le32(ea_ino);
+ s->here->e_value_offs = 0;
+ } else if (i->value_len) {
size_t size = EXT4_XATTR_SIZE(i->value_len);
void *val = s->base + min_offs - size;
s->here->e_value_offs = cpu_to_le16(min_offs - size);
+ s->here->e_value_inum = 0;
if (i->value == EXT4_ZERO_XATTR_VALUE) {
memset(val, 0, size);
} else {
@@ -736,8 +1022,11 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
memcpy(val, i->value, i->value_len);
}
}
+ s->here->e_value_size = cpu_to_le32(i->value_len);
}
- return 0;
+
+out:
+ return rc;
}
struct ext4_xattr_block_find {
@@ -801,8 +1090,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
#define header(x) ((struct ext4_xattr_header *)(x))
- if (i->value && i->value_len > sb->s_blocksize)
- return -ENOSPC;
if (s->base) {
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
@@ -821,7 +1108,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
mb_cache_entry_delete_block(ext4_mb_cache, hash,
bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode);
if (!error) {
if (!IS_LAST_ENTRY(s->first))
ext4_xattr_rehash(header(s->base),
@@ -870,7 +1157,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
@@ -1070,7 +1357,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode);
if (error) {
if (error == -ENOSPC &&
ext4_has_inline_data(inode)) {
@@ -1082,7 +1369,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_find(inode, i, is);
if (error)
return error;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode);
}
if (error)
return error;
@@ -1098,7 +1385,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
return 0;
}
-static int ext4_xattr_ibody_set(struct inode *inode,
+static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -1108,7 +1395,7 @@ static int ext4_xattr_ibody_set(struct inode *inode,
if (EXT4_I(inode)->i_extra_isize == 0)
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s);
+ error = ext4_xattr_set_entry(i, s, handle, inode);
if (error)
return error;
header = IHDR(inode, ext4_raw_inode(&is->iloc));
@@ -1155,7 +1442,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
.name = name,
.value = value,
.value_len = value_len,
-
+ .in_inode = 0,
};
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
@@ -1204,7 +1491,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
}
if (!value) {
if (!is.s.not_found)
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else {
@@ -1215,7 +1502,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
goto cleanup;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) {
i.value = NULL;
error = ext4_xattr_block_set(handle, inode, &i, &bs);
@@ -1226,11 +1513,20 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
goto cleanup;
}
error = ext4_xattr_block_set(handle, inode, &i, &bs);
+ if (ext4_has_feature_ea_inode(inode->i_sb) &&
+ error == -ENOSPC) {
+ /* xattr does not fit in the block; store it in
+ * an external inode */
+ i.in_inode = 1;
+ error = ext4_xattr_ibody_set(handle, inode,
+ &i, &is);
+ }
if (error)
goto cleanup;
if (!is.s.not_found) {
i.value = NULL;
- error = ext4_xattr_ibody_set(inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i,
+ &is);
}
}
}
@@ -1269,12 +1565,26 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
const void *value, size_t value_len, int flags)
{
handle_t *handle;
+ struct super_block *sb = inode->i_sb;
int error, retries = 0;
int credits = ext4_jbd2_credits_xattr(inode);
error = dquot_initialize(inode);
if (error)
return error;
+
+ if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) &&
+ ext4_has_feature_ea_inode(sb)) {
+ int nrblocks = (value_len + sb->s_blocksize - 1) >>
+ sb->s_blocksize_bits;
+
+ /* For new inode */
+ credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
+
+ /* For data blocks of EA inode */
+ credits += ext4_meta_trans_blocks(inode, nrblocks, 0);
+ }
+
retry:
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle)) {
@@ -1286,7 +1596,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
value, value_len, flags);
error2 = ext4_journal_stop(handle);
if (error == -ENOSPC &&
- ext4_should_retry_alloc(inode->i_sb, &retries))
+ ext4_should_retry_alloc(sb, &retries))
goto retry;
if (error == 0)
error = error2;
@@ -1311,7 +1621,7 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
/* Adjust the value offsets of the entries */
for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (last->e_value_size) {
+ if (!last->e_value_inum && last->e_value_size) {
new_offs = le16_to_cpu(last->e_value_offs) +
value_offs_shift;
last->e_value_offs = cpu_to_le16(new_offs);
@@ -1372,7 +1682,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
goto out;
/* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(inode, &i, is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto out;
@@ -1572,21 +1882,135 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
}
+#define EIA_INCR 16 /* must be 2^n */
+#define EIA_MASK (EIA_INCR - 1)
+/* Add the large xattr @ino into @lea_ino_array for later deletion.
+ * If @lea_ino_array is new or full it will be grown and the old
+ * contents copied over.
+ */
+static int
+ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino)
+{
+ if (*lea_ino_array == NULL) {
+ /*
+ * Start with 15 inodes, so it fits into a power-of-two size.
+ * If *lea_ino_array is NULL, this is essentially offsetof()
+ */
+ (*lea_ino_array) =
+ kmalloc(offsetof(struct ext4_xattr_ino_array,
+ xia_inodes[EIA_MASK]),
+ GFP_NOFS);
+ if (*lea_ino_array == NULL)
+ return -ENOMEM;
+ (*lea_ino_array)->xia_count = 0;
+ } else if (((*lea_ino_array)->xia_count & EIA_MASK) == EIA_MASK) {
+ /* expand the array once all 15 + n * 16 slots are full */
+ struct ext4_xattr_ino_array *new_array = NULL;
+ int count = (*lea_ino_array)->xia_count;
+
+ /* if new_array is NULL, this is essentially offsetof() */
+ new_array = kmalloc(
+ offsetof(struct ext4_xattr_ino_array,
+ xia_inodes[count + EIA_INCR]),
+ GFP_NOFS);
+ if (new_array == NULL)
+ return -ENOMEM;
+ memcpy(new_array, *lea_ino_array,
+ offsetof(struct ext4_xattr_ino_array,
+ xia_inodes[count]));
+ kfree(*lea_ino_array);
+ *lea_ino_array = new_array;
+ }
+ (*lea_ino_array)->xia_inodes[(*lea_ino_array)->xia_count++] = ino;
+ return 0;
+}
+
+/**
+ * Add xattr inode to orphan list
+ */
+static int
+ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode,
+ int credits, struct ext4_xattr_ino_array *lea_ino_array)
+{
+ struct inode *ea_inode = NULL;
+ int idx = 0, error = 0;
+
+ if (lea_ino_array == NULL)
+ return 0;
+
+ for (; idx < lea_ino_array->xia_count; ++idx) {
+ if (!ext4_handle_has_enough_credits(handle, credits)) {
+ error = ext4_journal_extend(handle, credits);
+ if (error > 0)
+ error = ext4_journal_restart(handle, credits);
+
+ if (error != 0) {
+ ext4_warning(inode->i_sb,
+ "couldn't extend journal "
+ "(err %d)", error);
+ return error;
+ }
+ }
+ ea_inode = ext4_xattr_inode_iget(inode,
+ lea_ino_array->xia_inodes[idx], &error);
+ if (error)
+ continue;
+ ext4_orphan_add(handle, ea_inode);
+ /* the inode's i_count will be released by caller */
+ }
+
+ return 0;
+}
/*
* ext4_xattr_delete_inode()
*
- * Free extended attribute resources associated with this inode. This
+ * Free extended attribute resources associated with this inode. Traverse
+ * all entries and unlink any xattr inodes associated with this inode. This
* is called immediately before an inode is freed. We have exclusive
- * access to the inode.
+ * access to the inode. If an orphan inode is deleted it will also delete any
+ * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget()
+ * to ensure they belong to the parent inode and were not deleted already.
*/
-void
-ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
+int
+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_ino_array **lea_ino_array)
{
struct buffer_head *bh = NULL;
+ struct ext4_xattr_ibody_header *header;
+ struct ext4_inode *raw_inode;
+ struct ext4_iloc iloc;
+ struct ext4_xattr_entry *entry;
+ int credits = 3, error = 0;
- if (!EXT4_I(inode)->i_file_acl)
+ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
+ goto delete_external_ea;
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error)
+ goto cleanup;
+ raw_inode = ext4_raw_inode(&iloc);
+ header = IHDR(inode, raw_inode);
+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ if (ext4_expand_ino_array(lea_ino_array,
+ entry->e_value_inum) != 0) {
+ brelse(iloc.bh);
+ goto cleanup;
+ }
+ entry->e_value_inum = 0;
+ }
+ brelse(iloc.bh);
+
+delete_external_ea:
+ if (!EXT4_I(inode)->i_file_acl) {
+ /* add xattr inode to orphan list */
+ ext4_xattr_inode_orphan_add(handle, inode, credits,
+ *lea_ino_array);
goto cleanup;
+ }
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %llu read error",
@@ -1599,11 +2023,69 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
EXT4_I(inode)->i_file_acl);
goto cleanup;
}
+
+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ if (ext4_expand_ino_array(lea_ino_array,
+ entry->e_value_inum) != 0)
+ goto cleanup;
+ entry->e_value_inum = 0;
+ }
+
+ /* add xattr inode to orphan list */
+ error = ext4_xattr_inode_orphan_add(handle, inode, credits,
+ *lea_ino_array);
+ if (error != 0)
+ goto cleanup;
+
+ if (!IS_NOQUOTA(inode))
+ credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
+
+ if (!ext4_handle_has_enough_credits(handle, credits)) {
+ error = ext4_journal_extend(handle, credits);
+ if (error > 0)
+ error = ext4_journal_restart(handle, credits);
+ if (error != 0) {
+ ext4_warning(inode->i_sb,
+ "couldn't extend journal (err %d)", error);
+ goto cleanup;
+ }
+ }
+
ext4_xattr_release_block(handle, inode, bh);
EXT4_I(inode)->i_file_acl = 0;
cleanup:
brelse(bh);
+
+ return error;
+}
+
+void
+ext4_xattr_inode_array_free(struct inode *inode,
+ struct ext4_xattr_ino_array *lea_ino_array)
+{
+ struct inode *ea_inode = NULL;
+ int idx = 0;
+ int err;
+
+ if (lea_ino_array == NULL)
+ return;
+
+ for (; idx < lea_ino_array->xia_count; ++idx) {
+ ea_inode = ext4_xattr_inode_iget(inode,
+ lea_ino_array->xia_inodes[idx], &err);
+ if (err)
+ continue;
+ /* drop the i_count reference obtained in ext4_xattr_delete_inode */
+ if (!list_empty(&EXT4_I(ea_inode)->i_orphan))
+ iput(ea_inode);
+ clear_nlink(ea_inode);
+ iput(ea_inode);
+ }
+ kfree(lea_ino_array);
}
/*
@@ -1655,10 +2137,9 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
entry1->e_name_index != entry2->e_name_index ||
entry1->e_name_len != entry2->e_name_len ||
entry1->e_value_size != entry2->e_value_size ||
+ entry1->e_value_inum != entry2->e_value_inum ||
memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
return 1;
- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
- return -EFSCORRUPTED;
if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
(char *)header2 + le16_to_cpu(entry2->e_value_offs),
le32_to_cpu(entry1->e_value_size)))
@@ -1730,7 +2211,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
*name++;
}
- if (entry->e_value_size != 0) {
+ if (!entry->e_value_inum && entry->e_value_size) {
__le32 *value = (__le32 *)((char *)header +
le16_to_cpu(entry->e_value_offs));
for (n = (le32_to_cpu(entry->e_value_size) +
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 099c8b670ef5..6e10ff9393d4 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -44,7 +44,7 @@ struct ext4_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_offs; /* offset in disk block of value */
- __le32 e_value_block; /* disk block attribute is stored on (n/i) */
+ __le32 e_value_inum; /* inode in which the value is stored */
__le32 e_value_size; /* size of attribute value */
__le32 e_hash; /* hash value of name and value */
char e_name[0]; /* attribute name */
@@ -69,6 +69,26 @@ struct ext4_xattr_entry {
EXT4_I(inode)->i_extra_isize))
#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
+/*
+ * Link EA inode back to parent one using i_mtime field.
+ * Extra integer type conversion added to ignore higher
+ * bits in i_mtime.tv_sec which might be set by ext4_get()
+ */
+#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
+do { \
+ (inode)->i_mtime.tv_sec = inum; \
+} while(0)
+
+#define EXT4_XATTR_INODE_GET_PARENT(inode) \
+((__u32)(inode)->i_mtime.tv_sec)
+
+/*
+ * The minimum EA value size at which the value is stored in an external inode:
+ * size of block - size of header - size of 1 entry - 4 null bytes
+ */
+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
+
#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
#define BFIRST(bh) ENTRY(BHDR(bh)+1)
@@ -77,10 +97,11 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
struct ext4_xattr_info {
- int name_index;
const char *name;
const void *value;
size_t value_len;
+ int name_index;
+ int in_inode;
};
struct ext4_xattr_search {
@@ -140,7 +161,13 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
-extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
+extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ int *err);
+extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_ino_array **array);
+extern void ext4_xattr_inode_array_free(struct inode *inode,
+ struct ext4_xattr_ino_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
--
2.13.1.611.g7e3b11ae1-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 32/32] ext4: add nombcache mount option
2017-06-21 21:21 [PATCH 01/32] ext4: xattr-in-inode support Tahsin Erdogan
@ 2017-06-21 21:21 ` Tahsin Erdogan
0 siblings, 0 replies; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-21 21:21 UTC (permalink / raw)
To: Andreas Dilger, Darrick J . Wong, Jan Kara, Theodore Ts'o,
linux-ext4
Cc: linux-kernel, Tahsin Erdogan
The main purpose of mb cache is to achieve deduplication in
extended attributes. In use cases where opportunity for deduplication
is unlikely, it only adds overhead.
Add a mount option to explicitly turn off mb cache.
Suggested-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
fs/ext4/ext4.h | 1 +
fs/ext4/super.c | 34 +++++++++++++++++++++++-----------
fs/ext4/xattr.c | 52 +++++++++++++++++++++++++++++++++++-----------------
3 files changed, 59 insertions(+), 28 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cb5ace63796a..65443cbc982a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1149,6 +1149,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
+#define EXT4_MOUNT_NO_MBCACHE 0x2000000 /* No mbcache usage */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f347b01ca5ca..c828959b0551 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1336,7 +1336,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
};
static const match_table_t tokens = {
@@ -1419,6 +1419,8 @@ static const match_table_t tokens = {
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_nombcache, "nombcache"},
+ {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1626,6 +1628,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -4080,19 +4083,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_mb_block_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_block_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
- }
-
- if (ext4_has_feature_ea_inode(sb)) {
- sbi->s_ea_inode_cache = ext4_xattr_create_cache();
- if (!sbi->s_ea_inode_cache) {
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_mb_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_mb_block_cache) {
ext4_msg(sb, KERN_ERR,
- "Failed to create an s_ea_inode_cache");
+ "Failed to create xattr block mb_cache");
goto failed_mount_wq;
}
+
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode mb_cache");
+ goto failed_mount_wq;
+ }
+ }
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4989,6 +4995,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index edef88a2cc7c..0a160d93b52d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -976,10 +976,13 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache, GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_create(ea_inode_cache,
+ GFP_NOFS, hash,
+ ea_inode->i_ino,
+ true /* reusable */);
+ }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -993,9 +996,11 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_delete(ea_inode_cache, hash,
+ ea_inode->i_ino);
+ }
}
}
@@ -1179,7 +1184,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete(mb_block_cache, hash, bh->b_blocknr);
+ if (mb_block_cache)
+ mb_cache_entry_delete(mb_block_cache, hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
@@ -1199,11 +1206,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(mb_block_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
- mb_cache_entry_put(mb_block_cache, ce);
+ if (mb_block_cache) {
+ ce = mb_cache_entry_get(mb_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(mb_block_cache, ce);
+ }
}
}
@@ -1382,6 +1391,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
void *ea_data;
+ if (!ea_inode_cache)
+ return NULL;
+
ce = mb_cache_entry_find_first(ea_inode_cache, hash);
if (!ce)
return NULL;
@@ -1452,8 +1464,9 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
return err;
}
- mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
- ea_inode->i_ino, true /* reusable */);
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
*ret_inode = ea_inode;
return 0;
@@ -1780,8 +1793,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete(mb_block_cache, hash,
- bs->bh->b_blocknr);
+ if (mb_block_cache)
+ mb_cache_entry_delete(mb_block_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -2870,6 +2884,8 @@ ext4_xattr_block_cache_insert(struct mb_cache *mb_block_cache,
EXT4_XATTR_REFCOUNT_MAX;
int error;
+ if (!mb_block_cache)
+ return;
error = mb_cache_entry_create(mb_block_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
@@ -2936,6 +2952,8 @@ ext4_xattr_block_cache_find(struct inode *inode,
struct mb_cache_entry *ce;
struct mb_cache *mb_block_cache = MB_BLOCK_CACHE(inode);
+ if (!mb_block_cache)
+ return NULL;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
--
2.13.1.611.g7e3b11ae1-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 24/32] ext2, ext4: make mb block cache names more explicit
@ 2017-06-22 1:49 Tahsin Erdogan
2017-06-22 1:49 ` [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
0 siblings, 1 reply; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-22 1:49 UTC (permalink / raw)
To: Andreas Dilger, Darrick J . Wong, Jan Kara, Theodore Ts'o,
linux-ext4
Cc: linux-kernel, Tahsin Erdogan
There will be a second mb_cache instance that tracks ea_inodes. Make
existing names more explicit so that it is clear that they refer to
xattr block cache.
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
v2:
- renamed s_mb_cache to s_mb_block_cache in both ext2 and ext4
- renamed local variables named ext[24]_mb_cache to mb_block_cache
- renamed macro EXT4_GET_MB_CACHE() to MB_BLOCK_CACHE()
- added MB_BLOCK_CACHE() to ext2 for consistency with ext4
fs/ext2/ext2.h | 2 +-
fs/ext2/super.c | 16 ++++++-------
fs/ext2/xattr.c | 36 +++++++++++++++--------------
fs/ext4/ext4.h | 2 +-
fs/ext4/super.c | 18 +++++++--------
fs/ext4/xattr.c | 71 ++++++++++++++++++++++++++++++---------------------------
6 files changed, 75 insertions(+), 70 deletions(-)
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 03f5ce1d3dbe..23ebb92484c6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -113,7 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9c2028b50e5c..7b1bc9059863 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -147,9 +147,9 @@ static void ext2_put_super (struct super_block * sb)
ext2_quota_off_umount(sb);
- if (sbi->s_mb_cache) {
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_block_cache) {
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1131,9 +1131,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
#ifdef CONFIG_EXT2_FS_XATTR
- sbi->s_mb_cache = ext2_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ sbi->s_ea_block_cache = ext2_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
goto failed_mount3;
}
#endif
@@ -1182,8 +1182,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_id);
goto failed_mount;
failed_mount3:
- if (sbi->s_mb_cache)
- ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ if (sbi->s_ea_block_cache)
+ ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 1e5f76070580..1b9b1268d418 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -121,6 +121,8 @@ const struct xattr_handler *ext2_xattr_handlers[] = {
NULL
};
+#define EA_BLOCK_CACHE(inode) (EXT2_SB(inode->i_sb)->s_ea_block_cache)
+
static inline const struct xattr_handler *
ext2_xattr_handler(int name_index)
{
@@ -150,7 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -195,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -208,7 +210,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -246,7 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -281,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
+ if (ext2_xattr_cache_insert(ea_block_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -493,7 +495,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect modified block
*/
- mb_cache_entry_delete(EXT2_SB(sb)->s_mb_cache, hash,
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
bh->b_blocknr);
/* keep the buffer locked while modifying it. */
@@ -627,7 +629,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
- struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -655,7 +657,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -676,7 +678,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
+ ext2_xattr_cache_insert(ea_block_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -721,7 +723,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
* This must happen under buffer lock for
* ext2_xattr_set2() to reliably detect freed block
*/
- mb_cache_entry_delete(ext2_mb_cache, hash,
+ mb_cache_entry_delete(ea_block_cache, hash,
old_bh->b_blocknr);
/* Free the old block. */
ea_bdebug(old_bh, "freeing");
@@ -795,7 +797,7 @@ ext2_xattr_delete_inode(struct inode *inode)
* This must happen under buffer lock for ext2_xattr_set2() to
* reliably detect freed block
*/
- mb_cache_entry_delete(EXT2_SB(inode->i_sb)->s_mb_cache, hash,
+ mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
@@ -897,13 +899,13 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
@@ -924,7 +926,7 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
* entry is still hashed is reliable.
*/
if (hlist_bl_unhashed(&ce->e_hash_list)) {
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
unlock_buffer(bh);
brelse(bh);
goto again;
@@ -937,14 +939,14 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_touch(ext2_mb_cache, ce);
- mb_cache_entry_put(ext2_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 580fdb753f29..e4a75c643731 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1516,7 +1516,7 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb_cache *s_ea_block_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b02a23ec92ca..380389740575 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -927,9 +927,9 @@ static void ext4_put_super(struct super_block *sb)
invalidate_bdev(sbi->journal_bdev);
ext4_blkdev_remove(sbi);
}
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
@@ -4061,9 +4061,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ sbi->s_ea_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext4_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
goto failed_mount_wq;
}
@@ -4296,9 +4296,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
- if (sbi->s_mb_cache) {
- ext4_xattr_destroy_cache(sbi->s_mb_cache);
- sbi->s_mb_cache = NULL;
+ if (sbi->s_ea_block_cache) {
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
}
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fb437efa8688..fd73dbc92c21 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -72,10 +72,11 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
-static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
+static void ext4_xattr_block_cache_insert(struct mb_cache *,
+ struct buffer_head *);
+static struct buffer_head *
+ext4_xattr_block_cache_find(struct inode *, struct ext4_xattr_header *,
+ struct mb_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
@@ -104,8 +105,8 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
NULL
};
-#define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
- inode->i_sb->s_fs_info)->s_mb_cache)
+#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
+ inode->i_sb->s_fs_info)->s_ea_block_cache)
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
@@ -374,7 +375,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -395,7 +396,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, bh);
entry = BFIRST(bh);
error = ext4_xattr_find_entry(&entry, name_index, name, 1);
if (error)
@@ -541,7 +542,6 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -563,7 +563,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = -EFSCORRUPTED;
goto cleanup;
}
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
+ ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
@@ -660,7 +660,7 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
u32 hash, ref;
int error = 0;
@@ -678,7 +678,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete(ext4_mb_cache, hash, bh->b_blocknr);
+ mb_cache_entry_delete(ea_block_cache, hash, bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -690,11 +690,11 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(ext4_mb_cache, hash,
+ ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
ce->e_reusable = 1;
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
}
}
@@ -1096,7 +1096,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_search *s = &s_copy;
struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
#define header(x) ((struct ext4_xattr_header *)(x))
@@ -1115,7 +1115,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete(ext4_mb_cache, hash,
+ mb_cache_entry_delete(ea_block_cache, hash,
bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode);
@@ -1123,8 +1123,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (!IS_LAST_ENTRY(s->first))
ext4_xattr_rehash(header(s->base),
s->here);
- ext4_xattr_cache_insert(ext4_mb_cache,
- bs->bh);
+ ext4_xattr_block_cache_insert(ea_block_cache,
+ bs->bh);
}
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
@@ -1177,7 +1177,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
inserted:
if (!IS_LAST_ENTRY(s->first)) {
- new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
+ &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
@@ -1222,7 +1223,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_C2B(EXT4_SB(sb),
1));
brelse(new_bh);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
new_bh = NULL;
goto inserted;
@@ -1241,8 +1242,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_touch(ext4_mb_cache, ce);
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_touch(ea_block_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -1292,7 +1293,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
+ ext4_xattr_block_cache_insert(ea_block_cache, new_bh);
error = ext4_handle_dirty_metadata(handle, inode,
new_bh);
if (error)
@@ -1310,7 +1311,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
cleanup:
if (ce)
- mb_cache_entry_put(ext4_mb_cache, ce);
+ mb_cache_entry_put(ea_block_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -2150,15 +2151,16 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
}
/*
- * ext4_xattr_cache_insert()
+ * ext4_xattr_block_cache_insert()
*
- * Create a new entry in the extended attribute cache, and insert
+ * Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
*
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
+ struct buffer_head *bh)
{
struct ext4_xattr_header *header = BHDR(bh);
__u32 hash = le32_to_cpu(header->h_hash);
@@ -2166,7 +2168,7 @@ ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
EXT4_XATTR_REFCOUNT_MAX;
int error;
- error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
if (error == -EBUSY)
@@ -2216,7 +2218,7 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
}
/*
- * ext4_xattr_cache_find()
+ * ext4_xattr_block_cache_find()
*
* Find an identical extended attribute block.
*
@@ -2224,17 +2226,18 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
* not found or an error occurred.
*/
static struct buffer_head *
-ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ext4_xattr_block_cache_find(struct inode *inode,
+ struct ext4_xattr_header *header,
+ struct mb_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
- ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
+ ce = mb_cache_entry_find_first(ea_block_cache, hash);
while (ce) {
struct buffer_head *bh;
@@ -2247,7 +2250,7 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
+ ce = mb_cache_entry_find_next(ea_block_cache, ce);
}
return NULL;
}
--
2.13.1.611.g7e3b11ae1-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 32/32] ext4: add nombcache mount option
2017-06-22 1:49 [PATCH 24/32] ext2, ext4: make mb block cache names more explicit Tahsin Erdogan
@ 2017-06-22 1:49 ` Tahsin Erdogan
2017-06-22 16:00 ` Theodore Ts'o
0 siblings, 1 reply; 7+ messages in thread
From: Tahsin Erdogan @ 2017-06-22 1:49 UTC (permalink / raw)
To: Andreas Dilger, Darrick J . Wong, Jan Kara, Theodore Ts'o,
linux-ext4
Cc: linux-kernel, Tahsin Erdogan
The main purpose of mb cache is to achieve deduplication in
extended attributes. In use cases where opportunity for deduplication
is unlikely, it only adds overhead.
Add a mount option to explicitly turn off mb cache.
Suggested-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
v2:
- updated definition of EXT4_MOUNT_NO_MBCACHE to be 0x00001
fs/ext4/ext4.h | 1 +
fs/ext4/super.c | 34 +++++++++++++++++++++++-----------
fs/ext4/xattr.c | 52 +++++++++++++++++++++++++++++++++++-----------------
3 files changed, 59 insertions(+), 28 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index dc06287ddec8..7d66e1dade45 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1114,6 +1114,7 @@ struct ext4_inode_info {
/*
* Mount flags set via mount options or defaults
*/
+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5ac76e8d4013..1fec35bd4084 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1336,7 +1336,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
};
static const match_table_t tokens = {
@@ -1419,6 +1419,8 @@ static const match_table_t tokens = {
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_nombcache, "nombcache"},
+ {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1626,6 +1628,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
@@ -4080,19 +4083,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- sbi->s_ea_block_cache = ext4_xattr_create_cache();
- if (!sbi->s_ea_block_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create ea_block_cache");
- goto failed_mount_wq;
- }
-
- if (ext4_has_feature_ea_inode(sb)) {
- sbi->s_ea_inode_cache = ext4_xattr_create_cache();
- if (!sbi->s_ea_inode_cache) {
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_ea_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
ext4_msg(sb, KERN_ERR,
- "Failed to create ea_inode_cache");
+ "Failed to create ea_block_cache");
goto failed_mount_wq;
}
+
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode_cache");
+ goto failed_mount_wq;
+ }
+ }
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4989,6 +4995,12 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
"dax flag with busy inodes while remounting");
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3f16dc979012..ce12c3fb7e59 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -976,10 +976,13 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache, GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_create(ea_inode_cache,
+ GFP_NOFS, hash,
+ ea_inode->i_ino,
+ true /* reusable */);
+ }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -993,9 +996,11 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
+ if (ea_inode_cache) {
+ hash = ext4_xattr_inode_get_hash(ea_inode);
+ mb_cache_entry_delete(ea_inode_cache, hash,
+ ea_inode->i_ino);
+ }
}
}
@@ -1179,7 +1184,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete(ea_block_cache, hash, bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
@@ -1199,11 +1206,13 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
- ce = mb_cache_entry_get(ea_block_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
- mb_cache_entry_put(ea_block_cache, ce);
+ if (ea_block_cache) {
+ ce = mb_cache_entry_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ea_block_cache, ce);
+ }
}
}
@@ -1382,6 +1391,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
struct mb_cache *ea_inode_cache = EA_INODE_CACHE(inode);
void *ea_data;
+ if (!ea_inode_cache)
+ return NULL;
+
ce = mb_cache_entry_find_first(ea_inode_cache, hash);
if (!ce)
return NULL;
@@ -1452,8 +1464,9 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
return err;
}
- mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
- ea_inode->i_ino, true /* reusable */);
+ if (EA_INODE_CACHE(inode))
+ mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
+ ea_inode->i_ino, true /* reusable */);
*ret_inode = ea_inode;
return 0;
@@ -1780,8 +1793,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache)
+ mb_cache_entry_delete(ea_block_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -2870,6 +2884,8 @@ ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
EXT4_XATTR_REFCOUNT_MAX;
int error;
+ if (!ea_block_cache)
+ return;
error = mb_cache_entry_create(ea_block_cache, GFP_NOFS, hash,
bh->b_blocknr, reusable);
if (error) {
@@ -2936,6 +2952,8 @@ ext4_xattr_block_cache_find(struct inode *inode,
struct mb_cache_entry *ce;
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
+ if (!ea_block_cache)
+ return NULL;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
--
2.13.1.611.g7e3b11ae1-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 32/32] ext4: add nombcache mount option
2017-06-22 1:49 ` [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
@ 2017-06-22 16:00 ` Theodore Ts'o
0 siblings, 0 replies; 7+ messages in thread
From: Theodore Ts'o @ 2017-06-22 16:00 UTC (permalink / raw)
To: Tahsin Erdogan
Cc: Andreas Dilger, Darrick J . Wong, Jan Kara, linux-ext4,
linux-kernel
On Wed, Jun 21, 2017 at 06:49:39PM -0700, Tahsin Erdogan wrote:
> The main purpose of mb cache is to achieve deduplication in
> extended attributes. In use cases where opportunity for deduplication
> is unlikely, it only adds overhead.
>
> Add a mount option to explicitly turn off mb cache.
>
> Suggested-by: Andreas Dilger <adilger@dilger.ca>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Applied, thanks.
- Ted
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-06-22 16:00 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-20 9:14 [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
2017-06-21 21:02 ` Andreas Dilger
2017-06-21 23:46 ` Tahsin Erdogan
2017-06-22 4:42 ` Theodore Ts'o
-- strict thread matches above, loose matches on Subject: below --
2017-06-21 21:21 [PATCH 01/32] ext4: xattr-in-inode support Tahsin Erdogan
2017-06-21 21:21 ` [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
2017-06-22 1:49 [PATCH 24/32] ext2, ext4: make mb block cache names more explicit Tahsin Erdogan
2017-06-22 1:49 ` [PATCH 32/32] ext4: add nombcache mount option Tahsin Erdogan
2017-06-22 16:00 ` Theodore Ts'o
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).