* [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions
@ 2010-08-04 15:52 Eric Sandeen
2010-08-05 0:11 ` Ted Ts'o
0 siblings, 1 reply; 4+ messages in thread
From: Eric Sandeen @ 2010-08-04 15:52 UTC (permalink / raw)
To: ext4 development
commit 3d0518f4, "ext4: New rec_len encoding for very
large blocksizes" made several changes to this path, but from
a perf perspective, un-inlining ext4_rec_len_from_disk() seems
most significant. This function is called from ext4_check_dir_entry(),
which on a file-creation workload is called extremely often.
I tested this with bonnie:
# bonnie++ -u root -s 0 -f -x 200 -d /mnt/test -n 32
(this does 200 iterations) and got this for the file creations:
ext4 stock: Average = 21206.8 files/s
ext4 inlined: Average = 22346.7 files/s (+5%)
We may get a little more by optimizing around the extra complexity
for > 64K blocks, but for now simply inlining is a nice improvement,
without a huge change in code size:
text data bss dec hex filename
314580 11888 176 326644 4fbf4 ext4-stock.ko
315246 11888 176 327310 4fe8e ext4-inlined.ko
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
---
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de5..8aba5b8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1378,6 +1378,31 @@ struct ext4_dir_entry_2 {
~EXT4_DIR_ROUND)
#define EXT4_MAX_REC_LEN ((1<<16)-1)
+static inline unsigned int
+ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+ if (len == EXT4_MAX_REC_LEN || len == 0)
+ return blocksize;
+ return (len & 65532) | ((len & 3) << 16);
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+ if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
+ BUG();
+ if (len < 65536)
+ return cpu_to_le16(len);
+ if (len == blocksize) {
+ if (blocksize == 65536)
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else
+ return cpu_to_le16(0);
+ }
+ return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
+}
+
/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
@@ -1601,8 +1626,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *);
/* namei.c */
-extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
-extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a43e661..77a7468 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
-unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
- unsigned len = le16_to_cpu(dlen);
-
- if (len == EXT4_MAX_REC_LEN || len == 0)
- return blocksize;
- return (len & 65532) | ((len & 3) << 16);
-}
-
-__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
- if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
- BUG();
- if (len < 65536)
- return cpu_to_le16(len);
- if (len == blocksize) {
- if (blocksize == 65536)
- return cpu_to_le16(EXT4_MAX_REC_LEN);
- else
- return cpu_to_le16(0);
- }
- return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions
2010-08-04 15:52 [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions Eric Sandeen
@ 2010-08-05 0:11 ` Ted Ts'o
2010-08-05 0:28 ` Eric Sandeen
0 siblings, 1 reply; 4+ messages in thread
From: Ted Ts'o @ 2010-08-05 0:11 UTC (permalink / raw)
To: Eric Sandeen; +Cc: ext4 development
Thanks for the commit. Since we don't have support for fs block size >
page size (and having done a private investigation about what would
be required, I'm not sure it's going to happen any time soon --- and
if it does, yanking the #if statements is going to be a tiny part of
the patch :-).
So I slightly modified your patch to only do the extra complexity if
the page size is >= 65536.
- Ted
commit 097639d271f40376f3b3a94c7ab242c22ddc8bc1
Author: Eric Sandeen <sandeen@redhat.com>
Date: Wed Aug 4 20:09:07 2010 -0400
ext4: re-inline ext4_rec_len_(to|from)_disk functions
commit 3d0518f4, "ext4: New rec_len encoding for very
large blocksizes" made several changes to this path, but from
a perf perspective, un-inlining ext4_rec_len_from_disk() seems
most significant. This function is called from ext4_check_dir_entry(),
which on a file-creation workload is called extremely often.
I tested this with bonnie:
# bonnie++ -u root -s 0 -f -x 200 -d /mnt/test -n 32
(this does 200 iterations) and got this for the file creations:
ext4 stock: Average = 21206.8 files/s
ext4 inlined: Average = 22346.7 files/s (+5%)
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ed14e1d..0f340bf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1412,6 +1412,43 @@ struct ext4_dir_entry_2 {
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
+ * If we ever get support for fs block sizes > page_size, we'll need
+ * to remove the #if statements in the next two functions...
+ */
+static inline unsigned int
+ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+#if (PAGE_SIZE >= 65536)
+ if (len == EXT4_MAX_REC_LEN || len == 0)
+ return blocksize;
+ return (len & 65532) | ((len & 3) << 16);
+#else
+ return len;
+#endif
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
+{
+ if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
+ BUG();
+#if (PAGE_SIZE >= 65536)
+ if (len < 65536)
+ return cpu_to_le16(len);
+ if (len == blocksize) {
+ if (blocksize == 65536)
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else
+ return cpu_to_le16(0);
+ }
+ return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
+#else
+ return cpu_to_le16(len);
+#endif
+}
+
+/*
* Hash Tree Directory indexing
* (c) Daniel Phillips, 2001
*/
@@ -1636,8 +1673,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *);
/* namei.c */
-extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
-extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ea8b59d..314c0d3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
-unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
-{
- unsigned len = le16_to_cpu(dlen);
-
- if (len == EXT4_MAX_REC_LEN || len == 0)
- return blocksize;
- return (len & 65532) | ((len & 3) << 16);
-}
-
-__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
-{
- if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
- BUG();
- if (len < 65536)
- return cpu_to_le16(len);
- if (len == blocksize) {
- if (blocksize == 65536)
- return cpu_to_le16(EXT4_MAX_REC_LEN);
- else
- return cpu_to_le16(0);
- }
- return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
-}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions
2010-08-05 0:11 ` Ted Ts'o
@ 2010-08-05 0:28 ` Eric Sandeen
2010-08-05 5:37 ` Ted Ts'o
0 siblings, 1 reply; 4+ messages in thread
From: Eric Sandeen @ 2010-08-05 0:28 UTC (permalink / raw)
To: Ted Ts'o; +Cc: ext4 development
Ted Ts'o wrote:
> Thanks for the commit. Since we don't have support for fs block size >
> page size (and having done a private investigation about what would
> be required, I'm not sure it's going to happen any time soon --- and
> if it does, yanking the #if statements is going to be a tiny part of
> the patch :-).
>
> So I slightly modified your patch to only do the extra complexity if
> the page size is >= 65536.
I had resisted doing that because at least the original rationale in
the patch was for when the VM allows block size > page size ...
Side note: if you do this, is PAGE_SIZE or PAGE_CACHE_SIZE the right macro?
-Eric
> - Ted
>
> commit 097639d271f40376f3b3a94c7ab242c22ddc8bc1
> Author: Eric Sandeen <sandeen@redhat.com>
> Date: Wed Aug 4 20:09:07 2010 -0400
>
> ext4: re-inline ext4_rec_len_(to|from)_disk functions
>
> commit 3d0518f4, "ext4: New rec_len encoding for very
> large blocksizes" made several changes to this path, but from
> a perf perspective, un-inlining ext4_rec_len_from_disk() seems
> most significant. This function is called from ext4_check_dir_entry(),
> which on a file-creation workload is called extremely often.
>
> I tested this with bonnie:
>
> # bonnie++ -u root -s 0 -f -x 200 -d /mnt/test -n 32
>
> (this does 200 iterations) and got this for the file creations:
>
> ext4 stock: Average = 21206.8 files/s
> ext4 inlined: Average = 22346.7 files/s (+5%)
>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index ed14e1d..0f340bf 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1412,6 +1412,43 @@ struct ext4_dir_entry_2 {
> #define EXT4_MAX_REC_LEN ((1<<16)-1)
>
> /*
> + * If we ever get support for fs block sizes > page_size, we'll need
> + * to remove the #if statements in the next two functions...
> + */
> +static inline unsigned int
> +ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
> +{
> + unsigned len = le16_to_cpu(dlen);
> +
> +#if (PAGE_SIZE >= 65536)
> + if (len == EXT4_MAX_REC_LEN || len == 0)
> + return blocksize;
> + return (len & 65532) | ((len & 3) << 16);
> +#else
> + return len;
> +#endif
> +}
> +
> +static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
> +{
> + if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
> + BUG();
> +#if (PAGE_SIZE >= 65536)
> + if (len < 65536)
> + return cpu_to_le16(len);
> + if (len == blocksize) {
> + if (blocksize == 65536)
> + return cpu_to_le16(EXT4_MAX_REC_LEN);
> + else
> + return cpu_to_le16(0);
> + }
> + return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
> +#else
> + return cpu_to_le16(len);
> +#endif
> +}
> +
> +/*
> * Hash Tree Directory indexing
> * (c) Daniel Phillips, 2001
> */
> @@ -1636,8 +1673,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
> extern int ext4_ext_migrate(struct inode *);
>
> /* namei.c */
> -extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
> -extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
> extern int ext4_orphan_add(handle_t *, struct inode *);
> extern int ext4_orphan_del(handle_t *, struct inode *);
> extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index ea8b59d..314c0d3 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
> struct inode *inode);
>
> -unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
> -{
> - unsigned len = le16_to_cpu(dlen);
> -
> - if (len == EXT4_MAX_REC_LEN || len == 0)
> - return blocksize;
> - return (len & 65532) | ((len & 3) << 16);
> -}
> -
> -__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
> -{
> - if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
> - BUG();
> - if (len < 65536)
> - return cpu_to_le16(len);
> - if (len == blocksize) {
> - if (blocksize == 65536)
> - return cpu_to_le16(EXT4_MAX_REC_LEN);
> - else
> - return cpu_to_le16(0);
> - }
> - return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
> -}
> -
> /*
> * p is at least 6 bytes before the end of page
> */
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions
2010-08-05 0:28 ` Eric Sandeen
@ 2010-08-05 5:37 ` Ted Ts'o
0 siblings, 0 replies; 4+ messages in thread
From: Ted Ts'o @ 2010-08-05 5:37 UTC (permalink / raw)
To: Eric Sandeen; +Cc: ext4 development
On Wed, Aug 04, 2010 at 07:28:20PM -0500, Eric Sandeen wrote:
> I had resisted doing that because at least the original rationale in
> the patch was for when the VM allows block size > page size ...
Well, that and apparently you can do completely insane/ludicrous page
sizes on the Itanic. "The architecture supports 11 different page
sizes from 4k to 4GB."
> Side note: if you do this, is PAGE_SIZE or PAGE_CACHE_SIZE the right macro?
Quite right, it should be PAGE_CACHE_SIZE (although I very much doubt
the two will be different any time in the near future).
- Ted
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-08-05 5:37 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-04 15:52 [PATCH] ext4: re-inline ext4_rec_len_(to|from)_disk functions Eric Sandeen
2010-08-05 0:11 ` Ted Ts'o
2010-08-05 0:28 ` Eric Sandeen
2010-08-05 5:37 ` Ted Ts'o
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).