[PATCH] ext4: make xattr inode reads faster

public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] ext4: make xattr inode reads faster
@ 2017-07-12  8:29 Tahsin Erdogan
  2017-07-14 21:13 ` Andreas Dilger
  0 siblings, 1 reply; 5+ messages in thread
From: Tahsin Erdogan @ 2017-07-12  8:29 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger, linux-ext4
  Cc: linux-kernel, Tahsin Erdogan

ext4_xattr_inode_read() currently reads each block sequentially while
waiting for io operation to complete before moving on to the next
block. This prevents request merging in block layer.

Fix this by starting reads for all blocks then wait for completions.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 fs/ext4/ext4.h  |  2 ++
 fs/ext4/inode.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/ext4/xattr.c | 50 +++++++++++++++++++++++++++++++-------------------
 3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..12f0a16ad500 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
 int ext4_inode_is_fast_symlink(struct inode *inode);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+		     struct buffer_head **bhs);
 int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
 			     struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c600f02673f..5b8ae1b66f09 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1015,6 +1015,42 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 	return ERR_PTR(-EIO);
 }
 
+/* Read a contiguous batch of blocks. */
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+		     struct buffer_head **bhs)
+{
+	int i, err;
+
+	for (i = 0; i < bh_count; i++) {
+		bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
+		if (IS_ERR(bhs[i])) {
+			err = PTR_ERR(bhs[i]);
+			while (i--)
+				brelse(bhs[i]);
+			return err;
+		}
+	}
+
+	for (i = 0; i < bh_count; i++)
+		/* Note that NULL bhs[i] is valid because of holes. */
+		if (bhs[i] && !buffer_uptodate(bhs[i]))
+			ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
+				    &bhs[i]);
+
+	for (i = 0; i < bh_count; i++)
+		if (bhs[i])
+			wait_on_buffer(bhs[i]);
+
+	for (i = 0; i < bh_count; i++) {
+		if (bhs[i] && !buffer_uptodate(bhs[i])) {
+			for (i = 0; i < bh_count; i++)
+				brelse(bhs[i]);
+			return -EIO;
+		}
+	}
+	return 0;
+}
+
 int ext4_walk_page_buffers(handle_t *handle,
 			   struct buffer_head *head,
 			   unsigned from,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index cff4f41ced61..f7364a842ff4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -317,28 +317,40 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
  */
 static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
 {
-	unsigned long block = 0;
-	struct buffer_head *bh;
-	int blocksize = ea_inode->i_sb->s_blocksize;
-	size_t csize, copied = 0;
-	void *copy_pos = buf;
-
-	while (copied < size) {
-		csize = (size - copied) > blocksize ? blocksize : size - copied;
-		bh = ext4_bread(NULL, ea_inode, block, 0);
-		if (IS_ERR(bh))
-			return PTR_ERR(bh);
-		if (!bh)
-			return -EFSCORRUPTED;
+	int blocksize = 1 << ea_inode->i_blkbits;
+	int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
+	int tail_size = (size % blocksize) ?: blocksize;
+	struct buffer_head *bhs_inline[8];
+	struct buffer_head **bhs = bhs_inline;
+	int i, ret;
+
+	if (bh_count > ARRAY_SIZE(bhs_inline)) {
+		bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
+		if (!bhs)
+			return -ENOMEM;
+	}
 
-		memcpy(copy_pos, bh->b_data, csize);
-		brelse(bh);
+	ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, bhs);
+	if (ret)
+		goto free_bhs;
 
-		copy_pos += csize;
-		block += 1;
-		copied += csize;
+	for (i = 0; i < bh_count; i++) {
+		/* There shouldn't be any holes in ea_inode. */
+		if (!bhs[i]) {
+			ret = -EFSCORRUPTED;
+			goto put_bhs;
+		}
+		memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
+		       i < bh_count - 1 ? blocksize : tail_size);
 	}
-	return 0;
+	ret = 0;
+put_bhs:
+	for (i = 0; i < bh_count; i++)
+		brelse(bhs[i]);
+free_bhs:
+	if (bhs != bhs_inline)
+		kfree(bhs);
+	return ret;
 }
 
 static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
-- 
2.13.2.932.g7449e964c-goog

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext4: make xattr inode reads faster
  2017-07-12  8:29 [PATCH] ext4: make xattr inode reads faster Tahsin Erdogan
@ 2017-07-14 21:13 ` Andreas Dilger
  2017-07-17  6:20   ` [PATCH v2] " Tahsin Erdogan
  0 siblings, 1 reply; 5+ messages in thread
From: Andreas Dilger @ 2017-07-14 21:13 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Theodore Ts'o, linux-ext4, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 6047 bytes --]

On Jul 12, 2017, at 1:29 AM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
> ext4_xattr_inode_read() currently reads each block sequentially while
> waiting for io operation to complete before moving on to the next
> block. This prevents request merging in block layer.
> 
> Fix this by starting reads for all blocks then wait for completions.

It surprises me that ext4/VFS doesn't already have a helper routine to do this.
It looks like ext4_find_entry() is doing something similar for read-ahead of
directory blocks, so it may be worthwhile to consider moving that code over
to use ext4_bread_batch(), but it looks like it would need to add a flag for
whether to wait on the buffers to be uptodate or not.

> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> fs/ext4/ext4.h  |  2 ++
> fs/ext4/inode.c | 36 ++++++++++++++++++++++++++++++++++++
> fs/ext4/xattr.c | 50 +++++++++++++++++++++++++++++++-------------------
> 3 files changed, 69 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 9ebde0cd632e..12f0a16ad500 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
> int ext4_inode_is_fast_symlink(struct inode *inode);
> struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
> struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +		     struct buffer_head **bhs);
> int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
> 			     struct buffer_head *bh_result, int create);
> int ext4_get_block(struct inode *inode, sector_t iblock,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3c600f02673f..5b8ae1b66f09 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1015,6 +1015,42 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
> 	return ERR_PTR(-EIO);
> }
> 
> +/* Read a contiguous batch of blocks. */
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +		     struct buffer_head **bhs)
> +{
> +	int i, err;
> +
> +	for (i = 0; i < bh_count; i++) {
> +		bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);


It's nice to have the parameter annotation, since it can often be confusing if
there are multiple int/bool arguments to a function, but, I'd write this as:

	ret = ext4_getblk(NULL, inode, block + i, /* map_flags = */ 0);

I don't know if there is some kind of convention for this kind of comment?

> +		if (IS_ERR(bhs[i])) {
> +			err = PTR_ERR(bhs[i]);
> +			while (i--)
> +				brelse(bhs[i]);

It would be better to do the error cleanup once at the end of the function,
instead of multiple times in the code, like:

		if (IS_ERR(bhs[i])) {
			err = PTR_ERR(bhs[i]);
			bh_count = i;
			goto out_brelse;
		}
	:
	:
out_brelse:
	for (i = 0; i < bh_count; i++) {
		brelse[bhs[i]);
		bhs[i] = NULL;
	}
	return err;


The "bhs[i] = NULL" isn't strictly necessary, but better to not leave stray
pointers around in this array.

> +			return err;
> +		}
> +	}
> +
> +	for (i = 0; i < bh_count; i++)
> +		/* Note that NULL bhs[i] is valid because of holes. */
> +		if (bhs[i] && !buffer_uptodate(bhs[i]))
> +			ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
> +				    &bhs[i]);
> +
> +	for (i = 0; i < bh_count; i++)
> +		if (bhs[i])
> +			wait_on_buffer(bhs[i]);
> +
> +	for (i = 0; i < bh_count; i++) {
> +		if (bhs[i] && !buffer_uptodate(bhs[i])) {
> +			for (i = 0; i < bh_count; i++)
> +				brelse(bhs[i]);

The "out_brelse:" label could also be used here.

> +			return -EIO;
> +		}
> +	}
> +	return 0;
> +}
> +
> int ext4_walk_page_buffers(handle_t *handle,
> 			   struct buffer_head *head,
> 			   unsigned from,
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index cff4f41ced61..f7364a842ff4 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -317,28 +317,40 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
>  */
> static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
> {
> -	unsigned long block = 0;
> -	struct buffer_head *bh;
> -	int blocksize = ea_inode->i_sb->s_blocksize;
> -	size_t csize, copied = 0;
> -	void *copy_pos = buf;
> -
> -	while (copied < size) {
> -		csize = (size - copied) > blocksize ? blocksize : size - copied;
> -		bh = ext4_bread(NULL, ea_inode, block, 0);
> -		if (IS_ERR(bh))
> -			return PTR_ERR(bh);
> -		if (!bh)
> -			return -EFSCORRUPTED;
> +	int blocksize = 1 << ea_inode->i_blkbits;
> +	int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
> +	int tail_size = (size % blocksize) ?: blocksize;
> +	struct buffer_head *bhs_inline[8];

> +	struct buffer_head **bhs = bhs_inline;
> +	int i, ret;
> +
> +	if (bh_count > ARRAY_SIZE(bhs_inline)) {
> +		bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
> +		if (!bhs)
> +			return -ENOMEM;
> +	}
> 
> -		memcpy(copy_pos, bh->b_data, csize);
> -		brelse(bh);
> +	ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, bhs);


	ret = ext4_bread_batch(ea_inode, /* block = */ 0, bh_count, bhs);  ?

> +	if (ret)
> +		goto free_bhs;
> 
> -		copy_pos += csize;
> -		block += 1;
> -		copied += csize;
> +	for (i = 0; i < bh_count; i++) {
> +		/* There shouldn't be any holes in ea_inode. */
> +		if (!bhs[i]) {
> +			ret = -EFSCORRUPTED;
> +			goto put_bhs;
> +		}
> +		memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
> +		       i < bh_count - 1 ? blocksize : tail_size);
> 	}
> -	return 0;
> +	ret = 0;
> +put_bhs:
> +	for (i = 0; i < bh_count; i++)
> +		brelse(bhs[i]);
> +free_bhs:
> +	if (bhs != bhs_inline)
> +		kfree(bhs);
> +	return ret;
> }
> 
> static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> --
> 2.13.2.932.g7449e964c-goog
> 


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2] ext4: make xattr inode reads faster
  2017-07-14 21:13 ` Andreas Dilger
@ 2017-07-17  6:20   ` Tahsin Erdogan
  2017-07-17  6:30     ` Tahsin Erdogan
  2017-08-06  4:14     ` Theodore Ts'o
  0 siblings, 2 replies; 5+ messages in thread
From: Tahsin Erdogan @ 2017-07-17  6:20 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger, linux-ext4
  Cc: linux-kernel, Tahsin Erdogan

ext4_xattr_inode_read() currently reads each block sequentially while
waiting for io operation to complete before moving on to the next
block. This prevents request merging in block layer.

Add a ext4_bread_batch() function that starts reads for all blocks
then optionally waits for them to complete. A similar logic is used
in ext4_find_entry(), so update that code to use the new function.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
v2:
 - updated ext4_find_entry() to also use ext4_bread_batch()
 - added wait parameter to ext4_bread_batch()

 fs/ext4/ext4.h  |  2 ++
 fs/ext4/inode.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/namei.c | 43 ++++++++++++++-----------------------------
 fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++++++-------------------
 4 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..e9440ed605c0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
 int ext4_inode_is_fast_symlink(struct inode *inode);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+		     bool wait, struct buffer_head **bhs);
 int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
 			     struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c600f02673f..70699940e20d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 	return ERR_PTR(-EIO);
 }
 
+/* Read a contiguous batch of blocks. */
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+		     bool wait, struct buffer_head **bhs)
+{
+	int i, err;
+
+	for (i = 0; i < bh_count; i++) {
+		bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
+		if (IS_ERR(bhs[i])) {
+			err = PTR_ERR(bhs[i]);
+			bh_count = i;
+			goto out_brelse;
+		}
+	}
+
+	for (i = 0; i < bh_count; i++)
+		/* Note that NULL bhs[i] is valid because of holes. */
+		if (bhs[i] && !buffer_uptodate(bhs[i]))
+			ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
+				    &bhs[i]);
+
+	if (!wait)
+		return 0;
+
+	for (i = 0; i < bh_count; i++)
+		if (bhs[i])
+			wait_on_buffer(bhs[i]);
+
+	for (i = 0; i < bh_count; i++) {
+		if (bhs[i] && !buffer_uptodate(bhs[i])) {
+			err = -EIO;
+			goto out_brelse;
+		}
+	}
+	return 0;
+
+out_brelse:
+	for (i = 0; i < bh_count; i++) {
+		brelse(bhs[i]);
+		bhs[i] = NULL;
+	}
+	return err;
+}
+
 int ext4_walk_page_buffers(handle_t *handle,
 			   struct buffer_head *head,
 			   unsigned from,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 13f0cadb1238..2758ff1b940f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	struct super_block *sb;
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
 	struct buffer_head *bh, *ret = NULL;
-	ext4_lblk_t start, block, b;
+	ext4_lblk_t start, block;
 	const u8 *name = d_name->name;
-	int ra_max = 0;		/* Number of bh's in the readahead
+	size_t ra_max = 0;	/* Number of bh's in the readahead
 				   buffer, bh_use[] */
-	int ra_ptr = 0;		/* Current index into readahead
+	size_t ra_ptr = 0;	/* Current index into readahead
 				   buffer */
-	int num = 0;
 	ext4_lblk_t  nblocks;
 	int i, namelen, retval;
 	struct ext4_filename fname;
@@ -1411,31 +1410,17 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 		if (ra_ptr >= ra_max) {
 			/* Refill the readahead buffer */
 			ra_ptr = 0;
-			b = block;
-			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
-				/*
-				 * Terminate if we reach the end of the
-				 * directory and must wrap, or if our
-				 * search has finished at this block.
-				 */
-				if (b >= nblocks || (num && block == start)) {
-					bh_use[ra_max] = NULL;
-					break;
-				}
-				num++;
-				bh = ext4_getblk(NULL, dir, b++, 0);
-				if (IS_ERR(bh)) {
-					if (ra_max == 0) {
-						ret = bh;
-						goto cleanup_and_exit;
-					}
-					break;
-				}
-				bh_use[ra_max] = bh;
-				if (bh)
-					ll_rw_block(REQ_OP_READ,
-						    REQ_META | REQ_PRIO,
-						    1, &bh);
+			if (block < start)
+				ra_max = start - block;
+			else
+				ra_max = nblocks - block;
+			ra_max = min(ra_max, ARRAY_SIZE(bh_use));
+			retval = ext4_bread_batch(dir, block, ra_max,
+						  false /* wait */, bh_use);
+			if (retval) {
+				ret = ERR_PTR(retval);
+				ra_max = 0;
+				goto cleanup_and_exit;
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index cff4f41ced61..68b5b4f9fbcb 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
  */
 static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
 {
-	unsigned long block = 0;
-	struct buffer_head *bh;
-	int blocksize = ea_inode->i_sb->s_blocksize;
-	size_t csize, copied = 0;
-	void *copy_pos = buf;
-
-	while (copied < size) {
-		csize = (size - copied) > blocksize ? blocksize : size - copied;
-		bh = ext4_bread(NULL, ea_inode, block, 0);
-		if (IS_ERR(bh))
-			return PTR_ERR(bh);
-		if (!bh)
-			return -EFSCORRUPTED;
+	int blocksize = 1 << ea_inode->i_blkbits;
+	int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
+	int tail_size = (size % blocksize) ?: blocksize;
+	struct buffer_head *bhs_inline[8];
+	struct buffer_head **bhs = bhs_inline;
+	int i, ret;
+
+	if (bh_count > ARRAY_SIZE(bhs_inline)) {
+		bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
+		if (!bhs)
+			return -ENOMEM;
+	}
 
-		memcpy(copy_pos, bh->b_data, csize);
-		brelse(bh);
+	ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count,
+			       true /* wait */, bhs);
+	if (ret)
+		goto free_bhs;
 
-		copy_pos += csize;
-		block += 1;
-		copied += csize;
+	for (i = 0; i < bh_count; i++) {
+		/* There shouldn't be any holes in ea_inode. */
+		if (!bhs[i]) {
+			ret = -EFSCORRUPTED;
+			goto put_bhs;
+		}
+		memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
+		       i < bh_count - 1 ? blocksize : tail_size);
 	}
-	return 0;
+	ret = 0;
+put_bhs:
+	for (i = 0; i < bh_count; i++)
+		brelse(bhs[i]);
+free_bhs:
+	if (bhs != bhs_inline)
+		kfree(bhs);
+	return ret;
 }
 
 static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
-- 
2.13.2.932.g7449e964c-goog

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: make xattr inode reads faster
  2017-07-17  6:20   ` [PATCH v2] " Tahsin Erdogan
@ 2017-07-17  6:30     ` Tahsin Erdogan
  2017-08-06  4:14     ` Theodore Ts'o
  1 sibling, 0 replies; 5+ messages in thread
From: Tahsin Erdogan @ 2017-07-17  6:30 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger, Ext4 Developers List
  Cc: lkml, Tahsin Erdogan

> It surprises me that ext4/VFS doesn't already have a helper routine to do this.
> It looks like ext4_find_entry() is doing something similar for read-ahead of
> directory blocks, so it may be worthwhile to consider moving that code over
> to use ext4_bread_batch(), but it looks like it would need to add a flag for
> whether to wait on the buffers to be uptodate or not.

Done.

> It's nice to have the parameter annotation, since it can often be confusing if
> there are multiple int/bool arguments to a function, but, I'd write this as:
>
>         ret = ext4_getblk(NULL, inode, block + i, /* map_flags = */ 0);
>
> I don't know if there is some kind of convention for this kind of comment?

I agree that yours is a bit more intuitive, however the version that
puts the comment after the value is more common in the codebase. I
prefer keeping it as is for consistency reasons. Let me know if you
still prefer the prefix version.

>
>> +             if (IS_ERR(bhs[i])) {
>> +                     err = PTR_ERR(bhs[i]);
>> +                     while (i--)
>> +                             brelse(bhs[i]);
>
> It would be better to do the error cleanup once at the end of the function,
> instead of multiple times in the code, like:
>
>                 if (IS_ERR(bhs[i])) {
>                         err = PTR_ERR(bhs[i]);
>                         bh_count = i;
>                         goto out_brelse;
>                 }

Done.


On Sun, Jul 16, 2017 at 11:20 PM, Tahsin Erdogan <tahsin@google.com> wrote:
> ext4_xattr_inode_read() currently reads each block sequentially while
> waiting for io operation to complete before moving on to the next
> block. This prevents request merging in block layer.
>
> Add a ext4_bread_batch() function that starts reads for all blocks
> then optionally waits for them to complete. A similar logic is used
> in ext4_find_entry(), so update that code to use the new function.
>
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> v2:
>  - updated ext4_find_entry() to also use ext4_bread_batch()
>  - added wait parameter to ext4_bread_batch()
>
>  fs/ext4/ext4.h  |  2 ++
>  fs/ext4/inode.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/ext4/namei.c | 43 ++++++++++++++-----------------------------
>  fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++++++-------------------
>  4 files changed, 92 insertions(+), 48 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 9ebde0cd632e..e9440ed605c0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2462,6 +2462,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
>  int ext4_inode_is_fast_symlink(struct inode *inode);
>  struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
>  struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +                    bool wait, struct buffer_head **bhs);
>  int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
>                              struct buffer_head *bh_result, int create);
>  int ext4_get_block(struct inode *inode, sector_t iblock,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3c600f02673f..70699940e20d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1015,6 +1015,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
>         return ERR_PTR(-EIO);
>  }
>
> +/* Read a contiguous batch of blocks. */
> +int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
> +                    bool wait, struct buffer_head **bhs)
> +{
> +       int i, err;
> +
> +       for (i = 0; i < bh_count; i++) {
> +               bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
> +               if (IS_ERR(bhs[i])) {
> +                       err = PTR_ERR(bhs[i]);
> +                       bh_count = i;
> +                       goto out_brelse;
> +               }
> +       }
> +
> +       for (i = 0; i < bh_count; i++)
> +               /* Note that NULL bhs[i] is valid because of holes. */
> +               if (bhs[i] && !buffer_uptodate(bhs[i]))
> +                       ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
> +                                   &bhs[i]);
> +
> +       if (!wait)
> +               return 0;
> +
> +       for (i = 0; i < bh_count; i++)
> +               if (bhs[i])
> +                       wait_on_buffer(bhs[i]);
> +
> +       for (i = 0; i < bh_count; i++) {
> +               if (bhs[i] && !buffer_uptodate(bhs[i])) {
> +                       err = -EIO;
> +                       goto out_brelse;
> +               }
> +       }
> +       return 0;
> +
> +out_brelse:
> +       for (i = 0; i < bh_count; i++) {
> +               brelse(bhs[i]);
> +               bhs[i] = NULL;
> +       }
> +       return err;
> +}
> +
>  int ext4_walk_page_buffers(handle_t *handle,
>                            struct buffer_head *head,
>                            unsigned from,
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 13f0cadb1238..2758ff1b940f 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -1342,13 +1342,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
>         struct super_block *sb;
>         struct buffer_head *bh_use[NAMEI_RA_SIZE];
>         struct buffer_head *bh, *ret = NULL;
> -       ext4_lblk_t start, block, b;
> +       ext4_lblk_t start, block;
>         const u8 *name = d_name->name;
> -       int ra_max = 0;         /* Number of bh's in the readahead
> +       size_t ra_max = 0;      /* Number of bh's in the readahead
>                                    buffer, bh_use[] */
> -       int ra_ptr = 0;         /* Current index into readahead
> +       size_t ra_ptr = 0;      /* Current index into readahead
>                                    buffer */
> -       int num = 0;
>         ext4_lblk_t  nblocks;
>         int i, namelen, retval;
>         struct ext4_filename fname;
> @@ -1411,31 +1410,17 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
>                 if (ra_ptr >= ra_max) {
>                         /* Refill the readahead buffer */
>                         ra_ptr = 0;
> -                       b = block;
> -                       for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
> -                               /*
> -                                * Terminate if we reach the end of the
> -                                * directory and must wrap, or if our
> -                                * search has finished at this block.
> -                                */
> -                               if (b >= nblocks || (num && block == start)) {
> -                                       bh_use[ra_max] = NULL;
> -                                       break;
> -                               }
> -                               num++;
> -                               bh = ext4_getblk(NULL, dir, b++, 0);
> -                               if (IS_ERR(bh)) {
> -                                       if (ra_max == 0) {
> -                                               ret = bh;
> -                                               goto cleanup_and_exit;
> -                                       }
> -                                       break;
> -                               }
> -                               bh_use[ra_max] = bh;
> -                               if (bh)
> -                                       ll_rw_block(REQ_OP_READ,
> -                                                   REQ_META | REQ_PRIO,
> -                                                   1, &bh);
> +                       if (block < start)
> +                               ra_max = start - block;
> +                       else
> +                               ra_max = nblocks - block;
> +                       ra_max = min(ra_max, ARRAY_SIZE(bh_use));
> +                       retval = ext4_bread_batch(dir, block, ra_max,
> +                                                 false /* wait */, bh_use);
> +                       if (retval) {
> +                               ret = ERR_PTR(retval);
> +                               ra_max = 0;
> +                               goto cleanup_and_exit;
>                         }
>                 }
>                 if ((bh = bh_use[ra_ptr++]) == NULL)
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index cff4f41ced61..68b5b4f9fbcb 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
>   */
>  static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
>  {
> -       unsigned long block = 0;
> -       struct buffer_head *bh;
> -       int blocksize = ea_inode->i_sb->s_blocksize;
> -       size_t csize, copied = 0;
> -       void *copy_pos = buf;
> -
> -       while (copied < size) {
> -               csize = (size - copied) > blocksize ? blocksize : size - copied;
> -               bh = ext4_bread(NULL, ea_inode, block, 0);
> -               if (IS_ERR(bh))
> -                       return PTR_ERR(bh);
> -               if (!bh)
> -                       return -EFSCORRUPTED;
> +       int blocksize = 1 << ea_inode->i_blkbits;
> +       int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
> +       int tail_size = (size % blocksize) ?: blocksize;
> +       struct buffer_head *bhs_inline[8];
> +       struct buffer_head **bhs = bhs_inline;
> +       int i, ret;
> +
> +       if (bh_count > ARRAY_SIZE(bhs_inline)) {
> +               bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
> +               if (!bhs)
> +                       return -ENOMEM;
> +       }
>
> -               memcpy(copy_pos, bh->b_data, csize);
> -               brelse(bh);
> +       ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count,
> +                              true /* wait */, bhs);
> +       if (ret)
> +               goto free_bhs;
>
> -               copy_pos += csize;
> -               block += 1;
> -               copied += csize;
> +       for (i = 0; i < bh_count; i++) {
> +               /* There shouldn't be any holes in ea_inode. */
> +               if (!bhs[i]) {
> +                       ret = -EFSCORRUPTED;
> +                       goto put_bhs;
> +               }
> +               memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
> +                      i < bh_count - 1 ? blocksize : tail_size);
>         }
> -       return 0;
> +       ret = 0;
> +put_bhs:
> +       for (i = 0; i < bh_count; i++)
> +               brelse(bhs[i]);
> +free_bhs:
> +       if (bhs != bhs_inline)
> +               kfree(bhs);
> +       return ret;
>  }
>
>  static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> --
> 2.13.2.932.g7449e964c-goog
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] ext4: make xattr inode reads faster
  2017-07-17  6:20   ` [PATCH v2] " Tahsin Erdogan
  2017-07-17  6:30     ` Tahsin Erdogan
@ 2017-08-06  4:14     ` Theodore Ts'o
  1 sibling, 0 replies; 5+ messages in thread
From: Theodore Ts'o @ 2017-08-06  4:14 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Andreas Dilger, linux-ext4, linux-kernel

On Sun, Jul 16, 2017 at 11:20:12PM -0700, Tahsin Erdogan wrote:
> ext4_xattr_inode_read() currently reads each block sequentially while
> waiting for io operation to complete before moving on to the next
> block. This prevents request merging in block layer.
> 
> Add a ext4_bread_batch() function that starts reads for all blocks
> then optionally waits for them to complete. A similar logic is used
> in ext4_find_entry(), so update that code to use the new function.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>

Thanks, applied.

					- Ted

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-08-06  4:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-07-12  8:29 [PATCH] ext4: make xattr inode reads faster Tahsin Erdogan
2017-07-14 21:13 ` Andreas Dilger
2017-07-17  6:20   ` [PATCH v2] " Tahsin Erdogan
2017-07-17  6:30     ` Tahsin Erdogan
2017-08-06  4:14     ` Theodore Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox