From: Miao Xie <miaox@cn.fujitsu.com>
To: <fdmanana@gmail.com>
Cc: "linux-btrfs@vger.kernel.org" <linux-btrfs@vger.kernel.org>
Subject: Re: [PATCH 02/12] Btrfs: load checksum data once when submitting a direct read io
Date: Tue, 29 Jul 2014 09:56:14 +0800
Message-ID: <53D6FF3E.8080404@cn.fujitsu.com>
In-Reply-To: <CAL3q7H5ihcw404e4dYpoJU-hcSZJaKe=mBi38srFqdSLv8DoRg@mail.gmail.com>

On Mon, 28 Jul 2014 18:24:47 +0100, Filipe David Manana wrote:
> On Sat, Jun 28, 2014 at 12:34 PM, Miao Xie <miaox@cn.fujitsu.com> wrote:
>> The current code loads the checksum data several times when a whole
>> direct read io is split because of the raid stripe limit, which makes
>> us search the csum tree several times. That just wastes time and makes
>> the contention on the csum tree root worse. This patch addresses the
>> problem by loading all the checksum data at once.
>>
>> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
>> ---
>>  fs/btrfs/btrfs_inode.h |  1 -
>>  fs/btrfs/ctree.h       |  3 +--
>>  fs/btrfs/file-item.c   | 14 ++------------
>>  fs/btrfs/inode.c       | 40 ++++++++++++++++++++++------------------
>>  4 files changed, 25 insertions(+), 33 deletions(-)
>>
>> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
>> index 4794923..7e9f53b 100644
>> --- a/fs/btrfs/btrfs_inode.h
>> +++ b/fs/btrfs/btrfs_inode.h
>> @@ -263,7 +263,6 @@ struct btrfs_dio_private {
>>
>>         /* dio_bio came from fs/direct-io.c */
>>         struct bio *dio_bio;
>> -       u8 csum[0];
>>  };
>>
>>  /*
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index be91397..40e9938 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -3739,8 +3739,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
>>  int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>                           struct bio *bio, u32 *dst);
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 logical_offset);
>> +                             struct bio *bio, u64 logical_offset);
>>  int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
>>                              struct btrfs_root *root,
>>                              u64 objectid, u64 pos,
>> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
>> index f46cfe4..cf1b94f 100644
>> --- a/fs/btrfs/file-item.c
>> +++ b/fs/btrfs/file-item.c
>> @@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>  }
>>
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 offset)
>> +                             struct bio *bio, u64 offset)
>>  {
>> -       int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
>> -       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -       int ret;
>> -
>> -       len >>= inode->i_sb->s_blocksize_bits;
>> -       len *= csum_size;
>> -
>> -       ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
>> -                                     (u32 *)(dip->csum + len), 1);
>> -       return ret;
>> +       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
>>  }
>>
>>  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> index a3f102f..969fb22 100644
>> --- a/fs/btrfs/inode.c
>> +++ b/fs/btrfs/inode.c
>> @@ -7081,7 +7081,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         struct inode *inode = dip->inode;
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct bio *dio_bio;
>> -       u32 *csums = (u32 *)dip->csum;
>> +       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
>> +       u32 *csums = (u32 *)io_bio->csum;
>>         u64 start;
>>         int i;
>>
>> @@ -7123,6 +7124,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         if (err)
>>                 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
>>         dio_end_io(dio_bio, err);
>> +
>> +       if (io_bio->end_io)
>> +               io_bio->end_io(io_bio, err);
>>         bio_put(bio);
>>  }
>>
>> @@ -7261,13 +7265,20 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
>>                 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
>>                 if (ret)
>>                         goto err;
>> -       } else if (!skip_sum) {
>> -               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
>> +       } else {
>> +               /*
>> +                * We have loaded all the csum data we need when we submit
>> +                * the first bio, so skip it.
>> +                */
>> +               if (dip->logical_offset != file_offset)
>> +                       goto map;
>> +
>> +               /* Load all csum data at once. */
>> +               ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
>>                                                 file_offset);
>>                 if (ret)
>>                         goto err;
>>         }
>> -
>>  map:
>>         ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
>>  err:
>> @@ -7288,7 +7299,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
>>         u64 submit_len = 0;
>>         u64 map_length;
>>         int nr_pages = 0;
>> -       int ret = 0;
>> +       int ret;
>>         int async_submit = 0;
>>
>>         map_length = orig_bio->bi_iter.bi_size;
>> @@ -7392,30 +7403,20 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct btrfs_dio_private *dip;
>>         struct bio *io_bio;
>> +       struct btrfs_io_bio *btrfs_bio;
>>         int skip_sum;
>> -       int sum_len;
>>         int write = rw & REQ_WRITE;
>>         int ret = 0;
>> -       u16 csum_size;
>>
>>         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
>>
>> -       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
>> +       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS | __GFP_ZERO);
> 
> Hi Miao,
> 
> With this change (adding the __GFP_ZERO flag), I ran into the following
> warning once while running xfstests (I don't know exactly which test
> case triggered it, likely one of those that run fsstress):

Thanks for testing.
I'll fix it.

Miao
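
The warning quoted below appears to be the __GFP_ZERO check at the top of
mempool_alloc(): btrfs_bio_clone() goes through bio_clone_bioset() and
bio_alloc_bioset(), which allocate from a mempool-backed bioset, and an
element recycled from a mempool's reserve is handed back as-is, so a
__GFP_ZERO request cannot be honored there. As a sketch of one possible
direction for the rework (assuming the zeroing was only needed so that the
btrfs-private fields such as io_bio->end_io start out cleared; the reworked
patch may well do something different), the plain GFP_NOFS clone could be
kept and those fields cleared explicitly:

	/*
	 * Sketch: do not pass __GFP_ZERO down to the bioset's mempool;
	 * instead reset the btrfs_io_bio fields that the direct-read
	 * path tests for NULL (see the io_bio->end_io checks added by
	 * this patch).
	 */
	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
	if (!io_bio) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	btrfs_bio = btrfs_io_bio(io_bio);
	btrfs_bio->end_io = NULL;
	/* plus any csum state this series expects to start out zeroed */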

> 
> [ 3941.856860] ------------[ cut here ]------------
> [ 3941.856871] WARNING: CPU: 0 PID: 4154 at mm/mempool.c:205 mempool_alloc+0xc8/0x1c0()
> [ 3941.856873] Modules linked in: btrfs xor raid6_pq binfmt_misc nfsd auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4 i2c_core pcspkr evbug psmouse serio_raw e1000 [last unloaded: btrfs]
> [ 3941.856886] CPU: 0 PID: 4154 Comm: xfs_io Tainted: G        W 3.16.0-rc6-fdm-btrfs-next-37+ #1
> [ 3941.856887] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> [ 3941.856889]  0000000000000009 ffff8800d569f778 ffffffff8169a687 00000000000077b0
> [ 3941.856892]  0000000000000000 ffff8800d569f7b8 ffffffff8104fb4c 00000000ffffffff
> [ 3941.856894]  0000000000008050 0000000000000001 0000000000008050 ffff88004f921918
> [ 3941.856896] Call Trace:
> [ 3941.856901]  [<ffffffff8169a687>] dump_stack+0x4e/0x68
> [ 3941.856904]  [<ffffffff8104fb4c>] warn_slowpath_common+0x8c/0xc0
> [ 3941.856905]  [<ffffffff8104fb9a>] warn_slowpath_null+0x1a/0x20
> [ 3941.856907]  [<ffffffff81151fc8>] mempool_alloc+0xc8/0x1c0
> [ 3941.856911]  [<ffffffff810129cf>] ? save_stack_trace+0x2f/0x50
> [ 3941.856918]  [<ffffffff8131331a>] bio_alloc_bioset+0x10a/0x1c0
> [ 3941.856921]  [<ffffffff81314c68>] bio_clone_bioset+0x88/0x310
> [ 3941.856923]  [<ffffffff81151a65>] ? mempool_alloc_slab+0x15/0x20
> [ 3941.856936]  [<ffffffffa0209385>] btrfs_bio_clone+0x15/0x20 [btrfs]
> [ 3941.856944]  [<ffffffffa01ed47f>] btrfs_submit_direct+0x4f/0x7b0 [btrfs]
> [ 3941.856948]  [<ffffffff811fc10a>] ? do_blockdev_direct_IO+0x17ea/0x1f60
> [ 3941.856952]  [<ffffffff810afb35>] ? mark_held_locks+0x75/0xa0
> [ 3941.856955]  [<ffffffff816a383f>] ? _raw_spin_unlock_irqrestore+0x3f/0x70
> [ 3941.856956]  [<ffffffff811fc13e>] do_blockdev_direct_IO+0x181e/0x1f60
> [ 3941.856965]  [<ffffffffa01f86d0>] ? btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856972]  [<ffffffffa01ed430>] ? btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856974]  [<ffffffff811fc8cc>] __blockdev_direct_IO+0x4c/0x50
> [ 3941.856981]  [<ffffffffa01f86d0>] ? btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856987]  [<ffffffffa01ed430>] ? btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856993]  [<ffffffffa01eb591>] btrfs_direct_IO+0x1a1/0x340 [btrfs]
> [ 3941.856999]  [<ffffffffa01f86d0>] ? btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.857005]  [<ffffffffa01ed430>] ? btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.857007]  [<ffffffff81150210>] generic_file_direct_write+0xb0/0x180
> [ 3941.857014]  [<ffffffffa01fc4a1>] btrfs_file_write_iter+0x411/0x560 [btrfs]
> [ 3941.857017]  [<ffffffff811ba541>] new_sync_write+0x81/0xb0
> [ 3941.857019]  [<ffffffff811bb342>] vfs_write+0xc2/0x1f0
> [ 3941.857020]  [<ffffffff811bba2a>] SyS_pwrite64+0x9a/0xb0
> [ 3941.857022]  [<ffffffff816a3d92>] system_call_fastpath+0x16/0x1b
> [ 3941.857024] ---[ end trace c1dfd29523250709 ]---
> 
> Thanks.
> 
> 
>>         if (!io_bio) {
>>                 ret = -ENOMEM;
>>                 goto free_ordered;
>>         }
>>
>> -       if (!skip_sum && !write) {
>> -               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -               sum_len = dio_bio->bi_iter.bi_size >>
>> -                       inode->i_sb->s_blocksize_bits;
>> -               sum_len *= csum_size;
>> -       } else {
>> -               sum_len = 0;
>> -       }
>> -
>> -       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
>> +       dip = kmalloc(sizeof(*dip), GFP_NOFS);
>>         if (!dip) {
>>                 ret = -ENOMEM;
>>                 goto free_io_bio;
>> @@ -7441,6 +7442,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         if (!ret)
>>                 return;
>>
>> +       btrfs_bio = btrfs_io_bio(io_bio);
>> +       if (btrfs_bio->end_io)
>> +               btrfs_bio->end_io(btrfs_bio, ret);
>>  free_io_bio:
>>         bio_put(io_bio);
>>
>> --
>> 1.9.3
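
To make the effect of this change easier to follow, here is a condensed
paraphrase of the read side of __btrfs_submit_dio_bio() as modified above
(it only restates the hunks; the write and async branches, the nodatasum
case and most error handling are trimmed):

	/*
	 * Read path of __btrfs_submit_dio_bio() after this patch: the
	 * csum tree is searched once, for the original (unsplit) bio,
	 * when the first split bio is submitted.  Every later split bio
	 * finds the checksums already attached to the original bio's
	 * btrfs_io_bio and goes straight to btrfs_map_bio().
	 */
	if (dip->logical_offset == file_offset) {
		/* First split bio: one lookup covers the whole direct read. */
		ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
						file_offset);
		if (ret)
			goto err;
	}
	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);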


Thread overview: 49+ messages
2014-06-28 11:34 [PATCH 00/12] Implement the data repair function for direct read Miao Xie
2014-06-28 11:34 ` [PATCH 01/12] Btrfs: fix put dio bio twice when we submit dio bio fail Miao Xie
2014-06-28 11:34 ` [PATCH 02/12] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-07-28 17:24   ` Filipe David Manana
2014-07-29  1:56     ` Miao Xie [this message]
2014-06-28 11:34 ` [PATCH 03/12] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-06-28 11:34 ` [PATCH 04/12] Btrfs: do file data check by sub-bio's self Miao Xie
2014-06-28 11:34 ` [PATCH 05/12] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-06-28 11:34 ` [PATCH 06/12] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-06-28 11:34 ` [PATCH 07/12] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-06-28 11:34 ` [PATCH 08/12] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-06-28 11:34 ` [PATCH 09/12] Btrfs: modify clean_io_failure " Miao Xie
2014-06-28 11:35 ` [PATCH 10/12] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-06-28 11:35 ` [PATCH 11/12] Btrfs: implement repair function when direct read fails Miao Xie
2014-06-28 11:35 ` [PATCH 12/12] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie
2014-07-29  9:23 ` [PATCH v2 00/12] Implement the data repair function for direct read Miao Xie
2014-07-29  9:23   ` [PATCH v2 01/12] Btrfs: fix put dio bio twice when we submit dio bio fail Miao Xie
2014-07-29  9:24   ` [PATCH v2 02/12] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-08-08  0:32     ` Filipe David Manana
2014-08-08  9:22       ` Miao Xie
2014-08-08  9:23       ` [PATCH v3 " Miao Xie
2014-07-29  9:24   ` [PATCH v2 03/12] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-07-29  9:24   ` [PATCH v2 04/12] Btrfs: do file data check by sub-bio's self Miao Xie
2014-07-29  9:24   ` [PATCH v2 05/12] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-07-29  9:24   ` [PATCH v2 06/12] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-07-29  9:24   ` [PATCH v2 07/12] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-07-29  9:24   ` [PATCH v2 08/12] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-07-29  9:24   ` [PATCH v2 09/12] Btrfs: modify clean_io_failure " Miao Xie
2014-07-29  9:24   ` [PATCH v2 10/12] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-07-29  9:24   ` [PATCH v2 11/12] Btrfs: implement repair function when direct read fails Miao Xie
2014-08-29 18:31     ` Chris Mason
2014-09-01  6:56       ` Miao Xie
2014-09-02 12:33         ` Liu Bo
2014-09-02 13:05           ` Chris Mason
2014-09-03  9:02             ` Miao Xie
2014-09-12 10:43             ` [PATCH v4 00/11] Implement the data repair function for direct read Miao Xie
2014-09-12 10:43               ` [PATCH v4 01/11] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-09-12 10:43               ` [PATCH v4 02/11] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-09-12 10:43               ` [PATCH v4 03/11] Btrfs: do file data check by sub-bio's self Miao Xie
2014-09-12 10:43               ` [PATCH v4 04/11] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-09-12 10:43               ` [PATCH v4 05/11] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-09-12 10:43               ` [PATCH v4 06/11] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-09-12 10:44               ` [PATCH v4 07/11] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-09-12 10:44               ` [PATCH v4 08/11] Btrfs: modify clean_io_failure " Miao Xie
2014-09-12 10:44               ` [PATCH v4 09/11] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-09-12 10:44               ` [PATCH v4 10/11] Btrfs: implement repair function when direct read fails Miao Xie
2014-09-12 10:44               ` [PATCH v4 11/11] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie
2014-09-12 14:50               ` [PATCH v4 00/11] Implement the data repair function for direct read Chris Mason
2014-07-29  9:24   ` [PATCH v2 12/12] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie
