linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chris Mason <chris.mason@oracle.com>
To: Miao Xie <miaox@cn.fujitsu.com>
Cc: Josef Bacik <josef@redhat.com>,
	Linux Btrfs <linux-btrfs@vger.kernel.org>,
	Ito <t-itoh@jp.fujitsu.com>
Subject: Re: [PATCH -V2 3/3] btrfs: fix panic caused by direct IO
Date: Sun, 21 Nov 2010 22:18:54 -0500	[thread overview]
Message-ID: <1290395921-sup-5232@think> (raw)
In-Reply-To: <4CE9DDCB.7060706@cn.fujitsu.com>

Great, I'll test this and the others overnight.  Thanks!

-chris

Excerpts from Miao Xie's message of 2010-11-21 22:04:43 -0500:
> V1->V2 Changes:
> Changed the fix method: we now split bios in btrfs_submit_direct() to fix
> this problem.
> 
> btrfs panicked when we wrote more than 64KB of data by direct IO at one time.
> 
> Reproduce steps:
>  # mkfs.btrfs /dev/sda5 /dev/sda6
>  # mount /dev/sda5 /mnt
>  # dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct
> 
> Then btrfs panicked:
> mapping failed logical 1103155200 bio len 69632 len 12288
> ------------[ cut here ]------------
> kernel BUG at fs/btrfs/volumes.c:3010!
> [SNIP]
> Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY
> RIP: 0010:[<ffffffffa03d1462>]  [<ffffffffa03d1462>] btrfs_map_bio+0x202/0x210 [btrfs]
> [SNIP]
> Call Trace:
>  [<ffffffffa03ab3eb>] __btrfs_submit_bio_done+0x1b/0x20 [btrfs]
>  [<ffffffffa03a35ff>] run_one_async_done+0x9f/0xb0 [btrfs]
>  [<ffffffffa03d3d20>] run_ordered_completions+0x80/0xc0 [btrfs]
>  [<ffffffffa03d45a4>] worker_loop+0x154/0x5f0 [btrfs]
>  [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
>  [<ffffffffa03d4450>] ? worker_loop+0x0/0x5f0 [btrfs]
>  [<ffffffff81083216>] kthread+0x96/0xa0
>  [<ffffffff8100cec4>] kernel_thread_helper+0x4/0x10
>  [<ffffffff81083180>] ? kthread+0x0/0xa0
>  [<ffffffff8100cec0>] ? kernel_thread_helper+0x0/0x10
> 
> We fix this problem by splitting bios when we submit bios.
> 
> Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
> Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
> ---
>  fs/btrfs/inode.c |  205 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 files changed, 184 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 5a5edc7..c91d0e3 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -5535,13 +5535,21 @@ struct btrfs_dio_private {
>      u64 bytes;
>      u32 *csums;
>      void *private;
> +
> +    /* number of bios pending for this dio */
> +    atomic_t pending_bios;
> +
> +    /* IO errors */
> +    int errors;
> +
> +    struct bio *orig_bio;
>  };
>  
>  static void btrfs_endio_direct_read(struct bio *bio, int err)
>  {
> +    struct btrfs_dio_private *dip = bio->bi_private;
>      struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
>      struct bio_vec *bvec = bio->bi_io_vec;
> -    struct btrfs_dio_private *dip = bio->bi_private;
>      struct inode *inode = dip->inode;
>      struct btrfs_root *root = BTRFS_I(inode)->root;
>      u64 start;
> @@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
>      return 0;
>  }
>  
> +static void btrfs_end_dio_bio(struct bio *bio, int err)
> +{
> +    struct btrfs_dio_private *dip = bio->bi_private;
> +
> +    if (err) {
> +        printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
> +              "disk_bytenr %lu len %u err no %d\n",
> +              dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
> +              bio->bi_size, err);
> +        dip->errors = 1;
> +
> +        /*
> +         * before atomic variable goto zero, we must make sure
> +         * dip->errors is perceived to be set.
> +         */
> +        smp_mb__before_atomic_dec();
> +    }
> +
> +    /* if there are more bios still pending for this dio, just exit */
> +    if (!atomic_dec_and_test(&dip->pending_bios))
> +        goto out;
> +
> +    if (dip->errors)
> +        bio_io_error(dip->orig_bio);
> +    else {
> +        set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
> +        bio_endio(dip->orig_bio, 0);
> +    }
> +out:
> +    bio_put(bio);
> +}
> +
> +static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
> +                       u64 first_sector, gfp_t gfp_flags)
> +{
> +    int nr_vecs = bio_get_nr_vecs(bdev);
> +    return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
> +}
> +
> +static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
> +                     int rw, u64 file_offset, int skip_sum,
> +                     u32 *csums)
> +{
> +    int write = rw & REQ_WRITE;
> +    struct btrfs_root *root = BTRFS_I(inode)->root;
> +    int ret;
> +
> +    bio_get(bio);
> +    ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
> +    if (ret)
> +        goto err;
> +
> +    if (write && !skip_sum) {
> +        ret = btrfs_wq_submit_bio(root->fs_info,
> +                   inode, rw, bio, 0, 0,
> +                   file_offset,
> +                   __btrfs_submit_bio_start_direct_io,
> +                   __btrfs_submit_bio_done);
> +        goto err;
> +    } else if (!skip_sum)
> +        btrfs_lookup_bio_sums_dio(root, inode, bio,
> +                      file_offset, csums);
> +
> +    ret = btrfs_map_bio(root, rw, bio, 0, 1);
> +err:
> +    bio_put(bio);
> +    return ret;
> +}
> +
> +static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
> +                    int skip_sum)
> +{
> +    struct inode *inode = dip->inode;
> +    struct btrfs_root *root = BTRFS_I(inode)->root;
> +    struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
> +    struct bio *bio;
> +    struct bio *orig_bio = dip->orig_bio;
> +    struct bio_vec *bvec = orig_bio->bi_io_vec;
> +    u64 start_sector = orig_bio->bi_sector;
> +    u64 file_offset = dip->logical_offset;
> +    u64 submit_len = 0;
> +    u64 map_length;
> +    int nr_pages = 0;
> +    u32 *csums = dip->csums;
> +    int ret = 0;
> +
> +    bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
> +    if (!bio)
> +        return -ENOMEM;
> +    bio->bi_private = dip;
> +    bio->bi_end_io = btrfs_end_dio_bio;
> +    atomic_inc(&dip->pending_bios);
> +
> +    map_length = orig_bio->bi_size;
> +    ret = btrfs_map_block(map_tree, READ, start_sector << 9,
> +                  &map_length, NULL, 0);
> +    if (ret) {
> +        bio_put(bio);
> +        return -EIO;
> +    }
> +
> +    while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
> +        if (unlikely(map_length < submit_len + bvec->bv_len ||
> +            bio_add_page(bio, bvec->bv_page, bvec->bv_len,
> +                 bvec->bv_offset) < bvec->bv_len)) {
> +            /*
> +             * inc the count before we submit the bio so
> +             * we know the end IO handler won't happen before
> +             * we inc the count. Otherwise, the dip might get freed
> +             * before we're done setting it up
> +             */
> +            atomic_inc(&dip->pending_bios);
> +            ret = __btrfs_submit_dio_bio(bio, inode, rw,
> +                             file_offset, skip_sum,
> +                             csums);
> +            if (ret) {
> +                bio_put(bio);
> +                atomic_dec(&dip->pending_bios);
> +                goto out_err;
> +            }
> +
> +            if (!skip_sum)
> +                csums = csums + nr_pages;
> +            start_sector += submit_len >> 9;
> +            file_offset += submit_len;
> +
> +            submit_len = 0;
> +            nr_pages = 0;
> +
> +            bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
> +                          start_sector, GFP_NOFS);
> +            if (!bio)
> +                goto out_err;
> +            bio->bi_private = dip;
> +            bio->bi_end_io = btrfs_end_dio_bio;
> +
> +            map_length = orig_bio->bi_size;
> +            ret = btrfs_map_block(map_tree, READ, start_sector << 9,
> +                          &map_length, NULL, 0);
> +            if (ret) {
> +                bio_put(bio);
> +                goto out_err;
> +            }
> +        } else {
> +            submit_len += bvec->bv_len;
> +            nr_pages ++;
> +            bvec++;
> +        }
> +    }
> +
> +    ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
> +                     csums);
> +    if (!ret)
> +        return 0;
> +
> +    bio_put(bio);
> +out_err:
> +    dip->errors = 1;
> +    /*
> +     * before atomic variable goto zero, we must
> +     * make sure dip->errors is perceived to be set.
> +     */
> +    smp_mb__before_atomic_dec();
> +    if (atomic_dec_and_test(&dip->pending_bios))
> +        bio_io_error(dip->orig_bio);
> +
> +    /* bio_end_io() will handle error, so we needn't return it */
> +    return 0;
> +}
> +
>  static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
>                  loff_t file_offset)
>  {
> @@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
>  
>      dip->disk_bytenr = (u64)bio->bi_sector << 9;
>      bio->bi_private = dip;
> +    dip->errors = 0;
> +    dip->orig_bio = bio;
> +    atomic_set(&dip->pending_bios, 0);
>  
>      if (write)
>          bio->bi_end_io = btrfs_endio_direct_write;
>      else
>          bio->bi_end_io = btrfs_endio_direct_read;
>  
> -    ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
> -    if (ret)
> -        goto free_ordered;
> -
> -    if (write && !skip_sum) {
> -        ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
> -                   inode, rw, bio, 0, 0,
> -                   dip->logical_offset,
> -                   __btrfs_submit_bio_start_direct_io,
> -                   __btrfs_submit_bio_done);
> -        if (ret)
> -            goto free_ordered;
> +    ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
> +    if (!ret)
>          return;
> -    } else if (!skip_sum)
> -        btrfs_lookup_bio_sums_dio(root, inode, bio,
> -                      dip->logical_offset, dip->csums);
> -
> -    ret = btrfs_map_bio(root, rw, bio, 0, 1);
> -    if (ret)
> -        goto free_ordered;
> -    return;
>  free_ordered:
>      /*
>       * If this is a write, we need to clean up the reserved space and kill

  reply	other threads:[~2010-11-22  3:18 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-22  3:04 [PATCH -V2 3/3] btrfs: fix panic caused by direct IO Miao Xie
2010-11-22  3:18 ` Chris Mason [this message]
2010-11-29  1:55   ` Chris Mason
2010-11-29  2:08     ` Miao Xie
2010-11-29  3:58     ` Miao Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1290395921-sup-5232@think \
    --to=chris.mason@oracle.com \
    --cc=josef@redhat.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=miaox@cn.fujitsu.com \
    --cc=t-itoh@jp.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).