From: Shi Weihua <shiwh@cn.fujitsu.com>
To: Josef Bacik <josef@redhat.com>
Cc: linux-btrfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org, hch@infradead.org,
akpm@linux-foundation.org
Subject: Re: [PATCH 6/6] Btrfs: do aio_write instead of write
Date: Sun, 23 May 2010 16:31:52 +0800 [thread overview]
Message-ID: <4BF8E7F8.50800@cn.fujitsu.com> (raw)
In-Reply-To: <1274461422-18433-6-git-send-email-josef@redhat.com>
at 2010-5-22 1:03, Josef Bacik wrote:
> In order for AIO to work, we need to implement aio_write. This patch converts
> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
> nothing broke, and the AIO stuff magically started working. Thanks,
But xfstests's case 198(source: src/aio-dio-regress/aiodio_sparse2.c) still failed,
following message outputted.
--------------------
AIO write offset 0 expected 65536 got -22
AIO write offset 5242880 expected 65536 got -22
AIO write offset 10485760 expected 65536 got -22
AIO write offset 15728640 expected 65536 got -22
AIO write offset 20971520 expected 65536 got -22
AIO write offset 31457280 expected 65536 got -22
AIO write offset 36700160 expected 65536 got -22
AIO write offset 41943040 expected 65536 got -22
AIO write offset 47185920 expected 65536 got -22
AIO write offset 52428800 expected 65536 got -22
AIO write offset 57671680 expected 65536 got -22
AIO write offset 62914560 expected 65536 got -22
AIO write offset 73400320 expected 65536 got -22
AIO write offset 78643200 expected 65536 got -22
non one buffer at buf[0] => 0x00,00,00,00
non-one read at offset 0
*** WARNING *** /tmp/aaaa has not been unlinked; if you don't rm it manually first, it may influence the next run
--------------------
generic_file_direct_write()(in btrfs_file_aio_write(), fs/btrfs/file.c) returned -22,
maybe it's useful for your analysing.
Thanks.
>
> Signed-off-by: Josef Bacik <josef@redhat.com>
> ---
> fs/btrfs/extent_io.c | 11 +++-
> fs/btrfs/file.c | 152 +++++++++++++++++++++++---------------------------
> 2 files changed, 80 insertions(+), 83 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index d2d0368..c407f1c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2020,6 +2020,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> sector_t sector;
> struct extent_map *em;
> struct block_device *bdev;
> + struct btrfs_ordered_extent *ordered;
> int ret;
> int nr = 0;
> size_t page_offset = 0;
> @@ -2031,7 +2032,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> set_page_extent_mapped(page);
>
> end = page_end;
> - lock_extent(tree, start, end, GFP_NOFS);
> + while (1) {
> + lock_extent(tree, start, end, GFP_NOFS);
> + ordered = btrfs_lookup_ordered_extent(inode, start);
> + if (!ordered)
> + break;
> + unlock_extent(tree, start, end, GFP_NOFS);
> + btrfs_start_ordered_extent(inode, ordered, 1);
> + btrfs_put_ordered_extent(ordered);
> + }
>
> if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
> char *userpage;
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index dace07b..ce35431 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -46,32 +46,42 @@
> static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
> int write_bytes,
> struct page **prepared_pages,
> - const char __user *buf)
> + struct iov_iter *i)
> {
> - long page_fault = 0;
> - int i;
> + size_t copied;
> + int pg = 0;
> int offset = pos & (PAGE_CACHE_SIZE - 1);
>
> - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
> + while (write_bytes > 0) {
> size_t count = min_t(size_t,
> PAGE_CACHE_SIZE - offset, write_bytes);
> - struct page *page = prepared_pages[i];
> - fault_in_pages_readable(buf, count);
> + struct page *page = prepared_pages[pg];
> +again:
> + if (unlikely(iov_iter_fault_in_readable(i, count)))
> + return -EFAULT;
>
> /* Copy data from userspace to the current page */
> - kmap(page);
> - page_fault = __copy_from_user(page_address(page) + offset,
> - buf, count);
> + copied = iov_iter_copy_from_user(page, i, offset, count);
> +
> /* Flush processor's dcache for this page */
> flush_dcache_page(page);
> - kunmap(page);
> - buf += count;
> - write_bytes -= count;
> + iov_iter_advance(i, copied);
> + write_bytes -= copied;
>
> - if (page_fault)
> - break;
> + if (unlikely(copied == 0)) {
> + count = min_t(size_t, PAGE_CACHE_SIZE - offset,
> + iov_iter_single_seg_count(i));
> + goto again;
> + }
> +
> + if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
> + offset += copied;
> + } else {
> + pg++;
> + offset = 0;
> + }
> }
> - return page_fault ? -EFAULT : 0;
> + return 0;
> }
>
> /*
> @@ -823,60 +833,24 @@ again:
> return 0;
> }
>
> -/* Copied from read-write.c */
> -static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
> -{
> - set_current_state(TASK_UNINTERRUPTIBLE);
> - if (!kiocbIsKicked(iocb))
> - schedule();
> - else
> - kiocbClearKicked(iocb);
> - __set_current_state(TASK_RUNNING);
> -}
> -
> -/*
> - * Just a copy of what do_sync_write does.
> - */
> -static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
> - size_t count, loff_t pos, loff_t *ppos)
> -{
> - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
> - unsigned long nr_segs = 1;
> - struct kiocb kiocb;
> - ssize_t ret;
> -
> - init_sync_kiocb(&kiocb, file);
> - kiocb.ki_pos = pos;
> - kiocb.ki_left = count;
> - kiocb.ki_nbytes = count;
> -
> - while (1) {
> - ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
> - ppos, count, count);
> - if (ret != -EIOCBRETRY)
> - break;
> - wait_on_retry_sync_kiocb(&kiocb);
> - }
> -
> - if (ret == -EIOCBQUEUED)
> - ret = wait_on_sync_kiocb(&kiocb);
> - *ppos = kiocb.ki_pos;
> - return ret;
> -}
> -
> -static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> - size_t count, loff_t *ppos)
> +static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> {
> - loff_t pos;
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = fdentry(file)->d_inode;
> + struct btrfs_root *root = BTRFS_I(inode)->root;
> + struct page *pinned[2];
> + struct page **pages = NULL;
> + struct iov_iter i;
> + loff_t *ppos = &iocb->ki_pos;
> loff_t start_pos;
> ssize_t num_written = 0;
> ssize_t err = 0;
> + size_t count;
> + size_t ocount;
> int ret = 0;
> - struct inode *inode = fdentry(file)->d_inode;
> - struct btrfs_root *root = BTRFS_I(inode)->root;
> - struct page **pages = NULL;
> int nrptrs;
> - struct page *pinned[2];
> unsigned long first_index;
> unsigned long last_index;
> int will_write;
> @@ -888,7 +862,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> pinned[0] = NULL;
> pinned[1] = NULL;
>
> - pos = *ppos;
> start_pos = pos;
>
> vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
> @@ -902,6 +875,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
>
> mutex_lock(&inode->i_mutex);
>
> + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
> + if (err)
> + goto out;
> + count = ocount;
> +
> current->backing_dev_info = inode->i_mapping->backing_dev_info;
> err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
> if (err)
> @@ -918,14 +896,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> BTRFS_I(inode)->sequence++;
>
> if (unlikely(file->f_flags & O_DIRECT)) {
> - num_written = __btrfs_direct_write(file, buf, count, pos,
> - ppos);
> - pos += num_written;
> - count -= num_written;
> + ret = btrfs_check_data_free_space(root, inode, count);
> + if (ret)
> + goto out;
>
> - /* We've written everything we wanted to, exit */
> - if (num_written < 0 || !count)
> + num_written = generic_file_direct_write(iocb, iov, &nr_segs,
> + pos, ppos, count,
> + ocount);
> +
> + /* All reservations for DIO are done internally */
> + btrfs_free_reserved_data_space(root, inode, count);
> +
> + if (num_written < 0) {
> + ret = num_written;
> + num_written = 0;
> + goto out;
> + } else if (num_written == count) {
> goto out;
> + }
>
> /*
> * We are going to do buffered for the rest of the range, so we
> @@ -933,18 +921,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> * done.
> */
> buffered = 1;
> - buf += num_written;
> + pos += num_written;
> }
>
> - nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
> - PAGE_CACHE_SIZE / (sizeof(struct page *)));
> + iov_iter_init(&i, iov, nr_segs, count, num_written);
> + nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
> + PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
> + (sizeof(struct page *)));
> pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
>
> /* generic_write_checks can change our pos */
> start_pos = pos;
>
> first_index = pos >> PAGE_CACHE_SHIFT;
> - last_index = (pos + count) >> PAGE_CACHE_SHIFT;
> + last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
>
> /*
> * there are lots of better ways to do this, but this code
> @@ -961,7 +951,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> unlock_page(pinned[0]);
> }
> }
> - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
> + if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
> pinned[1] = grab_cache_page(inode->i_mapping, last_index);
> if (!PageUptodate(pinned[1])) {
> ret = btrfs_readpage(NULL, pinned[1]);
> @@ -972,10 +962,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> }
> }
>
> - while (count > 0) {
> + while (iov_iter_count(&i) > 0) {
> size_t offset = pos & (PAGE_CACHE_SIZE - 1);
> - size_t write_bytes = min(count, nrptrs *
> - (size_t)PAGE_CACHE_SIZE -
> + size_t write_bytes = min(iov_iter_count(&i),
> + nrptrs * (size_t)PAGE_CACHE_SIZE -
> offset);
> size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
> PAGE_CACHE_SHIFT;
> @@ -997,7 +987,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> }
>
> ret = btrfs_copy_from_user(pos, num_pages,
> - write_bytes, pages, buf);
> + write_bytes, pages, &i);
> if (ret) {
> btrfs_free_reserved_data_space(root, inode,
> write_bytes);
> @@ -1026,8 +1016,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> btrfs_throttle(root);
> }
>
> - buf += write_bytes;
> - count -= write_bytes;
> pos += write_bytes;
> num_written += write_bytes;
>
> @@ -1222,7 +1210,7 @@ const struct file_operations btrfs_file_operations = {
> .read = do_sync_read,
> .aio_read = generic_file_aio_read,
> .splice_read = generic_file_splice_read,
> - .write = btrfs_file_write,
> + .aio_write = btrfs_file_aio_write,
> .mmap = btrfs_file_mmap,
> .open = generic_file_open,
> .release = btrfs_release_file,
--
Shi Weihua
next prev parent reply other threads:[~2010-05-23 8:31 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-21 17:03 [PATCH 1/6] fs: allow short direct-io reads to be completed via buffered IO Josef Bacik
2010-05-21 17:03 ` [PATCH 2/6] direct-io: add a hook for the fs to provide its own submit_bio function Josef Bacik
2010-05-21 17:03 ` [PATCH 3/6] direct-io: do not merge logically non-contiguous requests Josef Bacik
2010-05-22 1:47 ` Mike Fedyk
2010-05-22 1:47 ` Mike Fedyk
2010-05-22 1:47 ` Mike Fedyk
2010-05-22 14:02 ` Josef Bacik
2010-05-22 14:02 ` Josef Bacik
2010-05-22 14:02 ` Josef Bacik
2010-05-21 17:03 ` [PATCH 4/6] fs: kill blockdev_direct_IO_no_locking Josef Bacik
2010-05-21 17:03 ` [PATCH 5/6] Btrfs: add basic DIO read/write support Josef Bacik
2010-05-21 17:03 ` [PATCH 6/6] Btrfs: do aio_write instead of write Josef Bacik
2010-05-23 8:31 ` Shi Weihua [this message]
2010-05-23 23:59 ` Josef Bacik
2010-05-27 3:06 ` liubo
2010-05-27 3:13 ` liubo
2010-05-27 12:59 ` Chris Mason
2010-05-28 1:42 ` liubo
2010-05-30 9:33 ` Dmitri Nikulin
2010-06-01 13:19 ` Chris Mason
2010-06-01 23:20 ` Dmitri Nikulin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BF8E7F8.50800@cn.fujitsu.com \
--to=shiwh@cn.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=hch@infradead.org \
--cc=josef@redhat.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.