linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Maria Wikström" <maria@ponstudios.se>
To: johannes.hirte@fem.tu-ilmenau.de, xin.zhong@intel.com
Cc: linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page
Date: Thu, 27 Jan 2011 23:12:25 +0100	[thread overview]
Message-ID: <1296166345.11397.16.camel@mainframe> (raw)
In-Reply-To: <201101271409.27877.johannes.hirte@fem.tu-ilmenau.de>

tor 2011-01-27 klockan 14:09 +0100 skrev Johannes Hirte: 
> On Thursday 09 December 2010 10:30:14 Zhong, Xin wrote:
> > This problem is found in meego testing:
> > http://bugs.meego.com/show_bug.cgi?id=6672
> > A file in btrfs is mmaped and the mmaped buffer is passed to pwrite to
> > write to the same page of the same file. In btrfs_file_aio_write(), the
> > pages are locked by prepare_pages(). So when btrfs_copy_from_user() is
> > called, a page fault happens and the same page needs to be locked again in
> > filemap_fault(). The fix is to move iov_iter_fault_in_readable() before
> > prepare_pages() to make the page fault happen before the pages are locked,
> > and also to disable page faults in the critical region in btrfs_copy_from_user().
> > 
> > Reviewed-by: Yan, Zheng<zheng.z.yan@intel.com>
> > Signed-off-by: Zhong, Xin <xin.zhong@intel.com>
> > ---
> >  fs/btrfs/file.c |   92 ++++++++++++++++++++++++++++++++++++-------------------
> >  1 files changed, 60 insertions(+), 32 deletions(-)
> > 
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index c1faded..66836d8 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos,
> > int num_pages, struct page **prepared_pages,
> >  					 struct iov_iter *i)
> >  {
> > -	size_t copied;
> > +	size_t copied = 0;
> >  	int pg = 0;
> >  	int offset = pos & (PAGE_CACHE_SIZE - 1);
> > +	int total_copied = 0;
> > 
> >  	while (write_bytes > 0) {
> >  		size_t count = min_t(size_t,
> >  				     PAGE_CACHE_SIZE - offset, write_bytes);
> >  		struct page *page = prepared_pages[pg];
> > -again:
> > -		if (unlikely(iov_iter_fault_in_readable(i, count)))
> > -			return -EFAULT;
> > -
> > -		/* Copy data from userspace to the current page */
> > -		copied = iov_iter_copy_from_user(page, i, offset, count);
> > +		/*
> > +		 * Copy data from userspace to the current page
> > +		 *
> > +		 * Disable pagefault to avoid recursive lock since
> > +		 * the pages are already locked
> > +		 */
> > +		pagefault_disable();
> > +		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
> > +		pagefault_enable();
> > 
> >  		/* Flush processor's dcache for this page */
> >  		flush_dcache_page(page);
> >  		iov_iter_advance(i, copied);
> >  		write_bytes -= copied;
> > +		total_copied += copied;
> > 
> > +		/* Return to btrfs_file_aio_write to fault page */
> >  		if (unlikely(copied == 0)) {
> > -			count = min_t(size_t, PAGE_CACHE_SIZE - offset,
> > -				      iov_iter_single_seg_count(i));
> > -			goto again;
> > +			break;
> >  		}
> > 
> >  		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
> > @@ -81,7 +85,7 @@ again:
> >  			offset = 0;
> >  		}
> >  	}
> > -	return 0;
> > +	return total_copied;
> >  }
> > 
> >  /*
> > @@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
> >  	unsigned long last_index;
> >  	int will_write;
> >  	int buffered = 0;
> > +	int copied = 0;
> > +	int dirty_pages = 0;
> > 
> >  	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
> >  		      (file->f_flags & O_DIRECT));
> > @@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb
> > *iocb, WARN_ON(num_pages > nrptrs);
> >  		memset(pages, 0, sizeof(struct page *) * nrptrs);
> > 
> > -		ret = btrfs_delalloc_reserve_space(inode, write_bytes);
> > +		/*
> > +		 * Fault pages before locking them in prepare_pages
> > +		 * to avoid recursive lock
> > +		 */
> > +		if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
> > +			ret = -EFAULT;
> > +			goto out;
> > +		}
> > +
> > +		ret = btrfs_delalloc_reserve_space(inode,
> > +					num_pages << PAGE_CACHE_SHIFT);
> >  		if (ret)
> >  			goto out;
> > 
> > @@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb
> > *iocb, pos, first_index, last_index,
> >  				    write_bytes);
> >  		if (ret) {
> > -			btrfs_delalloc_release_space(inode, write_bytes);
> > +			btrfs_delalloc_release_space(inode,
> > +					num_pages << PAGE_CACHE_SHIFT);
> >  			goto out;
> >  		}
> > 
> > -		ret = btrfs_copy_from_user(pos, num_pages,
> > +		copied = btrfs_copy_from_user(pos, num_pages,
> >  					   write_bytes, pages, &i);
> > -		if (ret == 0) {
> > +		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
> > +					PAGE_CACHE_SHIFT;
> > +
> > +		if (num_pages > dirty_pages) {
> > +			if (copied > 0)
> > +				atomic_inc(
> > +					&BTRFS_I(inode)->outstanding_extents);
> > +			btrfs_delalloc_release_space(inode,
> > +					(num_pages - dirty_pages) <<
> > +					PAGE_CACHE_SHIFT);
> > +		}
> > +
> > +		if (copied > 0) {
> >  			dirty_and_release_pages(NULL, root, file, pages,
> > -						num_pages, pos, write_bytes);
> > +						dirty_pages, pos, copied);
> >  		}
> > 
> >  		btrfs_drop_pages(pages, num_pages);
> > -		if (ret) {
> > -			btrfs_delalloc_release_space(inode, write_bytes);
> > -			goto out;
> > -		}
> > 
> > -		if (will_write) {
> > -			filemap_fdatawrite_range(inode->i_mapping, pos,
> > -						 pos + write_bytes - 1);
> > -		} else {
> > -			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
> > -							   num_pages);
> > -			if (num_pages <
> > -			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
> > -				btrfs_btree_balance_dirty(root, 1);
> > -			btrfs_throttle(root);
> > +		if (copied > 0) {
> > +			if (will_write) {
> > +				filemap_fdatawrite_range(inode->i_mapping, pos,
> > +							 pos + copied - 1);
> > +			} else {
> > +				balance_dirty_pages_ratelimited_nr(
> > +							inode->i_mapping,
> > +							dirty_pages);
> > +				if (dirty_pages <
> > +				(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
> > +					btrfs_btree_balance_dirty(root, 1);
> > +				btrfs_throttle(root);
> > +			}
> >  		}
> > 
> > -		pos += write_bytes;
> > -		num_written += write_bytes;
> > +		pos += copied;
> > +		num_written += copied;
> > 
> >  		cond_resched();
> >  	}
> 
> This patch breaks one of my Gentoo boxes. When I try to install/update via 
> emerge, some packages hang. It seems that it's always an "svn info" process 
> that is stuck in the kernel, eating 100% CPU. I don't know how svn is involved 
> here, but reverting this patch makes the system work again. I'll try to get a 
> simple testcase.
> 
> regards,
>   Johannes

The same thing happens here. The only way to kill the process (svn info)
is to restart the computer. Running vanilla 2.6.37 without patches.

#strace emerge libgcrypt
-- cut -- 
open("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/build.log",
O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 7
fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb713a000
fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0
_llseek(7, 416, [416], SEEK_SET)        = 0
fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0
stat64("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/build.log",
{st_mode=S_IFREG|0660, st_size=416, ...}) = 0
dup(1)                                  = 8
fcntl64(8, F_GETFL)                     = 0x2 (flags O_RDWR)
fstat64(8, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 3), ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7005000
_llseek(8, 0, 0xbff80cfc, SEEK_CUR)     = -1 ESPIPE (Illegal seek)
fstat64(8, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 3), ...}) = 0
stat64("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/environment",
{st_mode=S_IFREG|0664, st_size=105970, ...}) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|
SIGCHLD, child_tidptr=0xb7430728) = 11665
close(6)                                = 0
gettimeofday({1296162836, 126192}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5,
events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 1 ([{fd=5,
revents=POLLIN}])
read(5, ">>> Preparing source in /var/tmp"..., 4096) = 91
read(5, 0xb713b000, 4096)               = -1 EAGAIN (Resource
temporarily unavailable)
write(8, ">>> Preparing source in /var/tmp"..., 91>>> Preparing source
in /var/tmp/portage/dev-libs/libgcrypt-1.4.6/work/libgcrypt-1.4.6 ...
) = 91
write(7, ">>> Preparing source in /var/tmp"..., 91) = 91
poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5,
events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout)
poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5,
events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout)
poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5,
events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout)

The last line is repeated every 3 seconds.

// Maria



  reply	other threads:[~2011-01-27 22:12 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-09  9:30 [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page Zhong, Xin
2011-01-27 13:09 ` Johannes Hirte
2011-01-27 22:12   ` Maria Wikström [this message]
2011-01-28  1:26     ` Zhong, Xin
2011-01-28  2:54       ` Johannes Hirte
2011-01-28  3:53         ` Zhong, Xin
2011-02-01 23:34           ` Johannes Hirte
2011-02-11  4:39             ` Zhong, Xin
2011-02-18 11:31               ` Maria Wikström
2011-02-21  1:51                 ` Zhong, Xin
2011-02-24 14:51                   ` Maria Wikström
2011-02-24 15:55                     ` Mitch Harder
2011-02-24 16:00                       ` Chris Mason
2011-02-24 16:03                         ` Mitch Harder
2011-02-24 16:19                           ` Chris Mason
2011-02-24 16:32                             ` Mitch Harder
     [not found]                               ` <AANLkTinvyb-bTVVignd1KGojvh-QrYCFmCnwYKBsYC_2@mail.gmail.com>
2011-02-25 17:11                                 ` Mitch Harder
2011-02-25 18:43                                   ` Mitch Harder
2011-02-25 19:19                                     ` Chris Mason
2011-02-28  1:46                                     ` [PATCH] btrfs file write debugging patch Chris Mason
2011-02-28  8:56                                       ` Zhong, Xin
2011-02-28 14:02                                         ` Chris Mason
2011-02-28 10:13                                       ` Johannes Hirte
2011-02-28 14:00                                         ` Chris Mason
2011-02-28 16:10                                         ` Josef Bacik
2011-02-28 16:45                                           ` Maria Wikström
2011-02-28 17:47                                             ` Mitch Harder
2011-02-28 20:20                                               ` Mitch Harder
2011-03-01  5:09                                                 ` Mitch Harder
2011-03-01 10:14                                                 ` Zhong, Xin
2011-03-01 11:56                                                   ` Zhong, Xin
2011-03-01 14:54                                                     ` Mitch Harder
2011-03-01 14:51                                                   ` Mitch Harder
2011-03-01 21:56                                                 ` Piotr Szymaniak
2011-02-24 23:35                   ` [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page Piotr Szymaniak
2011-02-22 22:27               ` Johannes Hirte
2011-02-23  7:27                 ` Zhong, Xin
2011-02-23 21:56                   ` Chris Mason
2011-02-23 23:02                     ` Johannes Hirte
2011-02-24 15:23                       ` Chris Mason
2011-01-28 16:47         ` Maria Wikström
2011-01-28 18:27           ` Rui Miguel Silva
2011-01-29 15:38             ` Maria Wikström

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1296166345.11397.16.camel@mainframe \
    --to=maria@ponstudios.se \
    --cc=johannes.hirte@fem.tu-ilmenau.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=xin.zhong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).