linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
To: bo.li.liu@oracle.com
Cc: clm@fb.com, jbacik@fb.com, dsterba@suse.cz,
	linux-btrfs@vger.kernel.org, chandan@mykolab.com
Subject: Re: [RFC PATCH V11 13/21] Btrfs: subpagesize-blocksize: Deal with partial ordered extent allocations.
Date: Tue, 07 Jul 2015 19:08:31 +0530	[thread overview]
Message-ID: <32668308.c3o97L3Tz8@localhost.localdomain> (raw)
In-Reply-To: <20150706100631.GC6105@localhost.localdomain>

On Monday 06 Jul 2015 18:06:33 Liu Bo wrote:
> On Mon, Jun 01, 2015 at 08:52:48PM +0530, Chandan Rajendra wrote:
> > In subpagesize-blocksize scenario, extent allocations for only some of the
> > dirty blocks of a page can succeed, while allocation for rest of the
> > blocks
> > can fail. This patch allows I/O against such partially allocated ordered
> > extents to be submitted.
> > 
> > Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
> > ---
> > 
> >  fs/btrfs/extent_io.c | 27 ++++++++++++++-------------
> >  fs/btrfs/inode.c     | 35 ++++++++++++++++++++++-------------
> >  2 files changed, 36 insertions(+), 26 deletions(-)
> > 
> > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > index 0b017e1..0110abc 100644
> > --- a/fs/btrfs/extent_io.c
> > +++ b/fs/btrfs/extent_io.c
> > @@ -1850,17 +1850,23 @@ int extent_clear_unlock_delalloc(struct inode
> > *inode, u64 start, u64 end,> 
> >  			if (page_ops & PAGE_SET_PRIVATE2)
> >  			
> >  				SetPagePrivate2(pages[i]);
> > 
> > +			if (page_ops & PAGE_SET_ERROR)
> > +				SetPageError(pages[i]);
> > +
> > 
> >  			if (pages[i] == locked_page) {
> >  			
> >  				page_cache_release(pages[i]);
> >  				continue;
> >  			
> >  			}
> > 
> > -			if (page_ops & PAGE_CLEAR_DIRTY)
> > +
> > +			if ((page_ops & PAGE_CLEAR_DIRTY)
> > +				&& !PagePrivate2(pages[i]))
> > 
> >  				clear_page_dirty_for_io(pages[i]);
> > 
> > -			if (page_ops & PAGE_SET_WRITEBACK)
> > +			if ((page_ops & PAGE_SET_WRITEBACK)
> > +				&& !PagePrivate2(pages[i]))
> > 
> >  				set_page_writeback(pages[i]);
> > 
> > -			if (page_ops & PAGE_SET_ERROR)
> > -				SetPageError(pages[i]);
> > -			if (page_ops & PAGE_END_WRITEBACK)
> > +
> > +			if ((page_ops & PAGE_END_WRITEBACK)
> > +				&& !PagePrivate2(pages[i]))
> > 
> >  				end_page_writeback(pages[i]);
> >  			
> >  			if (page_ops & PAGE_UNLOCK)
> >  			
> >  				unlock_page(pages[i]);
> > 
> > @@ -2550,7 +2556,7 @@ int end_extent_writepage(struct page *page, int err,
> > u64 start, u64 end)> 
> >  			uptodate = 0;
> >  	
> >  	}
> > 
> > -	if (!uptodate) {
> > +	if (!uptodate || PageError(page)) {
> > 
> >  		ClearPageUptodate(page);
> >  		SetPageError(page);
> >  		ret = ret < 0 ? ret : -EIO;
> > 
> > @@ -3340,7 +3346,6 @@ static noinline_for_stack int
> > writepage_delalloc(struct inode *inode,> 
> >  					       nr_written);
> >  		
> >  		/* File system has been set read-only */
> >  		if (ret) {
> > 
> > -			SetPageError(page);
> > 
> >  			/* fill_delalloc should be return < 0 for error
> >  			
> >  			 * but just in case, we use > 0 here meaning the
> >  			 * IO is started, so we don't want to return > 0
> > 
> > @@ -3561,7 +3566,6 @@ static int __extent_writepage(struct page *page,
> > struct writeback_control *wbc,> 
> >  	struct inode *inode = page->mapping->host;
> >  	struct extent_page_data *epd = data;
> >  	u64 start = page_offset(page);
> > 
> > -	u64 page_end = start + PAGE_CACHE_SIZE - 1;
> > 
> >  	int ret;
> >  	int nr = 0;
> >  	size_t pg_offset = 0;
> > 
> > @@ -3606,7 +3610,7 @@ static int __extent_writepage(struct page *page,
> > struct writeback_control *wbc,> 
> >  	ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
> >  	if (ret == 1)
> >  	
> >  		goto done_unlocked;
> > 
> > -	if (ret)
> > +	if (ret && !PagePrivate2(page))
> > 
> >  		goto done;
> >  	
> >  	ret = __extent_writepage_io(inode, page, wbc, epd,
> > 
> > @@ -3620,10 +3624,7 @@ done:
> >  		set_page_writeback(page);
> >  		end_page_writeback(page);
> >  	
> >  	}
> > 
> > -	if (PageError(page)) {
> > -		ret = ret < 0 ? ret : -EIO;
> > -		end_extent_writepage(page, ret, start, page_end);
> > -	}
> > +
> > 
> >  	unlock_page(page);
> >  	return ret;
> > 
> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > index 8b4aaed..bff60c6 100644
> > --- a/fs/btrfs/inode.c
> > +++ b/fs/btrfs/inode.c
> > @@ -925,6 +925,8 @@ static noinline int cow_file_range(struct inode
> > *inode,
> > 
> >  	struct btrfs_key ins;
> >  	struct extent_map *em;
> >  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
> > 
> > +	struct btrfs_ordered_extent *ordered;
> > +	unsigned long page_ops, extent_ops;
> > 
> >  	int ret = 0;
> >  	
> >  	if (btrfs_is_free_space_inode(inode)) {
> > 
> > @@ -969,8 +971,6 @@ static noinline int cow_file_range(struct inode
> > *inode,
> > 
> >  	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
> >  	
> >  	while (disk_num_bytes > 0) {
> > 
> > -		unsigned long op;
> > -
> > 
> >  		cur_alloc_size = disk_num_bytes;
> >  		ret = btrfs_reserve_extent(root, cur_alloc_size,
> >  		
> >  					   root->sectorsize, 0, alloc_hint,
> > 
> > @@ -1023,7 +1023,7 @@ static noinline int cow_file_range(struct inode
> > *inode,> 
> >  			ret = btrfs_reloc_clone_csums(inode, start,
> >  			
> >  						      cur_alloc_size);
> >  			
> >  			if (ret)
> > 
> > -				goto out_drop_extent_cache;
> > +				goto out_remove_ordered_extent;
> > 
> >  		}
> >  		
> >  		if (disk_num_bytes < cur_alloc_size)
> > 
> > @@ -1036,13 +1036,12 @@ static noinline int cow_file_range(struct inode
> > *inode,> 
> >  		 * Do set the Private2 bit so we know this page was properly
> >  		 * setup for writepage
> >  		 */
> > 
> > -		op = unlock ? PAGE_UNLOCK : 0;
> > -		op |= PAGE_SET_PRIVATE2;
> > -
> > +		page_ops = unlock ? PAGE_UNLOCK : 0;
> > +		page_ops |= PAGE_SET_PRIVATE2;
> > +		extent_ops = EXTENT_LOCKED | EXTENT_DELALLOC;
> > 
> >  		extent_clear_unlock_delalloc(inode, start,
> > 
> > -					     start + ram_size - 1, locked_page,
> > -					     EXTENT_LOCKED | EXTENT_DELALLOC,
> > -					     op);
> > +					start + ram_size - 1, locked_page,
> > +					extent_ops, page_ops);
> > 
> >  		disk_num_bytes -= cur_alloc_size;
> >  		num_bytes -= cur_alloc_size;
> >  		alloc_hint = ins.objectid + ins.offset;
> > 
> > @@ -1051,16 +1050,26 @@ static noinline int cow_file_range(struct inode
> > *inode,> 
> >  out:
> >  	return ret;
> > 
> > +out_remove_ordered_extent:
> > +	ordered = btrfs_lookup_ordered_extent(inode, ins.objectid);
> > +	BUG_ON(!ordered);
> > +	btrfs_remove_ordered_extent(inode, ordered);
> > +
> 
> Two problems here,
> 
> 1. ins.objectid refers to block address while
> btrfs_lookup_ordered_extent() expects a file offset.
>
Ah, that has most probably saved me from hours of debugging. Thanks a lot for
pointing it out.

> 2. Removing the ordered extent is not enough to clean it up: not only
> does this ordered extent remain in memory, but our reserved space count
> also needs to be cleaned up.
> 
> If we have to do it this way, I'd copy what btrfs_finish_ordered_io()
> does for its cleanup; however, I would prefer to call end_extent_writepage()
> directly here to keep it as simple as possible.
>
Yes, calling end_extent_writepage() looks to be the easiest and correct way to
do this. I will test and include the change in the next version of the
patchset.

> Thanks,
> 
> -liubo
> 
> >  out_drop_extent_cache:
> >  	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
> > 
> > +
> > 
> >  out_reserve:
> >  	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
> > 
> > +
> > 
> >  out_unlock:
> > +	page_ops = unlock ? PAGE_UNLOCK : 0;
> > +	page_ops |= PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK
> > +		| PAGE_SET_ERROR;
> > +	extent_ops = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING
> > +		| EXTENT_DEFRAG;
> > +
> > 
> >  	extent_clear_unlock_delalloc(inode, start, end, locked_page,
> > 
> > -				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
> > -				     EXTENT_DELALLOC | EXTENT_DEFRAG,
> > -				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
> > -				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
> > +				extent_ops, page_ops);
> > 
> >  	goto out;
> >  
> >  }

-- 
chandan


  reply	other threads:[~2015-07-07 13:39 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-01 15:22 [RFC PATCH V11 00/21] Btrfs: Subpagesize-blocksize: Allow I/O on blocks whose size is less than page size Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 01/21] Btrfs: subpagesize-blocksize: Fix whole page read Chandan Rajendra
2015-06-19  4:45   ` Liu Bo
2015-06-19  9:45     ` Chandan Rajendra
2015-06-23  8:37       ` Liu Bo
2016-02-10 10:44         ` David Sterba
2016-02-10 10:39       ` David Sterba
2016-02-11  5:42         ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 02/21] Btrfs: subpagesize-blocksize: Fix whole page write Chandan Rajendra
2015-06-26  9:50   ` Liu Bo
2015-06-29  8:54     ` Chandan Rajendra
2015-07-01 14:27       ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 03/21] Btrfs: subpagesize-blocksize: __btrfs_buffered_write: Reserve/release extents aligned to block size Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 04/21] Btrfs: subpagesize-blocksize: Define extent_buffer_head Chandan Rajendra
2015-07-01 14:33   ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 05/21] Btrfs: subpagesize-blocksize: Read tree blocks whose size is < PAGE_SIZE Chandan Rajendra
2015-07-01 14:40   ` Liu Bo
2015-07-03 10:02     ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 06/21] Btrfs: subpagesize-blocksize: Write only dirty extent buffers belonging to a page Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 07/21] Btrfs: subpagesize-blocksize: Allow mounting filesystems where sectorsize != PAGE_SIZE Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 08/21] Btrfs: subpagesize-blocksize: Compute and look up csums based on sectorsized blocks Chandan Rajendra
2015-07-01 14:37   ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 09/21] Btrfs: subpagesize-blocksize: Direct I/O read: Work " Chandan Rajendra
2015-07-01 14:45   ` Liu Bo
2015-07-03 10:05     ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 10/21] Btrfs: subpagesize-blocksize: fallocate: Work with sectorsized units Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 11/21] Btrfs: subpagesize-blocksize: btrfs_page_mkwrite: Reserve space in " Chandan Rajendra
2015-07-06  3:18   ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 12/21] Btrfs: subpagesize-blocksize: Search for all ordered extents that could span across a page Chandan Rajendra
2015-07-01 14:47   ` Liu Bo
2015-07-03 10:08     ` Chandan Rajendra
2015-07-06  3:17       ` Liu Bo
2015-07-06 10:49         ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 13/21] Btrfs: subpagesize-blocksize: Deal with partial ordered extent allocations Chandan Rajendra
2015-07-06 10:06   ` Liu Bo
2015-07-07 13:38     ` Chandan Rajendra [this message]
2015-06-01 15:22 ` [RFC PATCH V11 14/21] Btrfs: subpagesize-blocksize: Explicitly Track I/O status of blocks of an ordered extent Chandan Rajendra
2015-07-20  8:34   ` Liu Bo
2015-07-20 12:54     ` Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 15/21] Btrfs: subpagesize-blocksize: Revert commit fc4adbff823f76577ece26dcb88bf6f8392dbd43 Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 16/21] Btrfs: subpagesize-blocksize: Prevent writes to an extent buffer when PG_writeback flag is set Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 17/21] Btrfs: subpagesize-blocksize: Use (eb->start, seq) as search key for tree modification log Chandan Rajendra
2015-07-20 14:46   ` Liu Bo
2015-06-01 15:22 ` [RFC PATCH V11 18/21] Btrfs: subpagesize-blocksize: btrfs_submit_direct_hook: Handle map_length < bio vector length Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 19/21] Revert "btrfs: fix lockups from btrfs_clear_path_blocking" Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 20/21] Btrfs: subpagesize-blockssize: Limit inline extents to root->sectorsize Chandan Rajendra
2015-06-01 15:22 ` [RFC PATCH V11 21/21] Btrfs: subpagesize-blocksize: Fix block size returned to user space Chandan Rajendra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=32668308.c3o97L3Tz8@localhost.localdomain \
    --to=chandan@linux.vnet.ibm.com \
    --cc=bo.li.liu@oracle.com \
    --cc=chandan@mykolab.com \
    --cc=clm@fb.com \
    --cc=dsterba@suse.cz \
    --cc=jbacik@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).