All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zheng Liu <gnehzuil.liu@gmail.com>
To: Jan Kara <jack@suse.cz>
Cc: Ted Tso <tytso@mit.edu>, linux-ext4@vger.kernel.org
Subject: Re: [PATCH 20/29] ext4: Use transaction reservation for extent conversion in ext4_end_io
Date: Wed, 8 May 2013 14:57:13 +0800	[thread overview]
Message-ID: <20130508065713.GB20599@gmail.com> (raw)
In-Reply-To: <1365456754-29373-21-git-send-email-jack@suse.cz>

On Mon, Apr 08, 2013 at 11:32:25PM +0200, Jan Kara wrote:
> Later we would like to clear PageWriteback bit only after extent conversion
> from unwritten to written extents is performed. However it is not possible
> to start a transaction after PageWriteback is set because that violates
> lock ordering (and is easy to deadlock). So we have to reserve a transaction
> before locking pages and sending them for IO and later we use the transaction
> for extent conversion from ext4_end_io().
> 
> Signed-off-by: Jan Kara <jack@suse.cz>

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Regards,
                                                - Zheng

> ---
>  fs/ext4/ext4.h      |   12 +++++++++---
>  fs/ext4/ext4_jbd2.h |    3 ++-
>  fs/ext4/extents.c   |   39 ++++++++++++++++++++++++++++-----------
>  fs/ext4/inode.c     |   32 ++++++++++++++++++++++++++++++--
>  fs/ext4/page-io.c   |   11 ++++++++---
>  5 files changed, 77 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3c3827a..65adf0d 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -182,10 +182,13 @@ struct ext4_map_blocks {
>  #define EXT4_IO_END_DIRECT	0x0004
>  
>  /*
> - * For converting uninitialized extents on a work queue.
> + * For converting uninitialized extents on a work queue. 'handle' is used for
> + * buffered writeback.
>   */
>  typedef struct ext4_io_end {
>  	struct list_head	list;		/* per-file finished IO list */
> +	handle_t		*handle;	/* handle reserved for extent
> +						 * conversion */
>  	struct inode		*inode;		/* file being written to */
>  	unsigned int		flag;		/* unwritten or not */
>  	loff_t			offset;		/* offset in the file */
> @@ -1314,6 +1317,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
>  					      struct ext4_io_end *io_end)
>  {
>  	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
> +		/* Writeback has to have coversion transaction reserved */
> +		WARN_ON(!io_end->handle &&
> +			!(io_end->flag & EXT4_IO_END_DIRECT));
>  		io_end->flag |= EXT4_IO_END_UNWRITTEN;
>  		atomic_inc(&EXT4_I(inode)->i_unwritten);
>  	}
> @@ -2550,8 +2556,8 @@ extern void ext4_ext_init(struct super_block *);
>  extern void ext4_ext_release(struct super_block *);
>  extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
>  			  loff_t len);
> -extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
> -			  ssize_t len);
> +extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
> +					  loff_t offset, ssize_t len);
>  extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
>  			   struct ext4_map_blocks *map, int flags);
>  extern int ext4_ext_calc_metadata_amount(struct inode *inode,
> diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
> index bb17931..88e95d7 100644
> --- a/fs/ext4/ext4_jbd2.h
> +++ b/fs/ext4/ext4_jbd2.h
> @@ -132,7 +132,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
>  #define EXT4_HT_MIGRATE          8
>  #define EXT4_HT_MOVE_EXTENTS     9
>  #define EXT4_HT_XATTR           10
> -#define EXT4_HT_MAX             11
> +#define EXT4_HT_EXT_CONVERT     11
> +#define EXT4_HT_MAX             12
>  
>  /**
>   *   struct ext4_journal_cb_entry - Base structure for callback information.
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 8064b71..ae22735 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4484,10 +4484,9 @@ retry:
>   * function, to convert the fallocated extents after IO is completed.
>   * Returns 0 on success.
>   */
> -int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
> -				    ssize_t len)
> +int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
> +				   loff_t offset, ssize_t len)
>  {
> -	handle_t *handle;
>  	unsigned int max_blocks;
>  	int ret = 0;
>  	int ret2 = 0;
> @@ -4502,16 +4501,31 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
>  	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
>  		      map.m_lblk);
>  	/*
> -	 * credits to insert 1 extent into extent tree
> +	 * This is somewhat ugly but the idea is clear: When transaction is
> +	 * reserved, everything goes into it. Otherwise we rather start several
> +	 * smaller transactions for conversion of each extent separately.
>  	 */
> -	credits = ext4_chunk_trans_blocks(inode, max_blocks);
> +	if (handle) {
> +		handle = ext4_journal_start_reserved(handle);
> +		if (IS_ERR(handle))
> +			return PTR_ERR(handle);
> +		credits = 0;
> +	} else {
> +		/*
> +		 * credits to insert 1 extent into extent tree
> +		 */
> +		credits = ext4_chunk_trans_blocks(inode, max_blocks);
> +	}
>  	while (ret >= 0 && ret < max_blocks) {
>  		map.m_lblk += ret;
>  		map.m_len = (max_blocks -= ret);
> -		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
> -		if (IS_ERR(handle)) {
> -			ret = PTR_ERR(handle);
> -			break;
> +		if (credits) {
> +			handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
> +						    credits);
> +			if (IS_ERR(handle)) {
> +				ret = PTR_ERR(handle);
> +				break;
> +			}
>  		}
>  		ret = ext4_map_blocks(handle, inode, &map,
>  				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
> @@ -4522,10 +4536,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
>  				     inode->i_ino, map.m_lblk,
>  				     map.m_len, ret);
>  		ext4_mark_inode_dirty(handle, inode);
> -		ret2 = ext4_journal_stop(handle);
> -		if (ret <= 0 || ret2 )
> +		if (credits)
> +			ret2 = ext4_journal_stop(handle);
> +		if (ret <= 0 || ret2)
>  			break;
>  	}
> +	if (!credits)
> +		ret2 = ext4_journal_stop(handle);
>  	return ret > 0 ? ret2 : ret;
>  }
>  
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 0602a09..f8e78ce 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1327,6 +1327,8 @@ static void ext4_da_page_release_reservation(struct page *page,
>  struct mpage_da_data {
>  	struct inode *inode;
>  	struct writeback_control *wbc;
> +	handle_t *reserved_handle;	/* Handle reserved for conversion */
> +
>  	pgoff_t first_page;	/* The first page to write */
>  	pgoff_t next_page;	/* Current page to examine */
>  	pgoff_t last_page;	/* Last page to examine */
> @@ -1973,8 +1975,13 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
>  	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
>  	if (err < 0)
>  		return err;
> -	if (map->m_flags & EXT4_MAP_UNINIT)
> +	if (map->m_flags & EXT4_MAP_UNINIT) {
> +		if (!mpd->io_submit.io_end->handle) {
> +			mpd->io_submit.io_end->handle = mpd->reserved_handle;
> +			mpd->reserved_handle = NULL;
> +		}
>  		ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
> +	}
>  
>  	BUG_ON(map->m_len == 0);
>  	if (map->m_flags & EXT4_MAP_NEW) {
> @@ -2274,6 +2281,7 @@ static int ext4_da_writepages(struct address_space *mapping,
>  
>  	mpd.inode = inode;
>  	mpd.wbc = wbc;
> +	mpd.reserved_handle = NULL;
>  	ext4_io_submit_init(&mpd.io_submit, wbc);
>  retry:
>  	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
> @@ -2288,6 +2296,23 @@ retry:
>  			break;
>  		}
>  
> +		/* Reserve handle if it may be needed for extent conversion */
> +		if (ext4_should_dioread_nolock(inode) && !mpd.reserved_handle) {
> +			/*
> +			 * We may need to convert upto one extent per block in
> +			 * the page and we may dirty the inode.
> +			 */
> +			mpd.reserved_handle = ext4_journal_reserve(inode,
> +				EXT4_HT_EXT_CONVERT,
> +				1 + (PAGE_CACHE_SIZE >> inode->i_blkbits));
> +			if (IS_ERR(mpd.reserved_handle)) {
> +				ret = PTR_ERR(mpd.reserved_handle);
> +				mpd.reserved_handle = NULL;
> +				ext4_put_io_end(mpd.io_submit.io_end);
> +				break;
> +			}
> +		}
> +
>  		/*
>  		 * We have two constraints: We find one extent to map and we
>  		 * must always write out whole page (makes a difference when
> @@ -2364,6 +2389,9 @@ retry:
>  		 */
>  		mapping->writeback_index = mpd.first_page;
>  
> +	if (mpd.reserved_handle)
> +		ext4_journal_free_reserved(mpd.reserved_handle);
> +
>  out_writepages:
>  	trace_ext4_da_writepages_result(inode, wbc, ret,
>  					nr_to_write - wbc->nr_to_write);
> @@ -2977,7 +3005,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
>  		 * for non AIO case, since the IO is already
>  		 * completed, we could do the conversion right here
>  		 */
> -		err = ext4_convert_unwritten_extents(inode,
> +		err = ext4_convert_unwritten_extents(NULL, inode,
>  						     offset, ret);
>  		if (err < 0)
>  			ret = err;
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index cc59cd9..e8ee4da 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -55,6 +55,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
>  {
>  	BUG_ON(!list_empty(&io_end->list));
>  	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
> +	WARN_ON(io_end->handle);
>  
>  	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
>  		wake_up_all(ext4_ioend_wq(io_end->inode));
> @@ -81,13 +82,15 @@ static int ext4_end_io(ext4_io_end_t *io)
>  	struct inode *inode = io->inode;
>  	loff_t offset = io->offset;
>  	ssize_t size = io->size;
> +	handle_t *handle = io->handle;
>  	int ret = 0;
>  
>  	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
>  		   "list->prev 0x%p\n",
>  		   io, inode->i_ino, io->list.next, io->list.prev);
>  
> -	ret = ext4_convert_unwritten_extents(inode, offset, size);
> +	io->handle = NULL;	/* Following call will use up the handle */
> +	ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
>  	if (ret < 0) {
>  		ext4_msg(inode->i_sb, KERN_EMERG,
>  			 "failed to convert unwritten extents to written "
> @@ -217,8 +220,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
>  
>  	if (atomic_dec_and_test(&io_end->count)) {
>  		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
> -			err = ext4_convert_unwritten_extents(io_end->inode,
> -						io_end->offset, io_end->size);
> +			err = ext4_convert_unwritten_extents(io_end->handle,
> +						io_end->inode, io_end->offset,
> +						io_end->size);
> +			io_end->handle = NULL;
>  			ext4_clear_io_unwritten_flag(io_end);
>  		}
>  		ext4_release_io_end(io_end);
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2013-05-08  6:39 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-08 21:32 [PATCH 00/22 v1] Fixes and improvements in ext4 writeback path Jan Kara
2013-04-08 21:32 ` [PATCH 01/29] ext4: Make ext4_bio_write_page() use BH_Async_Write flags instead page pointers from ext4_io_end Jan Kara
2013-04-10 18:05   ` Dmitry Monakhov
2013-04-11 13:38   ` Zheng Liu
2013-04-12  3:50   ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 02/29] ext4: Use io_end for multiple bios Jan Kara
2013-04-11  5:10   ` Dmitry Monakhov
2013-04-11 14:04   ` Zheng Liu
2013-04-12  3:55   ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 03/29] ext4: Clear buffer_uninit flag when submitting IO Jan Kara
2013-04-11 14:08   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 04/29] jbd2: Reduce journal_head size Jan Kara
2013-04-11 14:10   ` Zheng Liu
2013-04-12  4:04   ` Theodore Ts'o
2013-04-08 21:32 ` [PATCH 05/29] jbd2: Don't create journal_head for temporary journal buffers Jan Kara
2013-04-12  8:01   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 06/29] jbd2: Remove journal_head from descriptor buffers Jan Kara
2013-04-12  8:10   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 07/29] jbd2: Refine waiting for shadow buffers Jan Kara
2013-05-03 14:16   ` Zheng Liu
2013-05-03 20:44     ` Jan Kara
2013-04-08 21:32 ` [PATCH 08/29] jbd2: Remove outdated comment Jan Kara
2013-05-03 14:20   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 09/29] jbd2: Cleanup needed free block estimates when starting a transaction Jan Kara
2013-05-05  8:17   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 10/29] jbd2: Fix race in t_outstanding_credits update in jbd2_journal_extend() Jan Kara
2013-05-05  8:37   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 11/29] jbd2: Remove unused waitqueues Jan Kara
2013-05-05  8:41   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 12/29] jbd2: Transaction reservation support Jan Kara
2013-05-05  9:39   ` Zheng Liu
2013-05-06 12:49     ` Jan Kara
2013-05-07  5:22       ` Zheng Liu
2013-04-08 21:32 ` [PATCH 13/29] ext4: Provide wrappers for transaction reservation calls Jan Kara
2013-05-05 11:51   ` Zheng Liu
2013-05-05 11:58   ` Zheng Liu
2013-05-06 12:51     ` Jan Kara
2013-04-08 21:32 ` [PATCH 14/29] ext4: Stop messing with nr_to_write in ext4_da_writepages() Jan Kara
2013-05-05 12:40   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 15/29] ext4: Deprecate max_writeback_mb_bump sysfs attribute Jan Kara
2013-05-05 12:47   ` Zheng Liu
2013-05-06 12:55     ` Jan Kara
2013-04-08 21:32 ` [PATCH 16/29] ext4: Improve writepage credit estimate for files with indirect blocks Jan Kara
2013-05-07  5:39   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 17/29] ext4: Better estimate credits needed for ext4_da_writepages() Jan Kara
2013-05-07  6:33   ` Zheng Liu
2013-05-07 14:17     ` Jan Kara
2013-04-08 21:32 ` [PATCH 18/29] ext4: Restructure writeback path Jan Kara
2013-05-08  3:48   ` Zheng Liu
2013-05-08 11:20     ` Jan Kara
2013-04-08 21:32 ` [PATCH 19/29] ext4: Remove buffer_uninit handling Jan Kara
2013-05-08  6:56   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 20/29] ext4: Use transaction reservation for extent conversion in ext4_end_io Jan Kara
2013-05-08  6:57   ` Zheng Liu [this message]
2013-04-08 21:32 ` [PATCH 21/29] ext4: Split extent conversion lists to reserved & unreserved parts Jan Kara
2013-05-08  7:03   ` Zheng Liu
2013-05-08 11:23     ` Jan Kara
2013-05-08 11:49       ` Zheng Liu
2013-04-08 21:32 ` [PATCH 22/29] ext4: Defer clearing of PageWriteback after extent conversion Jan Kara
2013-05-08  7:08   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 23/29] ext4: Protect extent conversion after DIO with i_dio_count Jan Kara
2013-05-08  7:08   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 24/29] ext4: Remove wait for unwritten extent conversion from ext4_ext_truncate() Jan Kara
2013-05-08  7:35   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 25/29] ext4: Use generic_file_fsync() in ext4_file_fsync() in nojournal mode Jan Kara
2013-05-08  7:37   ` Zheng Liu
2013-05-08 11:29     ` Jan Kara
2013-04-08 21:32 ` [PATCH 26/29] ext4: Remove i_mutex from ext4_file_sync() Jan Kara
2013-05-08  7:41   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 27/29] ext4: Remove wait for unwritten extents in ext4_ind_direct_IO() Jan Kara
2013-05-08  7:55   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 28/29] ext4: Don't wait for extent conversion in ext4_ext_punch_hole() Jan Kara
2013-05-08  7:56   ` Zheng Liu
2013-04-08 21:32 ` [PATCH 29/29] ext4: Remove ext4_ioend_wait() Jan Kara
2013-05-08  7:57   ` Zheng Liu
2013-05-08 11:32     ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130508065713.GB20599@gmail.com \
    --to=gnehzuil.liu@gmail.com \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.