linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3 v2] ext4: punching hole improvement
@ 2012-11-19 12:55 Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

Hi all, 

This patch set improves the hole punching feature.  The improvements are as
follows.
 - add block-based file punching hole support
 - add tracepoint in punching hole

In patch 1, it introduces punching hole feature for block-based file.

In patch 2, we check the FALLOC_FL_PUNCH_HOLE flag first in ext4_fallocate to
fully enable hole punching for both extent-based and block-based files.

In patch 3, a tracepoint is added in ext4_punch_hole.

Any comments or feedback are appreciated.  Thanks!

v2 <- v1:
 * Rework patch 1.  Now it looks very simple and straightforward.

BTW, after applying this patch set, xfstest #255 will not pass on files without
extents, because block-based files do not support unwritten extents.

Regards,
						- Zheng
---
Zheng Liu(3)
      ext4: add indirect punching hole support
      ext4: let us fully support punching hole feature in fallocate
      ext4: add tracepoint for punching hole

 fs/ext4/ext4.h              |   1 +
 fs/ext4/extents.c           |  14 ++++-----
 fs/ext4/indirect.c          | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c             |   8 ++---
 include/trace/events/ext4.h |  25 +++++++++++++++
 5 files changed, 281 insertions(+), 11 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/3 v2] ext4: add indirect punching hole support
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
  2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu
  2 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

This patch adds hole punching support for indirect (block-mapped) files.  It is
almost the same as ext4_ext_punch_hole.  First, we invalidate all pages within
the hole, and then we try to deallocate all blocks of the hole.

A recursive function is used to handle deallocation of blocks.  This function
iterates over the entries in the inode's i_data array or in an indirect block,
and tries to free the block that each one of them points to.

 * After applying this patch, xfstest #255 will not pass w/o extent because
 * block-based file doesn't support unwritten extent.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/ext4.h     |   1 +
 fs/ext4/indirect.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c    |   6 +-
 3 files changed, 247 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c20de1..b1ac5d5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2035,6 +2035,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
+extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 792e388..ad58421 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1514,3 +1514,247 @@ out_stop:
 	trace_ext4_truncate_exit(inode);
 }
 
+/*
+ * free_hole_blocks - free the blocks of a hole below an array of block pointers
+ * @handle:	journal handle of the running transaction
+ * @inode:	inode being punched
+ * @parent_bh:	buffer that holds @i_data, or NULL when @i_data lives in the inode
+ * @i_data:	first of @max little-endian block pointers to scan
+ * @level:	indirection level of the entries (0 means they map data blocks)
+ * @first:	first logical block of the hole, relative to the start of @i_data
+ * @count:	length of the hole in logical blocks, counted from @first
+ * @max:	number of entries in @i_data
+ *
+ * Walks the entries of @i_data and recurses into every indirect block that
+ * overlaps the hole [@first, @first + @count).  A data block inside the hole
+ * is freed and its pointer cleared; an indirect block is freed only when
+ * all_zeroes() accepts its whole pointer array after the recursion.
+ *
+ * Returns 0 on success, -EIO when an indirect block cannot be read, or the
+ * error returned by a nested call.
+ */
+static int free_hole_blocks(handle_t *handle, struct inode *inode,
+			    struct buffer_head *parent_bh, __le32 *i_data,
+			    int level, ext4_lblk_t first,
+			    ext4_lblk_t count, int max)
+{
+	struct buffer_head *bh = NULL;
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ret = 0;
+	int i, inc;
+	ext4_lblk_t offset;
+	__le32 blk;
+
+	/* Number of logical blocks covered by one entry at this level. */
+	inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
+	for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
+		/* This entry and all following ones start past the hole. */
+		if (offset >= count + first)
+			break;
+		/* Skip unmapped entries and entries that end before the hole. */
+		if (*i_data == 0 || (offset + inc) <= first)
+			continue;
+		blk = *i_data;
+		if (level > 0) {
+			ext4_lblk_t first2, count2;
+
+			/* The pointer is little-endian; convert before use. */
+			bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
+			if (!bh) {
+				EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
+						       "Read failure");
+				return -EIO;
+			}
+			/*
+			 * Translate the hole into the child's coordinates,
+			 * whose block 0 is our block 'offset'.  The hole ends
+			 * at first + count - offset there; the break test
+			 * above guarantees that this does not underflow.
+			 */
+			first2 = (first > offset) ? first - offset : 0;
+			count2 = first + count - offset - first2;
+			ret = free_hole_blocks(handle, inode, bh,
+					       (__le32 *)bh->b_data, level - 1,
+					       first2, count2,
+					       inode->i_sb->s_blocksize >> 2);
+			if (ret) {
+				brelse(bh);
+				goto err;
+			}
+		}
+		/*
+		 * Data blocks are always released.  An indirect block is
+		 * released only if nothing is left mapped below it.
+		 */
+		if (level == 0 ||
+		    (bh && all_zeroes((__le32 *)bh->b_data,
+				      (__le32 *)bh->b_data + addr_per_block))) {
+			ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
+			*i_data = 0;
+		}
+		brelse(bh);
+		bh = NULL;
+	}
+
+err:
+	return ret;
+}
+
+/*
+ * ext4_free_hole_blocks - free the blocks of a hole in a block-mapped inode
+ * @handle:	journal handle of the running transaction
+ * @inode:	inode being punched
+ * @first:	first logical block of the hole
+ * @stop:	logical block just past the end of the hole
+ *
+ * Visits the inode's i_data array level by level: first the
+ * EXT4_NDIR_BLOCKS direct pointers (level 0), then one root pointer each
+ * for levels 1, 2 and 3.  At every level that overlaps the hole,
+ * free_hole_blocks() is called with the hole expressed relative to the
+ * first block mapped by that level.
+ *
+ * Returns 0 on success or the error returned by free_hole_blocks().
+ */
+static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+				 ext4_lblk_t first, ext4_lblk_t stop)
+{
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int level, ret = 0;
+	int num = EXT4_NDIR_BLOCKS;
+	ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
+	__le32 *i_data = EXT4_I(inode)->i_data;
+
+	count = stop - first;
+	/* 'max' is the number of logical blocks mapped by the current level. */
+	for (level = 0; level < 4; level++, max *= addr_per_block) {
+		if (first < max) {
+			ret = free_hole_blocks(handle, inode, NULL, i_data,
+					       level, first, count, num);
+			if (ret)
+				goto err;
+			/*
+			 * Continue only if the hole reaches past this level.
+			 * The test must account for 'first': a hole that
+			 * starts inside this level may spill into the next
+			 * one even when count <= max.
+			 */
+			if (count > max - first)
+				count -= max - first;
+			else
+				break;
+			first = 0;
+		} else {
+			/* The hole starts in a later level; rebase it. */
+			first -= max;
+		}
+		i_data += num;
+		if (level == 0) {
+			/*
+			 * Each remaining level has a single root pointer;
+			 * the loop increment scales 'max' to its coverage.
+			 */
+			num = 1;
+			max = 1;
+		}
+	}
+
+err:
+	return ret;
+}
+
+/*
+ * ext4_ind_punch_hole - punch a hole in a block-mapped (non-extent) file
+ * @file:	file to punch the hole in
+ * @offset:	byte offset at which the hole starts
+ * @length:	length of the hole in bytes
+ *
+ * Writes back dirty pages in the range, drops the fully covered pages from
+ * the page cache, zeroes the partial pages at both ends of the hole and
+ * frees every block that lies completely inside the hole.
+ *
+ * Returns 0 on success (including when @offset is at or beyond i_size),
+ * -EPERM for append-only or immutable inodes, -ETXTBSY for swap files, or
+ * the error reported by one of the helpers, including a failure to start
+ * the transaction.
+ */
+int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	ext4_lblk_t first_block, stop_block;
+	struct address_space *mapping = inode->i_mapping;
+	handle_t *handle = NULL;
+	loff_t first_page, last_page, page_len;
+	loff_t first_page_offset, last_page_offset;
+	int err = 0;
+
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* Punching a hole is not allowed on append-only or immutable files */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
+	/* No need to punch hole beyond i_size */
+	if (offset >= inode->i_size)
+		goto out_mutex;
+
+	/*
+	 * If the hole extends beyond i_size, set the hole
+	 * to end after the page that contains i_size
+	 */
+	if (offset + length > inode->i_size) {
+		length = inode->i_size +
+		    PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+		    offset;
+	}
+
+	/* First page fully inside the hole and first page past its end */
+	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+	first_page_offset = first_page << PAGE_CACHE_SHIFT;
+	last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+	/* Now release the pages */
+	if (last_page_offset > first_page_offset) {
+		truncate_pagecache_range(inode, first_page_offset,
+					 last_page_offset - 1);
+	}
+
+	/* Wait all existing dio works, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	err = ext4_flush_unwritten_io(inode);
+	if (err)
+		goto out_dio;
+	inode_dio_wait(inode);
+
+	handle = start_transaction(inode);
+	if (IS_ERR(handle)) {
+		/* Report the failure instead of returning success */
+		err = PTR_ERR(handle);
+		goto out_dio;
+	}
+
+	/*
+	 * Now we need to zero out the non-page-aligned data in the
+	 * pages at the start and tail of the hole, and unmap the buffer
+	 * heads for the block aligned regions of the page that were
+	 * completely zeroed.
+	 */
+	if (first_page > last_page) {
+		/*
+		 * If the file space being truncated is contained within a page
+		 * just zero out and unmap the middle of that page
+		 */
+		err = ext4_discard_partial_page_buffers(handle,
+			mapping, offset, length, 0);
+		if (err)
+			goto out;
+	} else {
+		/*
+		 * Zero out and unmap the partial page that contains
+		 * the start of the hole
+		 */
+		page_len = first_page_offset - offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+							offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * Zero out and unmap the partial page that contains
+		 * the end of the hole
+		 */
+		page_len = offset + length - last_page_offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+						last_page_offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+	}
+
+	/*
+	 * If i_size is contained in the last page, we need to
+	 * unmap and zero the partial page after i_size
+	 */
+	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+	    inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle,
+				mapping, inode->i_size, page_len, 0);
+			if (err)
+				goto out;
+		}
+	}
+
+	/* Only blocks that lie completely inside the hole are freed */
+	first_block = (offset + sb->s_blocksize - 1) >>
+		EXT4_BLOCK_SIZE_BITS(sb);
+	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+	if (first_block >= stop_block)
+		goto out;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_preallocations(inode);
+
+	err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
+
+	ext4_discard_preallocations(inode);
+
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+
+	up_write(&EXT4_I(inode)->i_data_sem);
+
+out:
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
+
+	return err;
+}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3c243b..733ed5b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3478,10 +3478,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		/* TODO: Add support for non extent hole punching */
-		return -EOPNOTSUPP;
-	}
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		return ext4_ind_punch_hole(file, offset, length);
 
 	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
-- 
1.7.12.rc2.18.g61b472e


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2012-11-20  6:35   ` Guo Chao
  2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu
  2 siblings, 1 reply; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

After adding indirect punching hole feature, we need to enable it in fallocate.
For this purpose, some sanity checks need to be adjusted.  Currently we need to
check FALLOC_FL_PUNCH_HOLE flag before other sanity checks.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/extents.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7011ac9..b43b3e9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
-	/*
-	 * currently supporting (pre)allocate mode for extent-based
-	 * files _only_
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return -EOPNOTSUPP;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3 v2] ext4: add tracepoint for punching hole
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

This patch adds a tracepoint in ext4_punch_hole.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/inode.c             |  2 ++
 include/trace/events/ext4.h | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 733ed5b..f850ea6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3478,6 +3478,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
+	trace_ext4_punch_hole(inode, offset, length);
+
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return ext4_ind_punch_hole(file, offset, length);
 
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d49b285..476c7d3 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1311,6 +1311,31 @@ TRACE_EVENT(ext4_fallocate_exit,
 		  __entry->ret)
 );
 
+TRACE_EVENT(ext4_punch_hole,	/* logs dev, ino, offset and len */
+	TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
+
+	TP_ARGS(inode, offset, len),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	offset			)
+		__field(	loff_t, len			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->offset	= offset;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu offset %lld len %lld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long)__entry->ino,
+		  __entry->offset, __entry->len)
+);
+
 TRACE_EVENT(ext4_unlink_enter,
 	TP_PROTO(struct inode *parent, struct dentry *dentry),
 
-- 
1.7.12.rc2.18.g61b472e


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
@ 2012-11-20  6:35   ` Guo Chao
  2012-11-20  7:43     ` Zheng Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Guo Chao @ 2012-11-20  6:35 UTC (permalink / raw)
  To: Zheng Liu; +Cc: linux-ext4, Zheng Liu

Hi, Zheng:

On Mon, Nov 19, 2012 at 08:55:17PM +0800, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@taobao.com>
> 
> After adding indirect punching hole feature, we need to enable it in fallocate.
> For this purpose, some sanity checks need to be adjusted.  Currently we need to
> check FALLOC_FL_PUNCH_HOLE flag before other sanity checks.
> 
> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
> ---
>  fs/ext4/extents.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 7011ac9..b43b3e9 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
>  	struct ext4_map_blocks map;
>  	unsigned int credits, blkbits = inode->i_blkbits;
> 
> -	/*
> -	 * currently supporting (pre)allocate mode for extent-based
> -	 * files _only_
> -	 */
> -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
> -		return -EOPNOTSUPP;
> -
>  	/* Return error if mode is not supported */
>  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
>  		return -EOPNOTSUPP;

Checking these mode flags seems redundant here, VFS already checked them.
Maybe you can remove it by the way.

Regards,
Guo Chao


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-20  6:35   ` Guo Chao
@ 2012-11-20  7:43     ` Zheng Liu
  0 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-20  7:43 UTC (permalink / raw)
  To: Guo Chao; +Cc: linux-ext4, Zheng Liu

On Tue, Nov 20, 2012 at 02:35:05PM +0800, Guo Chao wrote:
> Hi, Zheng:
> 
> On Mon, Nov 19, 2012 at 08:55:17PM +0800, Zheng Liu wrote:
> > From: Zheng Liu <wenqing.lz@taobao.com>
> > 
> > After adding indirect punching hole feature, we need to enable it in fallocate.
> > For this purpose, some sanity checks need to be adjusted.  Currently we need to
> > check FALLOC_FL_PUNCH_HOLE flag before other sanity checks.
> > 
> > Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
> > ---
> >  fs/ext4/extents.c | 14 +++++++-------
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 7011ac9..b43b3e9 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
> >  	struct ext4_map_blocks map;
> >  	unsigned int credits, blkbits = inode->i_blkbits;
> > 
> > -	/*
> > -	 * currently supporting (pre)allocate mode for extent-based
> > -	 * files _only_
> > -	 */
> > -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
> > -		return -EOPNOTSUPP;
> > -
> >  	/* Return error if mode is not supported */
> >  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
> >  		return -EOPNOTSUPP;
> 
> Checking these mode flags seems redundant here, VFS already checked them.
> Maybe you can remove it by the way.

Yeah, I see.  Not only ext4 but also other filesystems, such as xfs and btrfs,
check it themselves.  I am not sure why we need to do this, but IMHO a better
way might be to remove it from all filesystems in a separate patch series.  I
will send it out.  Thanks for your suggestion.

Regards,
                                                - Zheng

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-11-20  7:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
2012-11-20  6:35   ` Guo Chao
2012-11-20  7:43     ` Zheng Liu
2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).