All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3 v2] ext4: punching hole improvement
@ 2012-11-19 12:55 Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

Hi all, 

In this patch set, the hole-punching feature is improved.  The improvements are
as follows.
 - add block-based file punching hole support
 - add tracepoint in punching hole

Patch 1 introduces hole punching for block-based (indirect-mapped) files.

Patch 2 checks the FALLOC_FL_PUNCH_HOLE flag first in ext4_fallocate, so that
hole punching is fully enabled for both extent-based and block-based files.

In patch 3, a tracepoint is added in ext4_punch_hole.

Any comments or feedback are appreciated.  Thanks!

v2 <- v1:
 * Rework patch 1.  Now it looks very simple and straightforward.

BTW, after applying this patch set, xfstest #255 will not pass without the
extents feature, because block-based files do not support unwritten extents.

Regards,
						- Zheng
---
Zheng Liu(3)
      ext4: add indirect punching hole support
      ext4: let us fully support punching hole feature in fallocate
      ext4: add tracepoint for punching hole

 fs/ext4/ext4.h              |   1 +
 fs/ext4/extents.c           |  14 ++++-----
 fs/ext4/indirect.c          | 244 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c             |   8 ++---
 include/trace/events/ext4.h |  25 +++++++++++++++
 5 files changed, 281 insertions(+), 11 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/3 v2] ext4: add indirect punching hole support
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
  2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu
  2 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

This patch adds hole-punching support for indirect-mapped (block-based) files.
It is almost the same as ext4_ext_punch_hole.  First, we invalidate all pages
within the hole, and then we try to deallocate all blocks in the hole.

A recursive function is used to handle deallocation of blocks.  It iterates
over the entries in the inode's i_data array or in an indirect block, and
tries to free the block referenced by each of them.

 * After applying this patch, xfstest #255 will not pass w/o extent because
 * block-based file doesn't support unwritten extent.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/ext4.h     |   1 +
 fs/ext4/indirect.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c    |   6 +-
 3 files changed, 247 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c20de1..b1ac5d5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2035,6 +2035,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
+extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 792e388..ad58421 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1514,3 +1514,247 @@ out_stop:
 	trace_ext4_truncate_exit(inode);
 }
 
+static int free_hole_blocks(handle_t *handle, struct inode *inode,
+			    struct buffer_head *parent_bh, __le32 *i_data,
+			    int level, ext4_lblk_t first,
+			    ext4_lblk_t count, int max)
+{
+	struct buffer_head *bh = NULL;
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ret = 0;
+	int i, inc;
+	ext4_lblk_t offset;
+	__le32 blk;
+
+	inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
+	for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
+		if (offset >= count + first)
+			break;
+		if (*i_data == 0 || (offset + inc) <= first)
+			continue;
+		blk = *i_data;
+		if (level > 0) {
+			ext4_lblk_t first2;
+			bh = sb_bread(inode->i_sb, blk);
+			if (!bh) {
+				EXT4_ERROR_INODE_BLOCK(inode, blk,
+						       "Read failure");
+				return -EIO;
+			}
+			first2 = (first > offset) ? first - offset : 0;
+			ret = free_hole_blocks(handle, inode, bh,
+					       (__le32 *)bh->b_data, level - 1,
+					       first2, count - offset,
+					       inode->i_sb->s_blocksize >> 2);
+			if (ret) {
+				brelse(bh);
+				goto err;
+			}
+		}
+		if (level == 0 ||
+		    (bh && all_zeroes((__le32 *)bh->b_data,
+				      (__le32 *)bh->b_data + addr_per_block))) {
+			ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
+			*i_data = 0;
+		}
+		brelse(bh);
+		bh = NULL;
+	}
+
+err:
+	return ret;
+}
+
+static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+				 ext4_lblk_t first, ext4_lblk_t stop)
+{
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int level, ret = 0;
+	int num = EXT4_NDIR_BLOCKS;
+	ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
+	__le32 *i_data = EXT4_I(inode)->i_data;
+
+	count = stop - first;
+	for (level = 0; level < 4; level++, max *= addr_per_block) {
+		if (first < max) {
+			ret = free_hole_blocks(handle, inode, NULL, i_data,
+					       level, first, count, num);
+			if (ret)
+				goto err;
+			if (count > max)
+				count -= max - first;
+			else
+				break;
+			first = 0;
+		} else {
+			first -= max;
+		}
+		i_data += num;
+		if (level == 0) {
+			num = 1;
+			max = 1;
+		}
+	}
+
+err:
+	return ret;
+}
+
+int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	ext4_lblk_t first_block, stop_block;
+	struct address_space *mapping = inode->i_mapping;
+	handle_t *handle = NULL;
+	loff_t first_page, last_page, page_len;
+	loff_t first_page_offset, last_page_offset;
+	int err = 0;
+
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
+	/* No need to punch hole beyond i_size */
+	if (offset >= inode->i_size)
+		goto out_mutex;
+
+	/*
+	 * If the hole extents beyond i_size, set the hole
+	 * to end after the page that contains i_size
+	 */
+	if (offset + length > inode->i_size) {
+		length = inode->i_size +
+		    PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+		    offset;
+	}
+
+	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+	first_page_offset = first_page << PAGE_CACHE_SHIFT;
+	last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+	/* Now release the pages */
+	if (last_page_offset > first_page_offset) {
+		truncate_pagecache_range(inode, first_page_offset,
+					 last_page_offset - 1);
+	}
+
+	/* Wait all existing dio works, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	err = ext4_flush_unwritten_io(inode);
+	if (err)
+		goto out_dio;
+	inode_dio_wait(inode);
+
+	handle = start_transaction(inode);
+	if (IS_ERR(handle))
+		goto out_dio;
+
+	/*
+	 * Now we need to zero out the non-page-aligned data in the
+	 * pages at the start and tail of the hole, and unmap the buffer
+	 * heads for the block aligned regions of the page that were
+	 * completely zerod.
+	 */
+	if (first_page > last_page) {
+		/*
+		 * If the file space being truncated is contained within a page
+		 * just zero out and unmap the middle of that page
+		 */
+		err = ext4_discard_partial_page_buffers(handle,
+			mapping, offset, length, 0);
+		if (err)
+			goto out;
+	} else {
+		/*
+		 * Zero out and unmap the paritial page that contains
+		 * the start of the hole
+		 */
+		page_len = first_page_offset - offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+							offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+
+		/*
+		 * Zero out and unmap the partial page that contains
+		 * the end of the hole
+		 */
+		page_len = offset + length - last_page_offset;
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle, mapping,
+						last_page_offset, page_len, 0);
+			if (err)
+				goto out;
+		}
+	}
+
+	/*
+	 * If i_size contained in the last page, we need to
+	 * unmap and zero the paritial page after i_size
+	 */
+	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+	    inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+		if (page_len > 0) {
+			err = ext4_discard_partial_page_buffers(handle,
+				mapping, inode->i_size, page_len, 0);
+			if (err)
+				goto out;
+		}
+	}
+
+	first_block = (offset + sb->s_blocksize - 1) >>
+		EXT4_BLOCK_SIZE_BITS(sb);
+	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+	if (first_block >= stop_block)
+		goto out;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_preallocations(inode);
+
+	err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
+
+	ext4_discard_preallocations(inode);
+
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+
+	up_write(&EXT4_I(inode)->i_data_sem);
+
+out:
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
+
+	return err;
+}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3c243b..733ed5b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3478,10 +3478,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		/* TODO: Add support for non extent hole punching */
-		return -EOPNOTSUPP;
-	}
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		return ext4_ind_punch_hole(file, offset, length);
 
 	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
-- 
1.7.12.rc2.18.g61b472e


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2012-11-20  6:35   ` Guo Chao
  2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu
  2 siblings, 1 reply; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

After adding the indirect hole-punching feature, we need to enable it in
fallocate.  For this purpose, some sanity checks need to be adjusted: the
FALLOC_FL_PUNCH_HOLE flag now has to be checked before the other sanity checks.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/extents.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7011ac9..b43b3e9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	struct ext4_map_blocks map;
 	unsigned int credits, blkbits = inode->i_blkbits;
 
-	/*
-	 * currently supporting (pre)allocate mode for extent-based
-	 * files _only_
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return -EOPNOTSUPP;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3 v2] ext4: add tracepoint for punching hole
  2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
  2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
@ 2012-11-19 12:55 ` Zheng Liu
  2 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-19 12:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Zheng Liu

From: Zheng Liu <wenqing.lz@taobao.com>

This patch adds a tracepoint in ext4_punch_hole.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 fs/ext4/inode.c             |  2 ++
 include/trace/events/ext4.h | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 733ed5b..f850ea6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3478,6 +3478,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
+	trace_ext4_punch_hole(inode, offset, length);
+
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return ext4_ind_punch_hole(file, offset, length);
 
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d49b285..476c7d3 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1311,6 +1311,31 @@ TRACE_EVENT(ext4_fallocate_exit,
 		  __entry->ret)
 );
 
+TRACE_EVENT(ext4_punch_hole,
+	TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
+
+	TP_ARGS(inode, offset, len),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	offset			)
+		__field(	loff_t, len			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->offset	= offset;
+		__entry->len	= len;
+	),
+
+	TP_printk("dev %d,%d ino %lu offset %lld len %lld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long)__entry->ino,
+		  __entry->offset, __entry->len)
+);
+
 TRACE_EVENT(ext4_unlink_enter,
 	TP_PROTO(struct inode *parent, struct dentry *dentry),
 
-- 
1.7.12.rc2.18.g61b472e


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
@ 2012-11-20  6:35   ` Guo Chao
  2012-11-20  7:43     ` Zheng Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Guo Chao @ 2012-11-20  6:35 UTC (permalink / raw)
  To: Zheng Liu; +Cc: linux-ext4, Zheng Liu

Hi, Zheng:

On Mon, Nov 19, 2012 at 08:55:17PM +0800, Zheng Liu wrote:
> From: Zheng Liu <wenqing.lz@taobao.com>
> 
> After adding indirect punching hole feature, we need to enable it in fallocate.
> For this purpose, some sanity checks need to be adjusted.  Currently we need to
> check FALLOC_FL_PUNCH_HOLE flag before other sanity checks.
> 
> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
> ---
>  fs/ext4/extents.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 7011ac9..b43b3e9 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
>  	struct ext4_map_blocks map;
>  	unsigned int credits, blkbits = inode->i_blkbits;
> 
> -	/*
> -	 * currently supporting (pre)allocate mode for extent-based
> -	 * files _only_
> -	 */
> -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
> -		return -EOPNOTSUPP;
> -
>  	/* Return error if mode is not supported */
>  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
>  		return -EOPNOTSUPP;

Checking these mode flags seems redundant here; the VFS has already checked
them.  Maybe you can remove the check while you are at it.

Regards,
Guo Chao


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate
  2012-11-20  6:35   ` Guo Chao
@ 2012-11-20  7:43     ` Zheng Liu
  0 siblings, 0 replies; 6+ messages in thread
From: Zheng Liu @ 2012-11-20  7:43 UTC (permalink / raw)
  To: Guo Chao; +Cc: linux-ext4, Zheng Liu

On Tue, Nov 20, 2012 at 02:35:05PM +0800, Guo Chao wrote:
> Hi, Zheng:
> 
> On Mon, Nov 19, 2012 at 08:55:17PM +0800, Zheng Liu wrote:
> > From: Zheng Liu <wenqing.lz@taobao.com>
> > 
> > After adding indirect punching hole feature, we need to enable it in fallocate.
> > For this purpose, some sanity checks need to be adjusted.  Currently we need to
> > check FALLOC_FL_PUNCH_HOLE flag before other sanity checks.
> > 
> > Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
> > ---
> >  fs/ext4/extents.c | 14 +++++++-------
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 7011ac9..b43b3e9 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -4420,13 +4420,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
> >  	struct ext4_map_blocks map;
> >  	unsigned int credits, blkbits = inode->i_blkbits;
> > 
> > -	/*
> > -	 * currently supporting (pre)allocate mode for extent-based
> > -	 * files _only_
> > -	 */
> > -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
> > -		return -EOPNOTSUPP;
> > -
> >  	/* Return error if mode is not supported */
> >  	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
> >  		return -EOPNOTSUPP;
> 
> Checking these mode flags seems redundant here, VFS already checked them.
> Maybe you can remove it by the way.

Yeah, I see.  Not only ext4 but also other filesystems, such as xfs and btrfs,
perform this check themselves.  I am not sure why we need to do this, but IMHO
a better way might be to remove it from all filesystems in a separate patch
series.  I will send it out.  Thanks for your suggestion.

Regards,
                                                - Zheng

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-11-20  7:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-11-19 12:55 [PATCH 0/3 v2] ext4: punching hole improvement Zheng Liu
2012-11-19 12:55 ` [PATCH 1/3 v2] ext4: add indirect punching hole support Zheng Liu
2012-11-19 12:55 ` [PATCH 2/3 v2] ext4: let us fully support punching hole feature in fallocate Zheng Liu
2012-11-20  6:35   ` Guo Chao
2012-11-20  7:43     ` Zheng Liu
2012-11-19 12:55 ` [PATCH 3/3 v2] ext4: add tracepoint for punching hole Zheng Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.