linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] fs: wire up .truncate_range and .fallocate
       [not found] <1322038412-29013-1-git-send-email-amwang@redhat.com>
@ 2011-11-23  8:53 ` Cong Wang
  2011-11-23 10:38   ` Christoph Hellwig
  0 siblings, 1 reply; 3+ messages in thread
From: Cong Wang @ 2011-11-23  8:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, Hugh Dickins, Al Viro, Christoph Hellwig, WANG Cong,
	Matthew Wilcox, Andrea Arcangeli, Rik van Riel, Mel Gorman,
	Minchan Kim, Johannes Weiner, linux-fsdevel, linux-mm

As Hugh suggested, with FALLOC_FL_PUNCH_HOLE, we can use do_fallocate()
to implement madvise_remove and finally remove .truncate_range call back.

Cc: Hugh Dickins <hughd@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: WANG Cong <amwang@redhat.com>

---
 include/linux/fs.h |    1 -
 include/linux/mm.h |    3 ++-
 mm/madvise.c       |    8 +++++---
 mm/shmem.c         |    1 -
 mm/truncate.c      |   21 +++++++++++++--------
 5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e313022..266df73 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1635,7 +1635,6 @@ struct inode_operations {
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
-	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 } ____cacheline_aligned;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c..a47f744 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -951,7 +951,8 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 extern int vmtruncate(struct inode *inode, loff_t offset);
-extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
+extern int vmtruncate_file_range(struct file *file, struct inode *inode,
+			loff_t offset, loff_t end);
 
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/mm/madvise.c b/mm/madvise.c
index 74bf193..05610d3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -194,7 +194,8 @@ static long madvise_remove(struct vm_area_struct *vma,
 				struct vm_area_struct **prev,
 				unsigned long start, unsigned long end)
 {
-	struct address_space *mapping;
+	struct file *file;
+	struct inode *inode;
 	loff_t offset, endoff;
 	int error;
 
@@ -211,7 +212,8 @@ static long madvise_remove(struct vm_area_struct *vma,
 	if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
 		return -EACCES;
 
-	mapping = vma->vm_file->f_mapping;
+	file = vma->vm_file;
+	inode = file->f_mapping->host;
 
 	offset = (loff_t)(start - vma->vm_start)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
@@ -220,7 +222,7 @@ static long madvise_remove(struct vm_area_struct *vma,
 
 	/* vmtruncate_range needs to take i_mutex */
 	up_read(&current->mm->mmap_sem);
-	error = vmtruncate_range(mapping->host, offset, endoff);
+	error = vmtruncate_file_range(file, inode, offset, endoff);
 	down_read(&current->mm->mmap_sem);
 	return error;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 65f7a27..fce5667 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2356,7 +2356,6 @@ static const struct file_operations shmem_file_operations = {
 
 static const struct inode_operations shmem_inode_operations = {
 	.setattr	= shmem_setattr,
-	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_XATTR
 	.setxattr	= shmem_setxattr,
 	.getxattr	= shmem_getxattr,
diff --git a/mm/truncate.c b/mm/truncate.c
index 632b15e..7c46539 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/falloc.h>
 #include "internal.h"
 
 
@@ -602,24 +603,28 @@ int vmtruncate(struct inode *inode, loff_t newsize)
 }
 EXPORT_SYMBOL(vmtruncate);
 
-int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+int vmtruncate_file_range(struct file *file, struct inode *inode,
+		     loff_t lstart, loff_t lend)
 {
 	struct address_space *mapping = inode->i_mapping;
 	loff_t holebegin = round_up(lstart, PAGE_SIZE);
 	loff_t holelen = 1 + lend - holebegin;
+	int err;
 
-	/*
-	 * If the underlying filesystem is not going to provide
-	 * a way to truncate a range of blocks (punch a hole) -
-	 * we should return failure right now.
-	 */
-	if (!inode->i_op->truncate_range)
+	if (!file->f_op->fallocate)
 		return -ENOSYS;
 
 	mutex_lock(&inode->i_mutex);
 	inode_dio_wait(inode);
 	unmap_mapping_range(mapping, holebegin, holelen, 1);
-	inode->i_op->truncate_range(inode, lstart, lend);
+	mutex_unlock(&inode->i_mutex);
+
+	err = do_fallocate(file, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE,
+		     holebegin, holelen);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
 	/* unmap again to remove racily COWed private pages */
 	unmap_mapping_range(mapping, holebegin, holelen, 1);
 	mutex_unlock(&inode->i_mutex);
-- 
1.7.4.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] fs: wire up .truncate_range and .fallocate
  2011-11-23  8:53 ` [PATCH 2/2] fs: wire up .truncate_range and .fallocate Cong Wang
@ 2011-11-23 10:38   ` Christoph Hellwig
  2011-11-23 19:16     ` Hugh Dickins
  0 siblings, 1 reply; 3+ messages in thread
From: Christoph Hellwig @ 2011-11-23 10:38 UTC (permalink / raw)
  To: Cong Wang
  Cc: linux-kernel, akpm, Hugh Dickins, Al Viro, Christoph Hellwig,
	Matthew Wilcox, Andrea Arcangeli, Rik van Riel, Mel Gorman,
	Minchan Kim, Johannes Weiner, linux-fsdevel, linux-mm

On Wed, Nov 23, 2011 at 04:53:31PM +0800, Cong Wang wrote:
> +int vmtruncate_file_range(struct file *file, struct inode *inode,
> +		     loff_t lstart, loff_t lend)

We can always get the inode from file->f_path.dentry->d_inode, thus
passing both of them doesn't make much sense.

> +	if (!file->f_op->fallocate)
>  		return -ENOSYS;
>  
>  	mutex_lock(&inode->i_mutex);
>  	inode_dio_wait(inode);
>  	unmap_mapping_range(mapping, holebegin, holelen, 1);
> -	inode->i_op->truncate_range(inode, lstart, lend);
> +	mutex_unlock(&inode->i_mutex);
> +
> +	err = do_fallocate(file, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE,
> +		     holebegin, holelen);
> +	if (err)
> +		return err;
> +
> +	mutex_lock(&inode->i_mutex);
>  	/* unmap again to remove racily COWed private pages */
>  	unmap_mapping_range(mapping, holebegin, holelen, 1);
>  	mutex_unlock(&inode->i_mutex);

I suspect this should be turned inside out, just we do for normal
truncate.  That is instead of keeping vmtruncate(_file)_range we
should have a truncate_pagecache_range do to the actual zapping,
closely mirroring what we do for truncate.  In the best case we'd
even implement the regular truncate ones on top of the range version.

It also seems like all fallocate implementaions for far got away
without the unmap_mapping_range, so either people didn't test them
hard enough, or tmpfs doesn't need it either.  I fear the former
is true.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] fs: wire up .truncate_range and .fallocate
  2011-11-23 10:38   ` Christoph Hellwig
@ 2011-11-23 19:16     ` Hugh Dickins
  0 siblings, 0 replies; 3+ messages in thread
From: Hugh Dickins @ 2011-11-23 19:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Cong Wang, linux-kernel, akpm, Al Viro, Matthew Wilcox,
	Andrea Arcangeli, Rik van Riel, Mel Gorman, Minchan Kim,
	Johannes Weiner, linux-fsdevel, linux-mm

On Wed, 23 Nov 2011, Christoph Hellwig wrote:
> 
> It also seems like all fallocate implementaions for far got away
> without the unmap_mapping_range, so either people didn't test them
> hard enough, or tmpfs doesn't need it either.  I fear the former
> is true.

They're saved by the funny little one-by-one unmap_mapping_range()
fallback in truncate_inode_page().  It's inefficient (in those rare
cases when someone is punching a hole somewhere that's mapped) and
we ought to do better, but we don't have an actual bug there.

Hugh

int truncate_inode_page(struct address_space *mapping, struct page *page)
{
	if (page_mapped(page)) {
		unmap_mapping_range(mapping,
				   (loff_t)page->index << PAGE_CACHE_SHIFT,
				   PAGE_CACHE_SIZE, 0);
	}
	return truncate_complete_page(mapping, page);
}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-11-23 19:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1322038412-29013-1-git-send-email-amwang@redhat.com>
2011-11-23  8:53 ` [PATCH 2/2] fs: wire up .truncate_range and .fallocate Cong Wang
2011-11-23 10:38   ` Christoph Hellwig
2011-11-23 19:16     ` Hugh Dickins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).