linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [rfc][patch 1/4] fs: new truncate helpers
@ 2009-07-07 14:44 Nick Piggin
  2009-07-07 14:46 ` [rfc][patch 2/4] fs: use " Nick Piggin
                   ` (3 more replies)
  0 siblings, 4 replies; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 14:44 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Christoph Hellwig, Jan Kara, LKML, linux-mm


Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
vmtruncate is also consolidated from mm/memory.c and mm/nommu.c and
into mm/truncate.c.

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 Documentation/vm/locking |    2 -
 fs/attr.c                |   45 +++++++++++++++++++++++++++++++++-
 include/linux/fs.h       |    1 
 include/linux/mm.h       |    1 
 mm/filemap.c             |    2 -
 mm/memory.c              |   62 ++---------------------------------------------
 mm/mremap.c              |    4 +--
 mm/nommu.c               |   40 ------------------------------
 mm/truncate.c            |   61 ++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 114 insertions(+), 104 deletions(-)

Index: linux-2.6/fs/attr.c
===================================================================
--- linux-2.6.orig/fs/attr.c
+++ linux-2.6/fs/attr.c
@@ -60,9 +60,52 @@ fine:
 error:
 	return retval;
 }
-
 EXPORT_SYMBOL(inode_change_ok);
 
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:	the inode to be truncated
+ * @offset:	the new size to assign to the inode
+ * @Returns:	0 on success, -ve errno on failure
+ *
+ * Description:
+ *    inode_newsize_ok will check filesystem limits and ulimits to
+ *    check that the new inode size is within limits. inode_newsize_ok
+ *    will also send SIGXFSZ when necessary. Caller must not proceed
+ *    with inode size change if failure is returned. @inode must be
+ *    a file (not directory), with appropriate permissions to allow
+ *    truncate (inode_newsize_ok does NOT check these conditions).
+ *
+ *    inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(struct inode *inode, loff_t offset)
+{
+	if (inode->i_size < offset) {
+		unsigned long limit;
+
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && offset > limit)
+			goto out_sig;
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_big;
+	} else {
+		/*
+		 * truncation of in-use swapfiles is disallowed - it would
+		 * cause subsequent swapout to scribble on the now-freed
+		 * blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+	}
+
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out_big:
+	return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -2367,6 +2367,7 @@ extern int buffer_migrate_page(struct ad
 #endif
 
 extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_newsize_ok(struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -805,6 +805,7 @@ static inline void unmap_shared_mapping_
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
+extern void truncate_pagecache(struct inode * inode, loff_t old, loff_t new);
 extern int vmtruncate(struct inode * inode, loff_t offset);
 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
 
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -282,7 +282,8 @@ void free_pgtables(struct mmu_gather *tl
 		unsigned long addr = vma->vm_start;
 
 		/*
-		 * Hide vma from rmap and vmtruncate before freeing pgtables
+		 * Hide vma from rmap and truncate_pagecache before freeing
+		 * pgtables
 		 */
 		anon_vma_unlink(vma);
 		unlink_file_vma(vma);
@@ -2358,7 +2359,7 @@ restart:
  * @mapping: the address space containing mmaps to be unmapped.
  * @holebegin: byte in first page to unmap, relative to the start of
  * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
  * must keep the partial page.  In contrast, we must get rid of
  * partial pages.
  * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2409,63 +2410,6 @@ void unmap_mapping_range(struct address_
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-	if (inode->i_size < offset) {
-		unsigned long limit;
-
-		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-		if (limit != RLIM_INFINITY && offset > limit)
-			goto out_sig;
-		if (offset > inode->i_sb->s_maxbytes)
-			goto out_big;
-		i_size_write(inode, offset);
-	} else {
-		struct address_space *mapping = inode->i_mapping;
-
-		/*
-		 * truncation of in-use swapfiles is disallowed - it would
-		 * cause subsequent swapout to scribble on the now-freed
-		 * blocks.
-		 */
-		if (IS_SWAPFILE(inode))
-			return -ETXTBSY;
-		i_size_write(inode, offset);
-
-		/*
-		 * unmap_mapping_range is called twice, first simply for
-		 * efficiency so that truncate_inode_pages does fewer
-		 * single-page unmaps.  However after this first call, and
-		 * before truncate_inode_pages finishes, it is possible for
-		 * private pages to be COWed, which remain after
-		 * truncate_inode_pages finishes, hence the second
-		 * unmap_mapping_range call must be made for correctness.
-		 */
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-		truncate_inode_pages(mapping, offset);
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	}
-
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out_big:
-	return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
 	struct address_space *mapping = inode->i_mapping;
Index: linux-2.6/mm/nommu.c
===================================================================
--- linux-2.6.orig/mm/nommu.c
+++ linux-2.6/mm/nommu.c
@@ -86,46 +86,6 @@ struct vm_operations_struct generic_file
 };
 
 /*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
-
-	if (inode->i_size < offset)
-		goto do_expand;
-	i_size_write(inode, offset);
-
-	truncate_inode_pages(mapping, offset);
-	goto out_truncate;
-
-do_expand:
-	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit)
-		goto out_sig;
-	if (offset > inode->i_sb->s_maxbytes)
-		goto out;
-	i_size_write(inode, offset);
-
-out_truncate:
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out:
-	return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
-/*
  * Return the total memory allocated for this pointer, not
  * just what the caller asked for.
  *
Index: linux-2.6/Documentation/vm/locking
===================================================================
--- linux-2.6.orig/Documentation/vm/locking
+++ linux-2.6/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 
 swap_lock
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -59,7 +59,7 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock		(vmtruncate)
+ *  ->i_mmap_lock		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
Index: linux-2.6/mm/mremap.c
===================================================================
--- linux-2.6.orig/mm/mremap.c
+++ linux-2.6/mm/mremap.c
@@ -85,8 +85,8 @@ static void move_ptes(struct vm_area_str
 	if (vma->vm_file) {
 		/*
 		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock vmtruncate out,
-		 * since it might clean the dst vma before the src vma,
+		 * moving file-based ptes, we must lock truncate_pagecache
+		 * out, since it might clean the dst vma before the src vma,
 		 * and we propagate stale pages into the dst afterward.
 		 */
 		mapping = vma->vm_file->f_mapping;
Index: linux-2.6/mm/truncate.c
===================================================================
--- linux-2.6.orig/mm/truncate.c
+++ linux-2.6/mm/truncate.c
@@ -465,3 +465,64 @@ int invalidate_inode_pages2(struct addre
 	return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap mappings "freed" by truncate() syscall
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ */
+void truncate_pagecache(struct inode * inode, loff_t old, loff_t new)
+{
+	if (new < old) {
+		struct address_space *mapping = inode->i_mapping;
+
+		/*
+		 * unmap_mapping_range is called twice, first simply for
+		 * efficiency so that truncate_inode_pages does fewer
+		 * single-page unmaps.  However after this first call, and
+		 * before truncate_inode_pages finishes, it is possible for
+		 * private pages to be COWed, which remain after
+		 * truncate_inode_pages finishes, hence the second
+		 * unmap_mapping_range call must be made for correctness.
+		 */
+		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+		truncate_inode_pages(mapping, new);
+		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+	}
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ *
+ * This function is deprecated and truncate_pagecache should be
+ * used instead.
+ */
+int vmtruncate(struct inode * inode, loff_t offset)
+{
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, offset);
+	if (error)
+		return error;
+	oldsize = inode->i_size;
+	i_size_write(inode, offset);
+	truncate_pagecache(inode, oldsize, offset);
+
+	if (inode->i_op->truncate)
+		inode->i_op->truncate(inode);
+
+	return error;
+}
+EXPORT_SYMBOL(vmtruncate);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [rfc][patch 2/4] fs: use new truncate helpers
  2009-07-07 14:44 [rfc][patch 1/4] fs: new truncate helpers Nick Piggin
@ 2009-07-07 14:46 ` Nick Piggin
  2009-07-07 14:53   ` Christoph Hellwig
  2009-07-07 14:48 ` [rfc][patch 3/4] fs: new truncate sequence Nick Piggin
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 14:46 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Christoph Hellwig, Jan Kara, LKML, linux-mm


Update some fs code to make use of new helper functions introduced
in the previous patch. Should be no significant change in behaviour
(except CIFS now calls send_sig under i_lock, via inode_newsize_ok).

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/buffer.c           |   10 +--------
 fs/cifs/inode.c       |   51 ++++++++-----------------------------------------
 fs/fuse/dir.c         |   14 +++----------
 fs/fuse/fuse_i.h      |    2 -
 fs/fuse/inode.c       |   11 ----------
 fs/nfs/inode.c        |   52 +++++++++++---------------------------------------
 fs/ramfs/file-nommu.c |   18 ++++-------------
 7 files changed, 33 insertions(+), 125 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -2225,16 +2225,10 @@ int generic_cont_expand_simple(struct in
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 	void *fsdata;
-	unsigned long limit;
 	int err;
 
-	err = -EFBIG;
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && size > (loff_t)limit) {
-		send_sig(SIGXFSZ, current, 0);
-		goto out;
-	}
-	if (size > inode->i_sb->s_maxbytes)
+	err = inode_newsize_ok(inode, size);
+	if (err)
 		goto out;
 
 	err = pagecache_write_begin(NULL, mapping, size, 0,
Index: linux-2.6/fs/cifs/inode.c
===================================================================
--- linux-2.6.orig/fs/cifs/inode.c
+++ linux-2.6/fs/cifs/inode.c
@@ -1645,57 +1645,24 @@ static int cifs_truncate_page(struct add
 
 static int cifs_vmtruncate(struct inode *inode, loff_t offset)
 {
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
+	loff_t oldsize;
+	int err;
 
 	spin_lock(&inode->i_lock);
-	if (inode->i_size < offset)
-		goto do_expand;
-	/*
-	 * truncation of in-use swapfiles is disallowed - it would cause
-	 * subsequent swapout to scribble on the now-freed blocks.
-	 */
-	if (IS_SWAPFILE(inode)) {
+	err = inode_newsize_ok(inode, offset);
+	if (err) {
 		spin_unlock(&inode->i_lock);
-		goto out_busy;
+		goto out;
 	}
-	i_size_write(inode, offset);
-	spin_unlock(&inode->i_lock);
-	/*
-	 * unmap_mapping_range is called twice, first simply for efficiency
-	 * so that truncate_inode_pages does fewer single-page unmaps. However
-	 * after this first call, and before truncate_inode_pages finishes,
-	 * it is possible for private pages to be COWed, which remain after
-	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
-	 * call must be made for correctness.
-	 */
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	truncate_inode_pages(mapping, offset);
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	goto out_truncate;
 
-do_expand:
-	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit) {
-		spin_unlock(&inode->i_lock);
-		goto out_sig;
-	}
-	if (offset > inode->i_sb->s_maxbytes) {
-		spin_unlock(&inode->i_lock);
-		goto out_big;
-	}
+	oldsize = inode->i_size;
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
-out_truncate:
+	truncate_pagecache(inode, oldsize, offset);
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
-	return 0;
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out_big:
-	return -EFBIG;
-out_busy:
-	return -ETXTBSY;
+out:
+	return err;
 }
 
 static int
Index: linux-2.6/fs/fuse/dir.c
===================================================================
--- linux-2.6.orig/fs/fuse/dir.c
+++ linux-2.6/fs/fuse/dir.c
@@ -1225,14 +1225,9 @@ static int fuse_do_setattr(struct dentry
 		return 0;
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		unsigned long limit;
-		if (IS_SWAPFILE(inode))
-			return -ETXTBSY;
-		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-		if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
-			send_sig(SIGXFSZ, current, 0);
-			return -EFBIG;
-		}
+		err = inode_newsize_ok(inode, attr->ia_size);
+		if (err)
+			return err;
 		is_truncate = true;
 	}
 
@@ -1299,8 +1294,7 @@ static int fuse_do_setattr(struct dentry
 	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
 	 */
 	if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-		if (outarg.attr.size < oldsize)
-			fuse_truncate(inode->i_mapping, outarg.attr.size);
+		truncate_pagecache(inode, oldsize, outarg.attr.size);
 		invalidate_inode_pages2(inode->i_mapping);
 	}
 
Index: linux-2.6/fs/nfs/inode.c
===================================================================
--- linux-2.6.orig/fs/nfs/inode.c
+++ linux-2.6/fs/nfs/inode.c
@@ -427,49 +427,21 @@ nfs_setattr(struct dentry *dentry, struc
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-	if (i_size_read(inode) < offset) {
-		unsigned long limit;
+	loff_t oldsize;
+	int err;
 
-		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-		if (limit != RLIM_INFINITY && offset > limit)
-			goto out_sig;
-		if (offset > inode->i_sb->s_maxbytes)
-			goto out_big;
-		spin_lock(&inode->i_lock);
-		i_size_write(inode, offset);
-		spin_unlock(&inode->i_lock);
-	} else {
-		struct address_space *mapping = inode->i_mapping;
+	err = inode_newsize_ok(inode, offset);
+	if (err)
+		goto out;
 
-		/*
-		 * truncation of in-use swapfiles is disallowed - it would
-		 * cause subsequent swapout to scribble on the now-freed
-		 * blocks.
-		 */
-		if (IS_SWAPFILE(inode))
-			return -ETXTBSY;
-		spin_lock(&inode->i_lock);
-		i_size_write(inode, offset);
-		spin_unlock(&inode->i_lock);
+	spin_lock(&inode->i_lock);
+	oldsize = inode->i_size;
+	i_size_write(inode, offset);
+	spin_unlock(&inode->i_lock);
 
-		/*
-		 * unmap_mapping_range is called twice, first simply for
-		 * efficiency so that truncate_inode_pages does fewer
-		 * single-page unmaps.  However after this first call, and
-		 * before truncate_inode_pages finishes, it is possible for
-		 * private pages to be COWed, which remain after
-		 * truncate_inode_pages finishes, hence the second
-		 * unmap_mapping_range call must be made for correctness.
-		 */
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-		truncate_inode_pages(mapping, offset);
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	}
-	return 0;
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out_big:
-	return -EFBIG;
+	truncate_pagecache(inode, oldsize, offset);
+out:
+	return err;
 }
 
 /**
Index: linux-2.6/fs/ramfs/file-nommu.c
===================================================================
--- linux-2.6.orig/fs/ramfs/file-nommu.c
+++ linux-2.6/fs/ramfs/file-nommu.c
@@ -68,14 +68,11 @@ int ramfs_nommu_expand_for_mapping(struc
 	/* make various checks */
 	order = get_order(newsize);
 	if (unlikely(order >= MAX_ORDER))
-		goto too_big;
+		return -EFBIG;
 
-	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && newsize > limit)
-		goto fsize_exceeded;
-
-	if (newsize > inode->i_sb->s_maxbytes)
-		goto too_big;
+	ret = inode_newsize_ok(inode, newsize);
+	if (ret)
+		return ret;
 
 	i_size_write(inode, newsize);
 
@@ -117,12 +114,7 @@ int ramfs_nommu_expand_for_mapping(struc
 
 	return 0;
 
- fsize_exceeded:
-	send_sig(SIGXFSZ, current, 0);
- too_big:
-	return -EFBIG;
-
- add_error:
+add_error:
 	while (loop < npages)
 		__free_page(pages + loop++);
 	return ret;
Index: linux-2.6/fs/fuse/fuse_i.h
===================================================================
--- linux-2.6.orig/fs/fuse/fuse_i.h
+++ linux-2.6/fs/fuse/fuse_i.h
@@ -588,8 +588,6 @@ void fuse_change_attributes(struct inode
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 				   u64 attr_valid);
 
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
 /**
  * Initialize the client device
  */
Index: linux-2.6/fs/fuse/inode.c
===================================================================
--- linux-2.6.orig/fs/fuse/inode.c
+++ linux-2.6/fs/fuse/inode.c
@@ -115,14 +115,6 @@ static int fuse_remount_fs(struct super_
 	return 0;
 }
 
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
-	/* See vmtruncate() */
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	truncate_inode_pages(mapping, offset);
-	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 				   u64 attr_valid)
 {
@@ -180,8 +172,7 @@ void fuse_change_attributes(struct inode
 	spin_unlock(&fc->lock);
 
 	if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
-		if (attr->size < oldsize)
-			fuse_truncate(inode->i_mapping, attr->size);
+		truncate_pagecache(inode, oldsize, attr->size);
 		invalidate_inode_pages2(inode->i_mapping);
 	}
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 14:44 [rfc][patch 1/4] fs: new truncate helpers Nick Piggin
  2009-07-07 14:46 ` [rfc][patch 2/4] fs: use " Nick Piggin
@ 2009-07-07 14:48 ` Nick Piggin
  2009-07-07 14:58   ` Christoph Hellwig
  2009-07-07 14:48 ` [rfc][patch 1/4] fs: new truncate helpers Christoph Hellwig
  2009-07-07 14:49 ` [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate Nick Piggin
  3 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 14:48 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Christoph Hellwig, Jan Kara, LKML, linux-mm

Don't know whether it is a good idea to move i_alloc_sem into implementation.
Maybe it is better to leave it in the caller in this patchset?
--

Introduce a new truncate calling sequence into fs/mm subsystems. Rather than
setattr > vmtruncate > truncate, add a new inode operation ->setsize, called
from notify_change when called with ATTR_SIZE. The filesystem will be
responsible for updating i_size and truncating pagecache. ->setattr will
still be called with ATTR_SIZE.

Generic code which previously called vmtruncate (in order to truncate blocks
that may have been instantiated past i_size) no longer calls vmtruncate in the
case that the inode has an ->setsize attribute. In that case it is the
responsibility of the caller to trim off blocks appropriately in case of
error.

Big problem with the previous calling sequence: the filesystem is not called
until i_size has already changed.  This means it is not allowed to fail the
call, and also it does not know what the previous i_size was. Also, generic
code calling vmtruncate to truncate allocated blocks in case of error had
no good way to return a meaningful error (or, for example, atomically handle
block deallocation).

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 Documentation/filesystems/Locking |    2 ++
 Documentation/filesystems/vfs.txt |    9 ++++++++-
 fs/attr.c                         |   22 +++++++++++++++++++---
 fs/buffer.c                       |   11 ++++++++---
 fs/configfs/inode.c               |    1 +
 fs/direct-io.c                    |    7 ++++---
 fs/libfs.c                        |   19 +++++++++++++++++++
 fs/ramfs/file-mmu.c               |    1 +
 include/linux/fs.h                |    9 +++++++++
 9 files changed, 71 insertions(+), 10 deletions(-)

Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -329,6 +329,24 @@ int simple_rename(struct inode *old_dir,
 	return 0;
 }
 
+int simple_setsize(struct dentry *dentry, loff_t newsize,
+			unsigned flags, struct file *file)
+{
+	struct inode *inode = dentry->d_inode;
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, newsize);
+	if (error)
+		return error;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
+
+	return error;
+}
+
 int simple_readpage(struct file *file, struct page *page)
 {
 	clear_highpage(page);
@@ -840,6 +858,7 @@ EXPORT_SYMBOL(generic_read_dir);
 EXPORT_SYMBOL(get_sb_pseudo);
 EXPORT_SYMBOL(simple_write_begin);
 EXPORT_SYMBOL(simple_write_end);
+EXPORT_SYMBOL(simple_setsize);
 EXPORT_SYMBOL(simple_dir_inode_operations);
 EXPORT_SYMBOL(simple_dir_operations);
 EXPORT_SYMBOL(simple_empty);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -431,6 +431,12 @@ typedef void (dio_iodone_t)(struct kiocb
 #define ATTR_TIMES_SET	(1 << 16)
 
 /*
+ * Setsize flags.
+ */
+#define SETSIZE_FILE	(1 << 0) /* Trucating via an open file (eg ftruncate) */
+#define SETSIZE_OPEN	(1 << 1) /* Truncating from open(O_TRUNC) */
+
+/*
  * This is the Inode Attributes structure, used for notify_change().  It
  * uses the above definitions as flags, to know which values have changed.
  * Also, in this manner, a Filesystem can look at only the values it cares
@@ -1527,6 +1533,7 @@ struct inode_operations {
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
+	int (*setsize) (struct dentry *, loff_t, unsigned, struct file *);
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
@@ -2332,6 +2339,8 @@ extern int simple_link(struct dentry *,
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+extern int simple_setsize(struct dentry *dentry, loff_t newsize,
+			unsigned flags, struct file *file);
 extern int simple_sync_file(struct file *, struct dentry *, int);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1992,9 +1992,13 @@ int block_write_begin(struct file *file,
 			 * prepare_write() may have instantiated a few blocks
 			 * outside i_size.  Trim these off again. Don't need
 			 * i_size_read because we hold i_mutex.
+			 *
+			 * Filesystems which define ->setsize must handle
+			 * this themselves.
 			 */
 			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
+				if (!inode->i_op->setsize)
+					vmtruncate(inode, inode->i_size);
 		}
 	}
 
@@ -2371,7 +2375,7 @@ int block_commit_write(struct page *page
  *
  * We are not allowed to take the i_mutex here so we have to play games to
  * protect against truncate races as the page could now be beyond EOF.  Because
- * vmtruncate() writes the inode size before removing pages, once we have the
+ * truncate writes the inode size before removing pages, once we have the
  * page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
@@ -2595,7 +2599,8 @@ out_release:
 	*pagep = NULL;
 
 	if (pos + len > inode->i_size)
-		vmtruncate(inode, inode->i_size);
+		if (!inode->i_op->setsize)
+			vmtruncate(inode, inode->i_size);
 
 	return ret;
 }
Index: linux-2.6/fs/direct-io.c
===================================================================
--- linux-2.6.orig/fs/direct-io.c
+++ linux-2.6/fs/direct-io.c
@@ -1210,14 +1210,15 @@ __blockdev_direct_IO(int rw, struct kioc
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
+	 * their own manner.
 	 */
 	if (unlikely(retval < 0 && (rw & WRITE))) {
 		loff_t isize = i_size_read(inode);
 
 		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+			if (!inode->i_op->setsize)
+				vmtruncate(inode, isize);
 	}
 
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking
+++ linux-2.6/Documentation/filesystems/Locking
@@ -45,6 +45,7 @@ ata *);
 	int (*follow_link) (struct dentry *, struct nameidata *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*setsize) (struct dentry *, loff_t, unsigned, struct file *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -67,6 +68,7 @@ rename:		yes (all)	(see below)
 readlink:	no
 follow_link:	no
 truncate:	yes		(see below)
+setsize:	yes
 setattr:	yes
 permission:	no
 getattr:	no
Index: linux-2.6/fs/attr.c
===================================================================
--- linux-2.6.orig/fs/attr.c
+++ linux-2.6/fs/attr.c
@@ -206,8 +206,24 @@ int notify_change(struct dentry * dentry
 	if (error)
 		return error;
 
-	if (ia_valid & ATTR_SIZE)
-		down_write(&dentry->d_inode->i_alloc_sem);
+	if (ia_valid & ATTR_SIZE) {
+		if (inode->i_op && inode->i_op->setsize) {
+			unsigned int flags = 0;
+			struct file *file = NULL;
+
+			if (ia_valid & ATTR_FILE) {
+				flags |= SETSIZE_FILE;
+				file = attr->ia_file;
+			}
+			if (ia_valid & ATTR_OPEN)
+				flags |= SETSIZE_OPEN;
+			error = inode->i_op->setsize(dentry, attr->ia_size,
+							flags, file);
+			if (error)
+				return error;
+		} else
+			down_write(&dentry->d_inode->i_alloc_sem);
+	}
 
 	if (inode->i_op && inode->i_op->setattr) {
 		error = inode->i_op->setattr(dentry, attr);
@@ -223,7 +239,7 @@ int notify_change(struct dentry * dentry
 		}
 	}
 
-	if (ia_valid & ATTR_SIZE)
+	if (ia_valid & ATTR_SIZE && !(inode->i_op && inode->i_op->setsize))
 		up_write(&dentry->d_inode->i_alloc_sem);
 
 	if (!error)
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6/Documentation/filesystems/vfs.txt
@@ -326,6 +326,7 @@ struct inode_operations {
         void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*setsize) (struct dentry *, loff_t, unsigned, struct file *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -401,7 +402,8 @@ otherwise noted.
   	started might not be in the page cache at the end of the
   	walk).
 
-  truncate: called by the VFS to change the size of a file.  The
+  truncate: Deprecated. This will not be called if ->setsize is defined.
+	Called by the VFS to change the size of a file.  The
  	i_size field of the inode is set to the desired size by the
  	VFS before this method is called.  This method is called by
  	the truncate(2) system call and related functionality.
@@ -409,6 +411,11 @@ otherwise noted.
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
+  setsize: Supersedes truncate. Called by the VFS to change the size of a file.
+	It is the responsibility of the implementation to check size is OK
+	(eg. using inode_newsize_ok()), update i_size, and trim pagecache
+	(eg. using truncate_pagecache()).
+
   setattr: called by the VFS to set attributes for a file. This method
   	is called by chmod(2) and related system calls.
 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 1/4] fs: new truncate helpers
  2009-07-07 14:44 [rfc][patch 1/4] fs: new truncate helpers Nick Piggin
  2009-07-07 14:46 ` [rfc][patch 2/4] fs: use " Nick Piggin
  2009-07-07 14:48 ` [rfc][patch 3/4] fs: new truncate sequence Nick Piggin
@ 2009-07-07 14:48 ` Christoph Hellwig
  2009-07-07 14:49 ` [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate Nick Piggin
  3 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 14:48 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-fsdevel, Christoph Hellwig, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 04:44:23PM +0200, Nick Piggin wrote:
> 
> Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
> vmtruncate is also consolidated from mm/memory.c and mm/nommu.c and
> into mm/truncate.c.
> 
> Signed-off-by: Nick Piggin <npiggin@suse.de>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-07 14:44 [rfc][patch 1/4] fs: new truncate helpers Nick Piggin
                   ` (2 preceding siblings ...)
  2009-07-07 14:48 ` [rfc][patch 1/4] fs: new truncate helpers Christoph Hellwig
@ 2009-07-07 14:49 ` Nick Piggin
  2009-07-07 16:38   ` Christoph Hellwig
  3 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 14:49 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Christoph Hellwig, Jan Kara, LKML, linux-mm


Convert ext2 and tmpfs to use the new truncate convention (with setsize
inode operation).

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/ext2/ext2.h  |    2 
 fs/ext2/file.c  |    2 
 fs/ext2/inode.c |  117 +++++++++++++++++++++++++++++++++++++++++++-------------
 mm/shmem.c      |   29 +++++++++----
 4 files changed, 112 insertions(+), 38 deletions(-)

Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c
+++ linux-2.6/fs/ext2/inode.c
@@ -53,6 +53,8 @@ static inline int ext2_inode_is_fast_sym
 		inode->i_blocks - ea_blocks == 0);
 }
 
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
+
 /*
  * Called at the last iput() if i_nlink is zero.
  */
@@ -68,7 +70,7 @@ void ext2_delete_inode (struct inode * i
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
-		ext2_truncate (inode);
+		ext2_truncate_blocks(inode, 0);
 	ext2_free_inode (inode);
 
 	return;
@@ -761,8 +763,33 @@ ext2_write_begin(struct file *file, stru
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+	ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+	if (ret < 0) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
+}
+
+static int ext2_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	int ret;
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (ret < len) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int
@@ -770,13 +797,22 @@ ext2_nobh_write_begin(struct file *file,
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	/*
 	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
 	 * directory handling code to pass around offsets rather than struct
 	 * pages in order to make this work easily.
 	 */
-	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 							ext2_get_block);
+	if (ret < 0) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int ext2_nobh_writepage(struct page *page,
@@ -796,9 +832,15 @@ ext2_direct_IO(int rw, struct kiocb *ioc
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				offset, nr_segs, ext2_get_block, NULL);
+	if (ret < 0 && (rw & WRITE)) {
+		loff_t isize = inode->i_size;
+		ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int
@@ -813,7 +855,7 @@ const struct address_space_operations ex
 	.writepage		= ext2_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext2_write_begin,
-	.write_end		= generic_write_end,
+	.write_end		= ext2_write_end,
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
@@ -1020,7 +1062,7 @@ static void ext2_free_branches(struct in
 		ext2_free_data(inode, p, q);
 }
 
-void ext2_truncate(struct inode *inode)
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
 {
 	__le32 *i_data = EXT2_I(inode)->i_data;
 	struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1032,27 +1074,8 @@ void ext2_truncate(struct inode *inode)
 	int n;
 	long iblock;
 	unsigned blocksize;
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode)))
-		return;
-	if (ext2_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-
 	blocksize = inode->i_sb->s_blocksize;
-	iblock = (inode->i_size + blocksize-1)
-					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
-
-	if (mapping_is_xip(inode->i_mapping))
-		xip_truncate_page(inode->i_mapping, inode->i_size);
-	else if (test_opt(inode->i_sb, NOBH))
-		nobh_truncate_page(inode->i_mapping,
-				inode->i_size, ext2_get_block);
-	else
-		block_truncate_page(inode->i_mapping,
-				inode->i_size, ext2_get_block);
+	iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
 	n = ext2_block_to_path(inode, iblock, offsets, NULL);
 	if (n == 0)
@@ -1120,6 +1143,46 @@ do_indirects:
 	ext2_discard_reservation(inode);
 
 	mutex_unlock(&ei->truncate_mutex);
+}
+
+int ext2_setsize(struct dentry *dentry, loff_t newsize,
+			unsigned int flags, struct file *file)
+{
+	struct inode *inode = dentry->d_inode;
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, newsize);
+	if (error)
+		return error;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return -EINVAL;
+	if (ext2_inode_is_fast_symlink(inode))
+		return -EINVAL;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	if (mapping_is_xip(inode->i_mapping))
+		error = xip_truncate_page(inode->i_mapping, newsize);
+	else if (test_opt(inode->i_sb, NOBH))
+		error = nobh_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	else
+		error = block_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	if (error)
+		return error;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
+
+	down_write(&inode->i_alloc_sem);
+	ext2_truncate_blocks(inode, newsize);
+	up_write(&inode->i_alloc_sem);
+
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	if (inode_needs_sync(inode)) {
 		sync_mapping_buffers(inode->i_mapping);
@@ -1127,6 +1190,8 @@ do_indirects:
 	} else {
 		mark_inode_dirty(inode);
 	}
+
+	return 0;
 }
 
 static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -730,10 +730,11 @@ done2:
 	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
 		/*
 		 * Call truncate_inode_pages again: racing shmem_unuse_inode
-		 * may have swizzled a page in from swap since vmtruncate or
-		 * generic_delete_inode did it, before we lowered next_index.
-		 * Also, though shmem_getpage checks i_size before adding to
-		 * cache, no recheck after: so fix the narrow window there too.
+		 * may have swizzled a page in from swap since
+		 * truncate_pagecache or generic_delete_inode did it, before we
+		 * lowered next_index.  Also, though shmem_getpage checks
+		 * i_size before adding to cache, no recheck after: so fix the
+		 * narrow window there too.
 		 *
 		 * Recalling truncate_inode_pages_range and unmap_mapping_range
 		 * every time for punch_hole (which never got a chance to clear
@@ -763,9 +764,17 @@ done2:
 	}
 }
 
-static void shmem_truncate(struct inode *inode)
+static int shmem_setsize(struct dentry *dentry, loff_t newsize,
+			unsigned int flags, struct file *file)
 {
-	shmem_truncate_range(inode, inode->i_size, (loff_t)-1);
+	int error;
+
+	error = simple_setsize(dentry, newsize, flags, file);
+	if (error)
+		return error;
+	shmem_truncate_range(dentry->d_inode, newsize, (loff_t)-1);
+
+	return error;
 }
 
 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
@@ -822,11 +831,11 @@ static void shmem_delete_inode(struct in
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 
-	if (inode->i_op->truncate == shmem_truncate) {
+	if (inode->i_op->setsize == shmem_setsize) {
 		truncate_inode_pages(inode->i_mapping, 0);
 		shmem_unacct_size(info->flags, inode->i_size);
 		inode->i_size = 0;
-		shmem_truncate(inode);
+		shmem_truncate_range(inode, 0, (loff_t)-1);
 		if (!list_empty(&info->swaplist)) {
 			mutex_lock(&shmem_swaplist_mutex);
 			list_del_init(&info->swaplist);
@@ -2018,7 +2027,7 @@ static const struct inode_operations shm
 };
 
 static const struct inode_operations shmem_symlink_inode_operations = {
-	.truncate	= shmem_truncate,
+	.setsize	= shmem_setsize,
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
 	.put_link	= shmem_put_link,
@@ -2438,7 +2447,7 @@ static const struct file_operations shme
 };
 
 static const struct inode_operations shmem_inode_operations = {
-	.truncate	= shmem_truncate,
+	.setsize	= shmem_setsize,
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_POSIX_ACL
Index: linux-2.6/fs/ext2/ext2.h
===================================================================
--- linux-2.6.orig/fs/ext2/ext2.h
+++ linux-2.6/fs/ext2/ext2.h
@@ -122,7 +122,7 @@ extern int ext2_write_inode (struct inod
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void ext2_truncate (struct inode *);
+extern int ext2_setsize(struct dentry *, loff_t, unsigned int, struct file *);
 extern int ext2_setattr (struct dentry *, struct iattr *);
 extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
Index: linux-2.6/fs/ext2/file.c
===================================================================
--- linux-2.6.orig/fs/ext2/file.c
+++ linux-2.6/fs/ext2/file.c
@@ -77,7 +77,7 @@ const struct file_operations ext2_xip_fi
 #endif
 
 const struct inode_operations ext2_file_inode_operations = {
-	.truncate	= ext2_truncate,
+	.setsize	= ext2_setsize,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 2/4] fs: use new truncate helpers
  2009-07-07 14:46 ` [rfc][patch 2/4] fs: use " Nick Piggin
@ 2009-07-07 14:53   ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 14:53 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-fsdevel, Christoph Hellwig, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 04:46:00PM +0200, Nick Piggin wrote:
> 
> Update some fs code to make use of new helper functions introduced
> in the previous patch. Should be no significant change in behaviour
> (except CIFS now calls send_sig under i_lock, via inode_newsize_ok).

Looks good to me,


Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 14:48 ` [rfc][patch 3/4] fs: new truncate sequence Nick Piggin
@ 2009-07-07 14:58   ` Christoph Hellwig
  2009-07-07 15:02     ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 14:58 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-fsdevel, Christoph Hellwig, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 04:48:23PM +0200, Nick Piggin wrote:
> Don't know whether it is a good idea to move i_alloc_sem into implementation.
> Maybe it is better to leave it in the caller in this patchset?

Generally moving locks without changing prototypes can be a very subtle
way to break things.  A good option is to move the locking in a separate
patch set in a patch series or at least release if it's otherwise to
complicated.

> +int simple_setsize(struct dentry *dentry, loff_t newsize,
> +			unsigned flags, struct file *file)

This one could probably also use kerneldoc comment.

> +{
> +	struct inode *inode = dentry->d_inode;
> +	loff_t oldsize;
> +	int error;
> +
> +	error = inode_newsize_ok(inode, newsize);
> +	if (error)
> +		return error;
> +
> +	oldsize = inode->i_size;
> +	i_size_write(inode, newsize);
> +	truncate_pagecache(inode, oldsize, newsize);
> +
> +	return error;
> +}

> +	if (ia_valid & ATTR_SIZE) {
> +		if (inode->i_op && inode->i_op->setsize) {

inode->i_op is mandatory these days.

> +			unsigned int flags = 0;
> +			struct file *file = NULL;
> +
> +			if (ia_valid & ATTR_FILE) {
> +				flags |= SETSIZE_FILE;
> +				file = attr->ia_file;
> +			}
> +			if (ia_valid & ATTR_OPEN)
> +				flags |= SETSIZE_OPEN;
> +			error = inode->i_op->setsize(dentry, attr->ia_size,
> +							flags, file);
> +			if (error)
> +				return error;

So you still pass down to ->setattr if ->setsize succeeded?  That's a
very confusing calling convention.  It also means we first do the
truncation and any following time/mode updates are in a separate
transaction which is a no-go.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 14:58   ` Christoph Hellwig
@ 2009-07-07 15:02     ` Nick Piggin
  2009-07-07 15:07       ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 15:02 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 10:58:20AM -0400, Christoph Hellwig wrote:
> On Tue, Jul 07, 2009 at 04:48:23PM +0200, Nick Piggin wrote:
> > Don't know whether it is a good idea to move i_alloc_sem into implementation.
> > Maybe it is better to leave it in the caller in this patchset?
> 
> Generally moving locks without changing prototypes can be a very subtle
> way to break things.  A good option is to move the locking in a separate
> patch set in a patch series or at least release if it's otherwise to
> complicated.

Yeah probably right.

 
> > +int simple_setsize(struct dentry *dentry, loff_t newsize,
> > +			unsigned flags, struct file *file)
> 
> This one could probably also use kerneldoc comment.
> 
> > +{
> > +	struct inode *inode = dentry->d_inode;
> > +	loff_t oldsize;
> > +	int error;
> > +
> > +	error = inode_newsize_ok(inode, newsize);
> > +	if (error)
> > +		return error;
> > +
> > +	oldsize = inode->i_size;
> > +	i_size_write(inode, newsize);
> > +	truncate_pagecache(inode, oldsize, newsize);
> > +
> > +	return error;
> > +}
> 
> > +	if (ia_valid & ATTR_SIZE) {
> > +		if (inode->i_op && inode->i_op->setsize) {
> 
> inode->i_op is mandatory these days.

Oh OK. Some existing places are testing for it...

 
> > +			unsigned int flags = 0;
> > +			struct file *file = NULL;
> > +
> > +			if (ia_valid & ATTR_FILE) {
> > +				flags |= SETSIZE_FILE;
> > +				file = attr->ia_file;
> > +			}
> > +			if (ia_valid & ATTR_OPEN)
> > +				flags |= SETSIZE_OPEN;
> > +			error = inode->i_op->setsize(dentry, attr->ia_size,
> > +							flags, file);
> > +			if (error)
> > +				return error;
> 
> So you still pass down to ->setattr if ->setsize succeeded?  That's a
> very confusing calling convention.  It also means we first do the
> truncation and any following time/mode updates are in a separate
> transaction which is a no-go.

That's kind of why I liked it in inode_setattr better.

But if the filesystem defines its own ->setattr, then it could simply
not define a ->setsize and do the right thing in setattr. So this
calling convention seems not too bad.



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 15:02     ` Nick Piggin
@ 2009-07-07 15:07       ` Christoph Hellwig
  2009-07-07 15:48         ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 15:07 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 05:02:57PM +0200, Nick Piggin wrote:
> That's kind of why I liked it in inode_setattr better.
> 
> But if the filesystem defines its own ->setattr, then it could simply
> not define a ->setsize and do the right thing in setattr. So this
> calling convention seems not too bad.

Or the filesystem could just call into it's own setattr method
internally.  For that we'd switch back to passing the iattr to
->setsize.  For a filesystem that doesn't do anything special for
ATTR_SIZE ->setsize could point to the same function as ->setattr.

For filesystem where's it's really different they could be separate or
share helpers.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 15:07       ` Christoph Hellwig
@ 2009-07-07 15:48         ` Nick Piggin
  2009-07-07 16:30           ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-07 15:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 11:07:58AM -0400, Christoph Hellwig wrote:
> On Tue, Jul 07, 2009 at 05:02:57PM +0200, Nick Piggin wrote:
> > That's kind of why I liked it in inode_setattr better.
> > 
> > But if the filesystem defines its own ->setattr, then it could simply
> > not define a ->setsize and do the right thing in setattr. So this
> > calling convention seems not too bad.
> 
> Or the filesystem could just call into it's own setattr method
> internally.  For that we'd switch back to passing the iattr to
> ->setsize.  For a filesystem that doesn't do anything special for
> ATTR_SIZE ->setsize could point to the same function as ->setattr.
> 
> For filesystem where's it's really different they could be separate or
> share helpers.

OK, so what do you suggest? If the filesystem defines
->setsize then do not pass ATTR_SIZE changes into setattr?
But then do you also not pass in ATTR_TIME cchanges to setattr
iff they  are together with ATTR_SIZE change? It sees also like
quite a difficult calling convention.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 15:48         ` Nick Piggin
@ 2009-07-07 16:30           ` Christoph Hellwig
  2009-07-08  6:32             ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 16:30 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 05:48:09PM +0200, Nick Piggin wrote:
> OK, so what do you suggest? If the filesystem defines
> ->setsize then do not pass ATTR_SIZE changes into setattr?
> But then do you also not pass in ATTR_TIME cchanges to setattr
> iff they  are together with ATTR_SIZE change? It sees also like
> quite a difficult calling convention.

Ok, I played around with these ideas and your patches a bit.  I think
we're actually best of to return to one of the early ideas and just
get rid of ->truncate without any replacement, e.g. let ->setattr
handle all of it.

Below is a patch ontop of you four patches that implements exactly that
and it looks surprisingly nice.  The only gotcha I can see is that we
need to audit for existing filesystems not implementing ->truncate
getting a behaviour change due to the checks to decide if we want
to call vmtruncate.  But most likely any existing filesystems without
->truncate using the buffer.c helper or direct I/O is buggy anyway.

Note that it doesn't touch i_alloc_mutex locking for now - if we go
down this route I would do the lock shift in one patch at the end of
the series.

This patch passes xfsqa for ext2 and doing some randoms ops for shmem
(it's mounted on /tmp on my test VM)

Index: linux-2.6/fs/ext2/ext2.h
===================================================================
--- linux-2.6.orig/fs/ext2/ext2.h	2009-07-07 17:15:22.591389224 +0200
+++ linux-2.6/fs/ext2/ext2.h	2009-07-07 17:15:26.185252886 +0200
@@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inod
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int ext2_setsize(struct dentry *, loff_t, unsigned int, struct file *);
 extern int ext2_setattr (struct dentry *, struct iattr *);
 extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
Index: linux-2.6/fs/ext2/file.c
===================================================================
--- linux-2.6.orig/fs/ext2/file.c	2009-07-07 17:15:10.028363845 +0200
+++ linux-2.6/fs/ext2/file.c	2009-07-07 17:15:19.283479548 +0200
@@ -77,7 +77,6 @@ const struct file_operations ext2_xip_fi
 #endif
 
 const struct inode_operations ext2_file_inode_operations = {
-	.setsize	= ext2_setsize,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c	2009-07-07 17:15:10.045364476 +0200
+++ linux-2.6/fs/ext2/inode.c	2009-07-07 17:53:01.633240795 +0200
@@ -1145,55 +1145,6 @@ do_indirects:
 	mutex_unlock(&ei->truncate_mutex);
 }
 
-int ext2_setsize(struct dentry *dentry, loff_t newsize,
-			unsigned int flags, struct file *file)
-{
-	struct inode *inode = dentry->d_inode;
-	loff_t oldsize;
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode)))
-		return -EINVAL;
-	if (ext2_inode_is_fast_symlink(inode))
-		return -EINVAL;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return -EPERM;
-
-	if (mapping_is_xip(inode->i_mapping))
-		error = xip_truncate_page(inode->i_mapping, newsize);
-	else if (test_opt(inode->i_sb, NOBH))
-		error = nobh_truncate_page(inode->i_mapping,
-				newsize, ext2_get_block);
-	else
-		error = block_truncate_page(inode->i_mapping,
-				newsize, ext2_get_block);
-	if (error)
-		return error;
-
-	oldsize = inode->i_size;
-	i_size_write(inode, newsize);
-	truncate_pagecache(inode, oldsize, newsize);
-
-	down_write(&inode->i_alloc_sem);
-	ext2_truncate_blocks(inode, newsize);
-	up_write(&inode->i_alloc_sem);
-
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	if (inode_needs_sync(inode)) {
-		sync_mapping_buffers(inode->i_mapping);
-		ext2_sync_inode (inode);
-	} else {
-		mark_inode_dirty(inode);
-	}
-
-	return 0;
-}
-
 static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
 					struct buffer_head **p)
 {
@@ -1510,11 +1461,62 @@ int ext2_sync_inode(struct inode *inode)
 	return sync_inode(inode, &wbc);
 }
 
+static int ext2_setsize(struct inode *inode, loff_t newsize)
+{
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, newsize);
+	if (error)
+		return error;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return -EINVAL;
+	if (ext2_inode_is_fast_symlink(inode))
+		return -EINVAL;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	if (mapping_is_xip(inode->i_mapping))
+		error = xip_truncate_page(inode->i_mapping, newsize);
+	else if (test_opt(inode->i_sb, NOBH))
+		error = nobh_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	else
+		error = block_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	if (error)
+		return error;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
+
+	ext2_truncate_blocks(inode, newsize);
+
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+	if (inode_needs_sync(inode)) {
+		sync_mapping_buffers(inode->i_mapping);
+		ext2_sync_inode (inode);
+	} else {
+		mark_inode_dirty(inode);
+	}
+
+	return 0;
+}
+
 int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+	if (iattr->ia_valid & ATTR_SIZE) {
+		error = ext2_setsize(inode, iattr->ia_size);
+		if (error)
+			return error;
+	}
+
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
Index: linux-2.6/fs/attr.c
===================================================================
--- linux-2.6.orig/fs/attr.c	2009-07-07 17:14:41.017394460 +0200
+++ linux-2.6/fs/attr.c	2009-07-07 17:23:06.618241423 +0200
@@ -206,24 +206,8 @@ int notify_change(struct dentry * dentry
 	if (error)
 		return error;
 
-	if (ia_valid & ATTR_SIZE) {
-		if (inode->i_op && inode->i_op->setsize) {
-			unsigned int flags = 0;
-			struct file *file = NULL;
-
-			if (ia_valid & ATTR_FILE) {
-				flags |= SETSIZE_FILE;
-				file = attr->ia_file;
-			}
-			if (ia_valid & ATTR_OPEN)
-				flags |= SETSIZE_OPEN;
-			error = inode->i_op->setsize(dentry, attr->ia_size,
-							flags, file);
-			if (error)
-				return error;
-		} else
-			down_write(&dentry->d_inode->i_alloc_sem);
-	}
+	if (ia_valid & ATTR_SIZE)
+		down_write(&dentry->d_inode->i_alloc_sem);
 
 	if (inode->i_op && inode->i_op->setattr) {
 		error = inode->i_op->setattr(dentry, attr);
@@ -239,7 +223,7 @@ int notify_change(struct dentry * dentry
 		}
 	}
 
-	if (ia_valid & ATTR_SIZE && !(inode->i_op && inode->i_op->setsize))
+	if (ia_valid & ATTR_SIZE)
 		up_write(&dentry->d_inode->i_alloc_sem);
 
 	if (!error)
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c	2009-07-07 17:22:16.770364959 +0200
+++ linux-2.6/fs/buffer.c	2009-07-07 17:23:33.825268267 +0200
@@ -1993,11 +1993,11 @@ int block_write_begin(struct file *file,
 			 * outside i_size.  Trim these off again. Don't need
 			 * i_size_read because we hold i_mutex.
 			 *
-			 * Filesystems which define ->setsize must handle
+			 * Filesystems which do not define ->setsize must handle
 			 * this themselves.
 			 */
 			if (pos + len > inode->i_size)
-				if (!inode->i_op->setsize)
+				if (inode->i_op->truncate)
 					vmtruncate(inode, inode->i_size);
 		}
 	}
@@ -2599,7 +2599,7 @@ out_release:
 	*pagep = NULL;
 
 	if (pos + len > inode->i_size)
-		if (!inode->i_op->setsize)
+		if (inode->i_op->truncate)
 			vmtruncate(inode, inode->i_size);
 
 	return ret;
Index: linux-2.6/fs/direct-io.c
===================================================================
--- linux-2.6.orig/fs/direct-io.c	2009-07-07 17:22:16.710364362 +0200
+++ linux-2.6/fs/direct-io.c	2009-07-07 17:22:26.601241382 +0200
@@ -1217,7 +1217,7 @@ __blockdev_direct_IO(int rw, struct kioc
 		loff_t isize = i_size_read(inode);
 
 		if (end > isize && dio_lock_type == DIO_LOCKING)
-			if (!inode->i_op->setsize)
+			if (inode->i_op->truncate)
 				vmtruncate(inode, isize);
 	}
 
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c	2009-07-07 17:21:07.357268403 +0200
+++ linux-2.6/fs/libfs.c	2009-07-07 17:21:32.413241823 +0200
@@ -329,10 +329,8 @@ int simple_rename(struct inode *old_dir,
 	return 0;
 }
 
-int simple_setsize(struct dentry *dentry, loff_t newsize,
-			unsigned flags, struct file *file)
+int simple_setsize(struct inode *inode, loff_t newsize)
 {
-	struct inode *inode = dentry->d_inode;
 	loff_t oldsize;
 	int error;
 
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2009-07-07 17:21:35.255363657 +0200
+++ linux-2.6/include/linux/fs.h	2009-07-07 17:23:14.795241323 +0200
@@ -431,12 +431,6 @@ typedef void (dio_iodone_t)(struct kiocb
 #define ATTR_TIMES_SET	(1 << 16)
 
 /*
- * Setsize flags.
- */
-#define SETSIZE_FILE	(1 << 0) /* Trucating via an open file (eg ftruncate) */
-#define SETSIZE_OPEN	(1 << 1) /* Truncating from open(O_TRUNC) */
-
-/*
  * This is the Inode Attributes structure, used for notify_change().  It
  * uses the above definitions as flags, to know which values have changed.
  * Also, in this manner, a Filesystem can look at only the values it cares
@@ -1533,7 +1527,6 @@ struct inode_operations {
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
-	int (*setsize) (struct dentry *, loff_t, unsigned, struct file *);
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
@@ -2339,8 +2332,7 @@ extern int simple_link(struct dentry *, 
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
-extern int simple_setsize(struct dentry *dentry, loff_t newsize,
-			unsigned flags, struct file *file);
+extern int simple_setsize(struct inode *inode, loff_t newsize);
 extern int simple_sync_file(struct file *, struct dentry *, int);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c	2009-07-07 17:19:51.972394381 +0200
+++ linux-2.6/mm/shmem.c	2009-07-07 17:53:20.961241413 +0200
@@ -764,25 +764,19 @@ done2:
 	}
 }
 
-static int shmem_setsize(struct dentry *dentry, loff_t newsize,
-			unsigned int flags, struct file *file)
-{
-	int error;
-
-	error = simple_setsize(dentry, newsize, flags, file);
-	if (error)
-		return error;
-	shmem_truncate_range(dentry->d_inode, newsize, (loff_t)-1);
-
-	return error;
-}
-
 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
 	struct page *page = NULL;
 	int error;
 
+	if (attr->ia_valid & ATTR_SIZE) {
+		error = simple_setsize(dentry->d_inode, attr->ia_size);
+		if (error)
+			return error;
+		shmem_truncate_range(dentry->d_inode, attr->ia_size, -1);
+	}
+
 	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
 		if (attr->ia_size < inode->i_size) {
 			/*
@@ -831,7 +825,7 @@ static void shmem_delete_inode(struct in
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 
-	if (inode->i_op->setsize == shmem_setsize) {
+	if (inode->i_mapping->a_ops == &shmem_aops) {
 		truncate_inode_pages(inode->i_mapping, 0);
 		shmem_unacct_size(info->flags, inode->i_size);
 		inode->i_size = 0;
@@ -2027,7 +2021,6 @@ static const struct inode_operations shm
 };
 
 static const struct inode_operations shmem_symlink_inode_operations = {
-	.setsize	= shmem_setsize,
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
 	.put_link	= shmem_put_link,
@@ -2447,7 +2440,6 @@ static const struct file_operations shme
 };
 
 static const struct inode_operations shmem_inode_operations = {
-	.setsize	= shmem_setsize,
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_POSIX_ACL

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-07 14:49 ` [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate Nick Piggin
@ 2009-07-07 16:38   ` Christoph Hellwig
  2009-07-08  6:53     ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-07 16:38 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-fsdevel, Christoph Hellwig, Jan Kara, LKML, linux-mm

I'd still prefer this to be split into one patch for shmem, and one for
ext2 to make bisecting easier.

> @@ -68,7 +70,7 @@ void ext2_delete_inode (struct inode * i
>  
>  	inode->i_size = 0;
>  	if (inode->i_blocks)
> -		ext2_truncate (inode);
> +		ext2_truncate_blocks(inode, 0);
>  	ext2_free_inode (inode);
>  
>  	return;

> -void ext2_truncate(struct inode *inode)
> +static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
>  {
>  	__le32 *i_data = EXT2_I(inode)->i_data;
>  	struct ext2_inode_info *ei = EXT2_I(inode);
> @@ -1032,27 +1074,8 @@ void ext2_truncate(struct inode *inode)
>  	int n;
>  	long iblock;
>  	unsigned blocksize;
> -
> -	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> -	    S_ISLNK(inode->i_mode)))
> -		return;
> -	if (ext2_inode_is_fast_symlink(inode))
> -		return;
> -	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
> -		return;
> -

We can't move this to the caller easily.  ext2_delete_inode gets
called for all inodes, but we only want to go on truncating for the
limited set that passes this check.



> -	if (mapping_is_xip(inode->i_mapping))
> -		xip_truncate_page(inode->i_mapping, inode->i_size);
> -	else if (test_opt(inode->i_sb, NOBH))
> -		nobh_truncate_page(inode->i_mapping,
> -				inode->i_size, ext2_get_block);
> -	else
> -		block_truncate_page(inode->i_mapping,
> -				inode->i_size, ext2_get_block);

The patch header should have an explanation for why we don't need this
anymore for the various existing callers.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-07 16:30           ` Christoph Hellwig
@ 2009-07-08  6:32             ` Nick Piggin
  2009-07-08 10:47               ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-08  6:32 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 12:30:42PM -0400, Christoph Hellwig wrote:
> On Tue, Jul 07, 2009 at 05:48:09PM +0200, Nick Piggin wrote:
> > OK, so what do you suggest? If the filesystem defines
> > ->setsize then do not pass ATTR_SIZE changes into setattr?
> > But then do you also not pass in ATTR_TIME cchanges to setattr
> > iff they  are together with ATTR_SIZE change? It sees also like
> > quite a difficult calling convention.
> 
> Ok, I played around with these ideas and your patches a bit.  I think
> we're actually best of to return to one of the early ideas and just
> get rid of ->truncate without any replacement, e.g. let ->setattr
> handle all of it.

Yes I do agree. ->setsize inside inode_setattr would have been
OK as wel I think, but this is probably even cleaner even though
it might be a bit more work.

 
> Below is a patch ontop of you four patches that implements exactly that
> and it looks surprisingly nice.  The only gotcha I can see is that we
> need to audit for existing filesystems not implementing ->truncate
> getting a behaviour change due to the checks to decide if we want
> to call vmtruncate.  But most likely any existing filesystems without
> ->truncate using the buffer.c helper or direct I/O is buggy anyway.

Thanks for the patch, I think I will fold it in to the series. I
think we probably do need to call simple_setsize in inode_setattr
though (unless you propose to eventually convert every filesystem
to define a .setattr). This would also require eg. your ext2
conversion to strip ATTR_SIZE before passing through to inode_setattr.

We could just add some temporary field for example in the i_op
structure to test for and remove it when everybody is converted,
which woud guarantee back compatibility.


> Note that it doesn't touch i_alloc_mutex locking for now - if we go
> down this route I would do the lock shift in one patch at the end of
> the series.

Yeah fine by me (or do it in a new series).

 
>  int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
>  {
>  	struct inode *inode = dentry->d_inode;
>  	int error;
>  
> +	if (iattr->ia_valid & ATTR_SIZE) {
> +		error = ext2_setsize(inode, iattr->ia_size);
> +		if (error)
> +			return error;
> +	}
> +
>  	error = inode_change_ok(inode, iattr);
>  	if (error)
>  		return error;

Probably want to call inode_change_ok first here.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-07 16:38   ` Christoph Hellwig
@ 2009-07-08  6:53     ` Nick Piggin
  2009-07-08 11:14       ` Jan Kara
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-08  6:53 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Tue, Jul 07, 2009 at 12:38:29PM -0400, Christoph Hellwig wrote:
> I'd still prefer this to be split into one patch for shmem, and one for
> ext2 to make bisecting easier.

Definitely agreed. I would prefer to even send individual fs patches
to respective maintainers so they can do their specific review and
QA on them and merge them as appropriate. The core change is
nicely back compatible, so there is no reason to tie it to the fs
patches.

 
> > @@ -68,7 +70,7 @@ void ext2_delete_inode (struct inode * i
> >  
> >  	inode->i_size = 0;
> >  	if (inode->i_blocks)
> > -		ext2_truncate (inode);
> > +		ext2_truncate_blocks(inode, 0);
> >  	ext2_free_inode (inode);
> >  
> >  	return;
> 
> > -void ext2_truncate(struct inode *inode)
> > +static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
> >  {
> >  	__le32 *i_data = EXT2_I(inode)->i_data;
> >  	struct ext2_inode_info *ei = EXT2_I(inode);
> > @@ -1032,27 +1074,8 @@ void ext2_truncate(struct inode *inode)
> >  	int n;
> >  	long iblock;
> >  	unsigned blocksize;
> > -
> > -	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> > -	    S_ISLNK(inode->i_mode)))
> > -		return;
> > -	if (ext2_inode_is_fast_symlink(inode))
> > -		return;
> > -	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
> > -		return;
> > -
> 
> We can't move this to the caller easily.  ext2_delete_inode gets
> called for all inodes, but we only want to go on truncating for the
> limited set that passes this check.

Hmm, shouldn't they have no ->i_blocks in that case?


> > -	if (mapping_is_xip(inode->i_mapping))
> > -		xip_truncate_page(inode->i_mapping, inode->i_size);
> > -	else if (test_opt(inode->i_sb, NOBH))
> > -		nobh_truncate_page(inode->i_mapping,
> > -				inode->i_size, ext2_get_block);
> > -	else
> > -		block_truncate_page(inode->i_mapping,
> > -				inode->i_size, ext2_get_block);
> 
> The patch header should have an explanation for why we don't need this
> anymore for the various existing callers.

OK. I guess it's not needed when blocks were completely outside
of i_size or the inode no longer has references and all blocks
will be freed. I will add the description.



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08  6:32             ` Nick Piggin
@ 2009-07-08 10:47               ` Christoph Hellwig
  2009-07-08 12:34                 ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-08 10:47 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed, Jul 08, 2009 at 08:32:25AM +0200, Nick Piggin wrote:
> Thanks for the patch, I think I will fold it in to the series. I
> think we probably do need to call simple_setsize in inode_setattr
> though (unless you propose to eventually convert every filesystem
> to define a .setattr). This would also require eg. your ext2
> conversion to strip ATTR_SIZE before passing through to inode_setattr.

Yes, we should eventually make .setattr mandatory.  Doing a default
action when a method lacks tends to cause more issues than it solves.

I'm happy to help in doing that part of the conversion (and also other
bits)

> >  int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
> >  {
> >  	struct inode *inode = dentry->d_inode;
> >  	int error;
> >  
> > +	if (iattr->ia_valid & ATTR_SIZE) {
> > +		error = ext2_setsize(inode, iattr->ia_size);
> > +		if (error)
> > +			return error;
> > +	}
> > +
> >  	error = inode_change_ok(inode, iattr);
> >  	if (error)
> >  		return error;
> 
> Probably want to call inode_change_ok first here.

Right now I tried to no reorder anything versus the previous patch.

But yes, we should do all the checks first.  Possibly we can even add
a call to inode_newsize_ok to inode_change_ok, but I'd like to defer
that until all conversions are done and we can easily audit it.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-08  6:53     ` Nick Piggin
@ 2009-07-08 11:14       ` Jan Kara
  2009-07-08 12:22         ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Jan Kara @ 2009-07-08 11:14 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed 08-07-09 08:53:27, Nick Piggin wrote:
> On Tue, Jul 07, 2009 at 12:38:29PM -0400, Christoph Hellwig wrote:
> > > @@ -68,7 +70,7 @@ void ext2_delete_inode (struct inode * i
> > >  
> > >  	inode->i_size = 0;
> > >  	if (inode->i_blocks)
> > > -		ext2_truncate (inode);
> > > +		ext2_truncate_blocks(inode, 0);
> > >  	ext2_free_inode (inode);
> > >  
> > >  	return;
> > 
> > > -void ext2_truncate(struct inode *inode)
> > > +static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
> > >  {
> > >  	__le32 *i_data = EXT2_I(inode)->i_data;
> > >  	struct ext2_inode_info *ei = EXT2_I(inode);
> > > @@ -1032,27 +1074,8 @@ void ext2_truncate(struct inode *inode)
> > >  	int n;
> > >  	long iblock;
> > >  	unsigned blocksize;
> > > -
> > > -	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
> > > -	    S_ISLNK(inode->i_mode)))
> > > -		return;
> > > -	if (ext2_inode_is_fast_symlink(inode))
> > > -		return;
> > > -	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
> > > -		return;
> > > -
> > 
> > We can't move this to the caller easily.  ext2_delete_inode gets
> > called for all inodes, but we only want to go on truncating for the
> > limited set that passes this check.
> 
> Hmm, shouldn't they have no ->i_blocks in that case?
  Not necessarily. Inode can have extended attributes set and those can
be stored in a special block which is accounted in i_blocks.

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-08 11:14       ` Jan Kara
@ 2009-07-08 12:22         ` Nick Piggin
  2009-07-08 12:32           ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-08 12:22 UTC (permalink / raw)
  To: Jan Kara; +Cc: Christoph Hellwig, linux-fsdevel, LKML, linux-mm

On Wed, Jul 08, 2009 at 01:14:20PM +0200, Jan Kara wrote:
> On Wed 08-07-09 08:53:27, Nick Piggin wrote:
> > On Tue, Jul 07, 2009 at 12:38:29PM -0400, Christoph Hellwig wrote:
> > > We can't move this to the caller easily.  ext2_delete_inode gets
> > > called for all inodes, but we only want to go on truncating for the
> > > limited set that passes this check.
> > 
> > Hmm, shouldn't they have no ->i_blocks in that case?
>   Not necessarily. Inode can have extended attributes set and those can
> be stored in a special block which is accounted in i_blocks.

OK fair enough. But I don't know if all those checks are
realy appropriate. For example an IS_APPEND inode should
be able to have its blocks trimmed off if a write fails.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-08 12:22         ` Nick Piggin
@ 2009-07-08 12:32           ` Christoph Hellwig
  2009-07-08 12:39             ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-08 12:32 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Jan Kara, Christoph Hellwig, linux-fsdevel, LKML, linux-mm

On Wed, Jul 08, 2009 at 02:22:50PM +0200, Nick Piggin wrote:
> OK fair enough. But I don't know if all those checks are
> realy appropriate. For example an IS_APPEND inode should
> be able to have its blocks trimmed off if a write fails.

It should.  But I think that's a separate issue of what we're trying to
fix right now.  So let's just do the method reshuffle now and then sort
out the checks later.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08 10:47               ` Christoph Hellwig
@ 2009-07-08 12:34                 ` Nick Piggin
  2009-07-08 12:40                   ` Christoph Hellwig
  2009-07-08 16:07                   ` Boaz Harrosh
  0 siblings, 2 replies; 36+ messages in thread
From: Nick Piggin @ 2009-07-08 12:34 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
> On Wed, Jul 08, 2009 at 08:32:25AM +0200, Nick Piggin wrote:
> > Thanks for the patch, I think I will fold it in to the series. I
> > think we probably do need to call simple_setsize in inode_setattr
> > though (unless you propose to eventually convert every filesystem
> > to define a .setattr). This would also require eg. your ext2
> > conversion to strip ATTR_SIZE before passing through to inode_setattr.
> 
> Yes, we should eventually make .setattr mandatory.  Doing a default
> action when a method lacks tends to cause more issues than it solves.
> 
> I'm happy to help in doing that part of the conversion (and also other
> bits)

OK well here is what I have now for 3/4 and 4/4. Basically just
folded your patch on top, changed ordering of some checks, have
fs clear ATTR_SIZE before calling inode_setattr, add a .new_truncate
field to check against rather than .truncate, and provide a default
ATTR_SIZE handler in inode_setattr (simple_setsize).

---
Introduce a new truncate calling sequence into fs/mm subsystems. Rather than
setattr > vmtruncate > truncate, have filesystems call their truncate sequence
from ->setattr if filesystem specific operations are required. vmtruncate is
deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced
previously should be used.

simple_setsize is also introduced to perform the equivalent of vmtruncate.
simple_setsize gets called by inode_setattr when ATTR_SIZE is passed. So
filesystems implementing their own truncate code in setattr then calling
through to inode_setattr should clear ATTR_SIZE.

A new attribute is introduced into inode_operations structure; .new_truncate
is a temporary hack to distinguish filesystems that implement the new
truncate system. These guys cannot trim off block past i_size via vmtruncate,
so instead they must handle it in fs code. This gives better opportunity to
catch errors etc anyway. .new_truncate and .truncate will go away once all
filesystems are converted.

Big problem with the previous calling sequence: the filesystem is not called
until i_size has already changed.  This means it is not allowed to fail the
call, and also it does not know what the previous i_size was. Also, generic
code calling vmtruncate to truncate allocated blocks in case of error had
no good way to return a meaningful error (or, for example, atomically handle
block deallocation).

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 Documentation/filesystems/vfs.txt |    7 ++++++-
 fs/attr.c                         |    7 ++++++-
 fs/buffer.c                       |   12 +++++++++---
 fs/direct-io.c                    |    7 ++++---
 fs/libfs.c                        |   17 +++++++++++++++++
 include/linux/fs.h                |    2 ++
 mm/truncate.c                     |    6 ++----
 7 files changed, 46 insertions(+), 12 deletions(-)

Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -329,6 +329,22 @@ int simple_rename(struct inode *old_dir,
 	return 0;
 }
 
+int simple_setsize(struct inode *inode, loff_t newsize)
+{
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, newsize);
+	if (error)
+		return error;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
+
+	return error;
+}
+
 int simple_readpage(struct file *file, struct page *page)
 {
 	clear_highpage(page);
@@ -840,6 +856,7 @@ EXPORT_SYMBOL(generic_read_dir);
 EXPORT_SYMBOL(get_sb_pseudo);
 EXPORT_SYMBOL(simple_write_begin);
 EXPORT_SYMBOL(simple_write_end);
+EXPORT_SYMBOL(simple_setsize);
 EXPORT_SYMBOL(simple_dir_inode_operations);
 EXPORT_SYMBOL(simple_dir_operations);
 EXPORT_SYMBOL(simple_empty);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -1527,6 +1527,7 @@ struct inode_operations {
 	void * (*follow_link) (struct dentry *, struct nameidata *);
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
+	int new_truncate; /* nasty hack to transition to new truncate code */
 	int (*permission) (struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
@@ -2332,6 +2333,7 @@ extern int simple_link(struct dentry *,
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+extern int simple_setsize(struct inode *inode, loff_t newsize);
 extern int simple_sync_file(struct file *, struct dentry *, int);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1992,9 +1992,14 @@ int block_write_begin(struct file *file,
 			 * prepare_write() may have instantiated a few blocks
 			 * outside i_size.  Trim these off again. Don't need
 			 * i_size_read because we hold i_mutex.
+			 *
+			 * Filesystems which define i_op->new_truncate must
+			 * handle this themselves. Eventually this will go
+			 * away because everyone will be converted.
 			 */
 			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
+				if (!inode->i_op->new_truncate)
+					vmtruncate(inode, inode->i_size);
 		}
 	}
 
@@ -2371,7 +2376,7 @@ int block_commit_write(struct page *page
  *
  * We are not allowed to take the i_mutex here so we have to play games to
  * protect against truncate races as the page could now be beyond EOF.  Because
- * vmtruncate() writes the inode size before removing pages, once we have the
+ * truncate writes the inode size before removing pages, once we have the
  * page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
@@ -2595,7 +2600,8 @@ out_release:
 	*pagep = NULL;
 
 	if (pos + len > inode->i_size)
-		vmtruncate(inode, inode->i_size);
+		if (!inode->i_op->new_truncate)
+			vmtruncate(inode, inode->i_size);
 
 	return ret;
 }
Index: linux-2.6/fs/direct-io.c
===================================================================
--- linux-2.6.orig/fs/direct-io.c
+++ linux-2.6/fs/direct-io.c
@@ -1210,14 +1210,15 @@ __blockdev_direct_IO(int rw, struct kioc
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
+	 * their own manner.
 	 */
 	if (unlikely(retval < 0 && (rw & WRITE))) {
 		loff_t isize = i_size_read(inode);
 
 		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+			if (!inode->i_op->new_truncate)
+				vmtruncate(inode, isize);
 	}
 
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
Index: linux-2.6/fs/attr.c
===================================================================
--- linux-2.6.orig/fs/attr.c
+++ linux-2.6/fs/attr.c
@@ -112,7 +112,12 @@ int inode_setattr(struct inode * inode,
 
 	if (ia_valid & ATTR_SIZE &&
 	    attr->ia_size != i_size_read(inode)) {
-		int error = vmtruncate(inode, attr->ia_size);
+		int error;
+
+		if (inode->i_op->new_truncate)
+			error = simple_setsize(inode, attr->ia_size);
+		else
+			error = vmtruncate(inode, attr->ia_size);
 		if (error)
 			return error;
 	}
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6/Documentation/filesystems/vfs.txt
@@ -401,11 +401,16 @@ otherwise noted.
   	started might not be in the page cache at the end of the
   	walk).
 
-  truncate: called by the VFS to change the size of a file.  The
+  truncate: Deprecated. This will not be called if ->setsize is defined.
+	Called by the VFS to change the size of a file.  The
  	i_size field of the inode is set to the desired size by the
  	VFS before this method is called.  This method is called by
  	the truncate(2) system call and related functionality.
 
+	Note: ->truncate and vmtruncate are deprecated. Do not add new
+	instances/calls of these. Filesystems shoud be converted to do their
+	truncate sequence via ->setattr().
+
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
Index: linux-2.6/mm/truncate.c
===================================================================
--- linux-2.6.orig/mm/truncate.c
+++ linux-2.6/mm/truncate.c
@@ -513,12 +513,10 @@ int vmtruncate(struct inode * inode, lof
 	loff_t oldsize;
 	int error;
 
-	error = inode_newsize_ok(inode, offset);
+	error = simple_setsize(inode, offset);
 	if (error)
 		return error;
-	oldsize = inode->i_size;
-	i_size_write(inode, offset);
-	truncate_pagecache(inode, oldsize, offset);
+
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-08 12:32           ` Christoph Hellwig
@ 2009-07-08 12:39             ` Nick Piggin
  2009-07-08 13:49               ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-08 12:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jan Kara, linux-fsdevel, LKML, linux-mm

On Wed, Jul 08, 2009 at 08:32:44AM -0400, Christoph Hellwig wrote:
> On Wed, Jul 08, 2009 at 02:22:50PM +0200, Nick Piggin wrote:
> > OK fair enough. But I don't know if all those checks are
> > realy appropriate. For example an IS_APPEND inode should
> > be able to have its blocks trimmed off if a write fails.
> 
> It should.  But I think that's a separate issue of what we're trying to
> fix right now.  So let's just do the method reshuffle now and then sort
> out the checks later.

Here is patch 4/4 after your parts folded in and other changes
I said in last mail. (yes I do agree to split it up, but I'll
just wait until we all agree on basics and then resend a new
patchset).

--
Convert ext2 and tmpfs to use the new truncate convention.

Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 fs/ext2/ext2.h  |    1 
 fs/ext2/file.c  |    2 
 fs/ext2/inode.c |  137 +++++++++++++++++++++++++++++++++++++++++++++-----------
 mm/shmem.c      |   35 +++++++-------
 4 files changed, 131 insertions(+), 44 deletions(-)

Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c
+++ linux-2.6/fs/ext2/inode.c
@@ -53,6 +53,8 @@ static inline int ext2_inode_is_fast_sym
 		inode->i_blocks - ea_blocks == 0);
 }
 
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
+
 /*
  * Called at the last iput() if i_nlink is zero.
  */
@@ -68,7 +70,7 @@ void ext2_delete_inode (struct inode * i
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
-		ext2_truncate (inode);
+		ext2_truncate_blocks(inode, 0);
 	ext2_free_inode (inode);
 
 	return;
@@ -761,8 +763,33 @@ ext2_write_begin(struct file *file, stru
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+	ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+	if (ret < 0) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
+}
+
+static int ext2_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	int ret;
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (ret < len) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int
@@ -770,13 +797,22 @@ ext2_nobh_write_begin(struct file *file,
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	/*
 	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
 	 * directory handling code to pass around offsets rather than struct
 	 * pages in order to make this work easily.
 	 */
-	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 							ext2_get_block);
+	if (ret < 0) {
+		struct inode *inode = mapping->host;
+		loff_t isize = inode->i_size;
+		if (pos + len > isize)
+			ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int ext2_nobh_writepage(struct page *page,
@@ -796,9 +832,15 @@ ext2_direct_IO(int rw, struct kiocb *ioc
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				offset, nr_segs, ext2_get_block, NULL);
+	if (ret < 0 && (rw & WRITE)) {
+		loff_t isize = inode->i_size;
+		ext2_truncate_blocks(inode, isize);
+	}
+	return ret;
 }
 
 static int
@@ -813,7 +855,7 @@ const struct address_space_operations ex
 	.writepage		= ext2_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext2_write_begin,
-	.write_end		= generic_write_end,
+	.write_end		= ext2_write_end,
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
@@ -1020,7 +1062,7 @@ static void ext2_free_branches(struct in
 		ext2_free_data(inode, p, q);
 }
 
-void ext2_truncate(struct inode *inode)
+static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
 {
 	__le32 *i_data = EXT2_I(inode)->i_data;
 	struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1032,27 +1074,8 @@ void ext2_truncate(struct inode *inode)
 	int n;
 	long iblock;
 	unsigned blocksize;
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	    S_ISLNK(inode->i_mode)))
-		return;
-	if (ext2_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-
 	blocksize = inode->i_sb->s_blocksize;
-	iblock = (inode->i_size + blocksize-1)
-					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
-
-	if (mapping_is_xip(inode->i_mapping))
-		xip_truncate_page(inode->i_mapping, inode->i_size);
-	else if (test_opt(inode->i_sb, NOBH))
-		nobh_truncate_page(inode->i_mapping,
-				inode->i_size, ext2_get_block);
-	else
-		block_truncate_page(inode->i_mapping,
-				inode->i_size, ext2_get_block);
+	iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
 	n = ext2_block_to_path(inode, iblock, offsets, NULL);
 	if (n == 0)
@@ -1120,6 +1143,59 @@ do_indirects:
 	ext2_discard_reservation(inode);
 
 	mutex_unlock(&ei->truncate_mutex);
+}
+
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
+{
+	/*
+	 * XXX: it seems like a bug here that we don't allow
+	 * IS_APPEND inode to have blocks-past-i_size trimmed off.
+	 * review and fix this.
+	 */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return -EINVAL;
+	if (ext2_inode_is_fast_symlink(inode))
+		return -EINVAL;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+	__ext2_truncate_blocks(inode, offset);
+}
+
+int ext2_setsize(struct inode *inode, loff_t newsize)
+{
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, newsize);
+	if (error)
+		return error;
+
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+	    S_ISLNK(inode->i_mode)))
+		return -EINVAL;
+	if (ext2_inode_is_fast_symlink(inode))
+		return -EINVAL;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return -EPERM;
+
+	if (mapping_is_xip(inode->i_mapping))
+		error = xip_truncate_page(inode->i_mapping, newsize);
+	else if (test_opt(inode->i_sb, NOBH))
+		error = nobh_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	else
+		error = block_truncate_page(inode->i_mapping,
+				newsize, ext2_get_block);
+	if (error)
+		return error;
+
+	oldsize = inode->i_size;
+	i_size_write(inode, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
+
+	__ext2_truncate_blocks(inode, newsize);
+
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	if (inode_needs_sync(inode)) {
 		sync_mapping_buffers(inode->i_mapping);
@@ -1127,6 +1203,8 @@ do_indirects:
 	} else {
 		mark_inode_dirty(inode);
 	}
+
+	return 0;
 }
 
 static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
@@ -1459,6 +1537,13 @@ int ext2_setattr(struct dentry *dentry,
 		if (error)
 			return error;
 	}
+	if (iattr->ia_valid & ATTR_SIZE) {
+		error = ext2_setsize(inode, iattr->ia_size);
+		if (error)
+			return error;
+		iattr->ia_valid &= ~ATTR_SIZE;
+		/* strip ATTR_SIZE for inode_setattr */
+	}
 	error = inode_setattr(inode, iattr);
 	if (!error && (iattr->ia_valid & ATTR_MODE))
 		error = ext2_acl_chmod(inode);
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -730,10 +730,11 @@ done2:
 	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
 		/*
 		 * Call truncate_inode_pages again: racing shmem_unuse_inode
-		 * may have swizzled a page in from swap since vmtruncate or
-		 * generic_delete_inode did it, before we lowered next_index.
-		 * Also, though shmem_getpage checks i_size before adding to
-		 * cache, no recheck after: so fix the narrow window there too.
+		 * may have swizzled a page in from swap since
+		 * truncate_pagecache or generic_delete_inode did it, before we
+		 * lowered next_index.  Also, though shmem_getpage checks
+		 * i_size before adding to cache, no recheck after: so fix the
+		 * narrow window there too.
 		 *
 		 * Recalling truncate_inode_pages_range and unmap_mapping_range
 		 * every time for punch_hole (which never got a chance to clear
@@ -763,11 +764,6 @@ done2:
 	}
 }
 
-static void shmem_truncate(struct inode *inode)
-{
-	shmem_truncate_range(inode, inode->i_size, (loff_t)-1);
-}
-
 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -775,6 +771,8 @@ static int shmem_notify_change(struct de
 	int error;
 
 	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+		loff_t newsize = attr->ia_size;
+
 		if (attr->ia_size < inode->i_size) {
 			/*
 			 * If truncating down to a partial page, then
@@ -783,9 +781,9 @@ static int shmem_notify_change(struct de
 			 * truncate_partial_page cannnot miss it were
 			 * it assigned to swap.
 			 */
-			if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
+			if (newsize & (PAGE_CACHE_SIZE-1)) {
 				(void) shmem_getpage(inode,
-					attr->ia_size>>PAGE_CACHE_SHIFT,
+					newsize >> PAGE_CACHE_SHIFT,
 						&page, SGP_READ, NULL);
 				if (page)
 					unlock_page(page);
@@ -797,13 +795,19 @@ static int shmem_notify_change(struct de
 			 * if it's being fully truncated to zero-length: the
 			 * nrpages check is efficient enough in that case.
 			 */
-			if (attr->ia_size) {
+			if (newsize) {
 				struct shmem_inode_info *info = SHMEM_I(inode);
 				spin_lock(&info->lock);
 				info->flags &= ~SHMEM_PAGEIN;
 				spin_unlock(&info->lock);
 			}
 		}
+
+		error = simple_setsize(inode, newsize);
+		if (error)
+			return error;
+		shmem_truncate_range(inode, newsize, (loff_t)-1);
+		attr->ia_valid &= ~ATTR_SIZE;
 	}
 
 	error = inode_change_ok(inode, attr);
@@ -822,11 +826,11 @@ static void shmem_delete_inode(struct in
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 
-	if (inode->i_op->truncate == shmem_truncate) {
+	if (inode->i_mapping->a_ops == &shmem_aops) {
 		truncate_inode_pages(inode->i_mapping, 0);
 		shmem_unacct_size(info->flags, inode->i_size);
 		inode->i_size = 0;
-		shmem_truncate(inode);
+		shmem_truncate_range(inode, 0, (loff_t)-1);
 		if (!list_empty(&info->swaplist)) {
 			mutex_lock(&shmem_swaplist_mutex);
 			list_del_init(&info->swaplist);
@@ -2018,7 +2022,6 @@ static const struct inode_operations shm
 };
 
 static const struct inode_operations shmem_symlink_inode_operations = {
-	.truncate	= shmem_truncate,
 	.readlink	= generic_readlink,
 	.follow_link	= shmem_follow_link,
 	.put_link	= shmem_put_link,
@@ -2438,7 +2441,7 @@ static const struct file_operations shme
 };
 
 static const struct inode_operations shmem_inode_operations = {
-	.truncate	= shmem_truncate,
+	.new_truncate	= 1,
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_POSIX_ACL
Index: linux-2.6/fs/ext2/ext2.h
===================================================================
--- linux-2.6.orig/fs/ext2/ext2.h
+++ linux-2.6/fs/ext2/ext2.h
@@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inod
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void ext2_truncate (struct inode *);
 extern int ext2_setattr (struct dentry *, struct iattr *);
 extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
Index: linux-2.6/fs/ext2/file.c
===================================================================
--- linux-2.6.orig/fs/ext2/file.c
+++ linux-2.6/fs/ext2/file.c
@@ -77,13 +77,13 @@ const struct file_operations ext2_xip_fi
 #endif
 
 const struct inode_operations ext2_file_inode_operations = {
-	.truncate	= ext2_truncate,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.listxattr	= ext2_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
+	.new_truncate	= 1,
 	.setattr	= ext2_setattr,
 	.permission	= ext2_permission,
 	.fiemap		= ext2_fiemap,

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08 12:34                 ` Nick Piggin
@ 2009-07-08 12:40                   ` Christoph Hellwig
  2009-07-08 12:48                     ` Nick Piggin
  2009-07-08 16:07                   ` Boaz Harrosh
  1 sibling, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-08 12:40 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed, Jul 08, 2009 at 02:34:12PM +0200, Nick Piggin wrote:
> On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
> > On Wed, Jul 08, 2009 at 08:32:25AM +0200, Nick Piggin wrote:
> > > Thanks for the patch, I think I will fold it in to the series. I
> > > think we probably do need to call simple_setsize in inode_setattr
> > > though (unless you propose to eventually convert every filesystem
> > > to define a .setattr). This would also require eg. your ext2
> > > conversion to strip ATTR_SIZE before passing through to inode_setattr.
> > 
> > Yes, we should eventually make .setattr mandatory.  Doing a default
> > action when a method lacks tends to cause more issues than it solves.
> > 
> > I'm happy to help in doing that part of the conversion (and also other
> > bits)
> 
> OK well here is what I have now for 3/4 and 4/4. Basically just
> folded your patch on top, changed ordering of some checks, have
> fs clear ATTR_SIZE before calling inode_setattr, add a .new_truncate
> field to check against rather than .truncate, and provide a default
> ATTR_SIZE handler in inode_setattr (simple_setsize).

Can we leave that last part out?  Converting those filesystems that do
not have a ->truncate method to a trivial ->setattr is easy, and I can
do it pretty soon (next week probably).

That allows us to get rid of all that ATTR_SIZE clearing which is pretty
ugly.

> +			 *
> +			 * Filesystems which define i_op->new_truncate must
> +			 * handle this themselves. Eventually this will go
> +			 * away because everyone will be converted.

s/define/set/ ?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08 12:40                   ` Christoph Hellwig
@ 2009-07-08 12:48                     ` Nick Piggin
  0 siblings, 0 replies; 36+ messages in thread
From: Nick Piggin @ 2009-07-08 12:48 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed, Jul 08, 2009 at 08:40:56AM -0400, Christoph Hellwig wrote:
> On Wed, Jul 08, 2009 at 02:34:12PM +0200, Nick Piggin wrote:
> > On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
> > > On Wed, Jul 08, 2009 at 08:32:25AM +0200, Nick Piggin wrote:
> > > > Thanks for the patch, I think I will fold it in to the series. I
> > > > think we probably do need to call simple_setsize in inode_setattr
> > > > though (unless you propose to eventually convert every filesystem
> > > > to define a .setattr). This would also require eg. your ext2
> > > > conversion to strip ATTR_SIZE before passing through to inode_setattr.
> > > 
> > > Yes, we should eventually make .setattr mandatory.  Doing a default
> > > action when a method lacks tends to cause more issues than it solves.
> > > 
> > > I'm happy to help in doing that part of the conversion (and also other
> > > bits)
> > 
> > OK well here is what I have now for 3/4 and 4/4. Basically just
> > folded your patch on top, changed ordering of some checks, have
> > fs clear ATTR_SIZE before calling inode_setattr, add a .new_truncate
> > field to check against rather than .truncate, and provide a default
> > ATTR_SIZE handler in inode_setattr (simple_setsize).
> 
> Can we leave that last part out?  Converting those filesystems that do
> not have a ->truncate method to a trivial ->setattr is easy, and I can
> do it pretty soon (next week probably).
> 
> That allows us to get rid of all that ATTR_SIZE clearing which is pretty
> ugly.

Is it not common procedure to use when handling some attributes
and passing others to inode_setattr?

Anyway, no big deal either way. And it's using .new_truncate, so
there is no rush to convert everything (and it will remain back
compatible either way until we remove .new_truncate and all the
vmtruncate calls).


> 
> > +			 *
> > +			 * Filesystems which define i_op->new_truncate must
> > +			 * handle this themselves. Eventually this will go
> > +			 * away because everyone will be converted.
> 
> s/define/set/ ?

Yes.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate
  2009-07-08 12:39             ` Nick Piggin
@ 2009-07-08 13:49               ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-08 13:49 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, Jan Kara, linux-fsdevel, LKML, linux-mm

On Wed, Jul 08, 2009 at 02:39:04PM +0200, Nick Piggin wrote:
> Here is patch 4/4 after your parts folded in and other changes
> I said in last mail. (yes I do agree to split it up, but I'll
> just wait until we all agree on basics and then resend a new
> patchset).

Sure, that's fine.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08 12:34                 ` Nick Piggin
  2009-07-08 12:40                   ` Christoph Hellwig
@ 2009-07-08 16:07                   ` Boaz Harrosh
  2009-07-09  7:51                     ` Nick Piggin
  1 sibling, 1 reply; 36+ messages in thread
From: Boaz Harrosh @ 2009-07-08 16:07 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On 07/08/2009 03:34 PM, Nick Piggin wrote:
> On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
>> On Wed, Jul 08, 2009 at 08:32:25AM +0200, Nick Piggin wrote:
>>> Thanks for the patch, I think I will fold it in to the series. I
>>> think we probably do need to call simple_setsize in inode_setattr
>>> though (unless you propose to eventually convert every filesystem
>>> to define a .setattr). This would also require eg. your ext2
>>> conversion to strip ATTR_SIZE before passing through to inode_setattr.
>> Yes, we should eventually make .setattr mandatory.  Doing a default
>> action when a method lacks tends to cause more issues than it solves.
>>
>> I'm happy to help in doing that part of the conversion (and also other
>> bits)
> 
> OK well here is what I have now for 3/4 and 4/4. Basically just
> folded your patch on top, changed ordering of some checks, have
> fs clear ATTR_SIZE before calling inode_setattr, add a .new_truncate
> field to check against rather than .truncate, and provide a default
> ATTR_SIZE handler in inode_setattr (simple_setsize).
> 
> ---
> Introduce a new truncate calling sequence into fs/mm subsystems. Rather than
> setattr > vmtruncate > truncate, have filesystems call their truncate sequence
> from ->setattr if filesystem specific operations are required. vmtruncate is
> deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced
> previously should be used.
> 
> simple_setsize is also introduced to perform the equivalent of vmtruncate.
> simple_setsize gets called by inode_setattr when ATTR_SIZE is passed. So
> filesystems implementing their own truncate code in setattr then calling
> through to inode_setattr should clear ATTR_SIZE.
> 
> A new attribute is introduced into inode_operations structure; .new_truncate
> is a temporary hack to distinguish filesystems that implement the new
> truncate system. These guys cannot trim off block past i_size via vmtruncate,
> so instead they must handle it in fs code. This gives better opportunity to
> catch errors etc anyway. .new_truncate and .truncate will go away once all
> filesystems are converted.
> 
> Big problem with the previous calling sequence: the filesystem is not called
> until i_size has already changed.  This means it is not allowed to fail the
> call, and also it does not know what the previous i_size was. Also, generic
> code calling vmtruncate to truncate allocated blocks in case of error had
> no good way to return a meaningful error (or, for example, atomically handle
> block deallocation).
> 
> Signed-off-by: Nick Piggin <npiggin@suse.de>
> ---
>  Documentation/filesystems/vfs.txt |    7 ++++++-
>  fs/attr.c                         |    7 ++++++-
>  fs/buffer.c                       |   12 +++++++++---
>  fs/direct-io.c                    |    7 ++++---
>  fs/libfs.c                        |   17 +++++++++++++++++
>  include/linux/fs.h                |    2 ++
>  mm/truncate.c                     |    6 ++----
>  7 files changed, 46 insertions(+), 12 deletions(-)
> 
> Index: linux-2.6/fs/libfs.c
> ===================================================================
> --- linux-2.6.orig/fs/libfs.c
> +++ linux-2.6/fs/libfs.c
> @@ -329,6 +329,22 @@ int simple_rename(struct inode *old_dir,
>  	return 0;
>  }
>  
> +int simple_setsize(struct inode *inode, loff_t newsize)
> +{
> +	loff_t oldsize;
> +	int error;
> +
> +	error = inode_newsize_ok(inode, newsize);
> +	if (error)
> +		return error;
> +
> +	oldsize = inode->i_size;
> +	i_size_write(inode, newsize);
> +	truncate_pagecache(inode, oldsize, newsize);
> +
> +	return error;
> +}
> +
>  int simple_readpage(struct file *file, struct page *page)
>  {
>  	clear_highpage(page);
> @@ -840,6 +856,7 @@ EXPORT_SYMBOL(generic_read_dir);
>  EXPORT_SYMBOL(get_sb_pseudo);
>  EXPORT_SYMBOL(simple_write_begin);
>  EXPORT_SYMBOL(simple_write_end);
> +EXPORT_SYMBOL(simple_setsize);
>  EXPORT_SYMBOL(simple_dir_inode_operations);
>  EXPORT_SYMBOL(simple_dir_operations);
>  EXPORT_SYMBOL(simple_empty);
> Index: linux-2.6/include/linux/fs.h
> ===================================================================
> --- linux-2.6.orig/include/linux/fs.h
> +++ linux-2.6/include/linux/fs.h
> @@ -1527,6 +1527,7 @@ struct inode_operations {
>  	void * (*follow_link) (struct dentry *, struct nameidata *);
>  	void (*put_link) (struct dentry *, struct nameidata *, void *);
>  	void (*truncate) (struct inode *);
> +	int new_truncate; /* nasty hack to transition to new truncate code */
>  	int (*permission) (struct inode *, int);
>  	int (*setattr) (struct dentry *, struct iattr *);
>  	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
> @@ -2332,6 +2333,7 @@ extern int simple_link(struct dentry *,
>  extern int simple_unlink(struct inode *, struct dentry *);
>  extern int simple_rmdir(struct inode *, struct dentry *);
>  extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
> +extern int simple_setsize(struct inode *inode, loff_t newsize);
>  extern int simple_sync_file(struct file *, struct dentry *, int);
>  extern int simple_empty(struct dentry *);
>  extern int simple_readpage(struct file *file, struct page *page);
> Index: linux-2.6/fs/buffer.c
> ===================================================================
> --- linux-2.6.orig/fs/buffer.c
> +++ linux-2.6/fs/buffer.c
> @@ -1992,9 +1992,14 @@ int block_write_begin(struct file *file,
>  			 * prepare_write() may have instantiated a few blocks
>  			 * outside i_size.  Trim these off again. Don't need
>  			 * i_size_read because we hold i_mutex.
> +			 *
> +			 * Filesystems which define i_op->new_truncate must
> +			 * handle this themselves. Eventually this will go
> +			 * away because everyone will be converted.
>  			 */
>  			if (pos + len > inode->i_size)
> -				vmtruncate(inode, inode->i_size);
> +				if (!inode->i_op->new_truncate)
> +					vmtruncate(inode, inode->i_size);
>  		}
>  	}
>  
> @@ -2371,7 +2376,7 @@ int block_commit_write(struct page *page
>   *
>   * We are not allowed to take the i_mutex here so we have to play games to
>   * protect against truncate races as the page could now be beyond EOF.  Because
> - * vmtruncate() writes the inode size before removing pages, once we have the
> + * truncate writes the inode size before removing pages, once we have the
>   * page lock we can determine safely if the page is beyond EOF. If it is not
>   * beyond EOF, then the page is guaranteed safe against truncation until we
>   * unlock the page.
> @@ -2595,7 +2600,8 @@ out_release:
>  	*pagep = NULL;
>  
>  	if (pos + len > inode->i_size)
> -		vmtruncate(inode, inode->i_size);
> +		if (!inode->i_op->new_truncate)
> +			vmtruncate(inode, inode->i_size);
>  
>  	return ret;
>  }
> Index: linux-2.6/fs/direct-io.c
> ===================================================================
> --- linux-2.6.orig/fs/direct-io.c
> +++ linux-2.6/fs/direct-io.c
> @@ -1210,14 +1210,15 @@ __blockdev_direct_IO(int rw, struct kioc
>  	/*
>  	 * In case of error extending write may have instantiated a few
>  	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
> -	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
> -	 * it's own meaner.
> +	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
> +	 * their own manner.
>  	 */
>  	if (unlikely(retval < 0 && (rw & WRITE))) {
>  		loff_t isize = i_size_read(inode);
>  
>  		if (end > isize && dio_lock_type == DIO_LOCKING)
> -			vmtruncate(inode, isize);
> +			if (!inode->i_op->new_truncate)
> +				vmtruncate(inode, isize);
>  	}
>  
>  	if (rw == READ && dio_lock_type == DIO_LOCKING)
> Index: linux-2.6/fs/attr.c
> ===================================================================
> --- linux-2.6.orig/fs/attr.c
> +++ linux-2.6/fs/attr.c
> @@ -112,7 +112,12 @@ int inode_setattr(struct inode * inode,
>  
>  	if (ia_valid & ATTR_SIZE &&
>  	    attr->ia_size != i_size_read(inode)) {
> -		int error = vmtruncate(inode, attr->ia_size);
> +		int error;
> +
> +		if (inode->i_op->new_truncate)
> +			error = simple_setsize(inode, attr->ia_size);

I don't understand this branch.
If a filesystem has been converted to set "i_op->new_truncate=true"
then it must have been converted to intersect ->setattr and has set
the i_size (And needs to clear ATTR_SIZE, why?)

All other cases of systems not converted, or systems that do not have
->truncate will fall to the "else" part.

before the removal of i_op->new_truncate you will need to do something
with the systems that do not have ->truncate which will be a 
.setattr = simple_setattr or something

So I don't understand this conditional

> +		else
> +			error = vmtruncate(inode, attr->ia_size);
>  		if (error)
>  			return error;
>  	}
> Index: linux-2.6/Documentation/filesystems/vfs.txt
> ===================================================================
> --- linux-2.6.orig/Documentation/filesystems/vfs.txt
> +++ linux-2.6/Documentation/filesystems/vfs.txt
> @@ -401,11 +401,16 @@ otherwise noted.
>    	started might not be in the page cache at the end of the
>    	walk).
>  
> -  truncate: called by the VFS to change the size of a file.  The
> +  truncate: Deprecated. This will not be called if ->setsize is defined.
> +	Called by the VFS to change the size of a file.  The
>   	i_size field of the inode is set to the desired size by the
>   	VFS before this method is called.  This method is called by
>   	the truncate(2) system call and related functionality.
>  
> +	Note: ->truncate and vmtruncate are deprecated. Do not add new
> +	instances/calls of these. Filesystems shoud be converted to do their
> +	truncate sequence via ->setattr().
> +
>    permission: called by the VFS to check for access rights on a POSIX-like
>    	filesystem.
>  
> Index: linux-2.6/mm/truncate.c
> ===================================================================
> --- linux-2.6.orig/mm/truncate.c
> +++ linux-2.6/mm/truncate.c
> @@ -513,12 +513,10 @@ int vmtruncate(struct inode * inode, lof
>  	loff_t oldsize;
>  	int error;
>  
> -	error = inode_newsize_ok(inode, offset);
> +	error = simple_setsize(inode, offset);
>  	if (error)
>  		return error;
> -	oldsize = inode->i_size;
> -	i_size_write(inode, offset);
> -	truncate_pagecache(inode, oldsize, offset);
> +
>  	if (inode->i_op->truncate)
>  		inode->i_op->truncate(inode);
>  
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Boaz

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-08 16:07                   ` Boaz Harrosh
@ 2009-07-09  7:51                     ` Nick Piggin
  2009-07-12  8:55                       ` Boaz Harrosh
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-09  7:51 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Wed, Jul 08, 2009 at 07:07:17PM +0300, Boaz Harrosh wrote:
> On 07/08/2009 03:34 PM, Nick Piggin wrote:
> > On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
> > Index: linux-2.6/fs/attr.c
> > ===================================================================
> > --- linux-2.6.orig/fs/attr.c
> > +++ linux-2.6/fs/attr.c
> > @@ -112,7 +112,12 @@ int inode_setattr(struct inode * inode,
> >  
> >  	if (ia_valid & ATTR_SIZE &&
> >  	    attr->ia_size != i_size_read(inode)) {
> > -		int error = vmtruncate(inode, attr->ia_size);
> > +		int error;
> > +
> > +		if (inode->i_op->new_truncate)
> > +			error = simple_setsize(inode, attr->ia_size);
> 
> I don't understand this branch.
> If a filesystem has been converted to set "i_op->new_truncate=true"
> then it must have been converted to intersect ->setattr and has set
> the i_size (And needs to clear ATTR_SIZE, why?)
> 
> All other cases of systems not converted, or systems that do not have
> ->truncate will fall to the "else" part.
> 
> before the removal of i_op->new_truncate you will need to do something
> with the systems that do not have ->truncate which will be a 
> .setattr = simple_setattr or something
> 
> So I don't understand this conditional

inode_setattr *is* our "simple_setattr".


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-09  7:51                     ` Nick Piggin
@ 2009-07-12  8:55                       ` Boaz Harrosh
  2009-07-12 14:47                         ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Boaz Harrosh @ 2009-07-12  8:55 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On 07/09/2009 10:51 AM, Nick Piggin wrote:
> On Wed, Jul 08, 2009 at 07:07:17PM +0300, Boaz Harrosh wrote:
>> On 07/08/2009 03:34 PM, Nick Piggin wrote:
>>> On Wed, Jul 08, 2009 at 06:47:01AM -0400, Christoph Hellwig wrote:
>>> Index: linux-2.6/fs/attr.c
>>> ===================================================================
>>> --- linux-2.6.orig/fs/attr.c
>>> +++ linux-2.6/fs/attr.c
>>> @@ -112,7 +112,12 @@ int inode_setattr(struct inode * inode,
>>>  
>>>  	if (ia_valid & ATTR_SIZE &&
>>>  	    attr->ia_size != i_size_read(inode)) {
>>> -		int error = vmtruncate(inode, attr->ia_size);
>>> +		int error;
>>> +
>>> +		if (inode->i_op->new_truncate)
>>> +			error = simple_setsize(inode, attr->ia_size);
>> I don't understand this branch.
>> If a filesystem has been converted to set "i_op->new_truncate=true"
>> then it must have been converted to intersect ->setattr and has set
>> the i_size (And needs to clear ATTR_SIZE, why?)
>>
>> All other cases of systems not converted, or systems that do not have
>> ->truncate will fall to the "else" part.
>>
>> before the removal of i_op->new_truncate you will need to do something
>> with the systems that do not have ->truncate which will be a 
>> .setattr = simple_setattr or something
>>
>> So I don't understand this conditional
> 
> inode_setattr *is* our "simple_setattr".
> 

I wish you would split it.

one - helper to be called by converted file systems
      (Which just ignores the ATTR_SIZE)
second - to be set into .setattr which does the simple_setsize + above.

More clear for FS users like me (and that ugly unmask of ATTR_SIZE)

or it's just me?

Thanks
Boaz

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-12  8:55                       ` Boaz Harrosh
@ 2009-07-12 14:47                         ` Christoph Hellwig
  2009-07-12 15:00                           ` Boaz Harrosh
  2009-07-13  6:59                           ` Nick Piggin
  0 siblings, 2 replies; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-12 14:47 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Nick Piggin, Christoph Hellwig, linux-fsdevel, Jan Kara, LKML,
	linux-mm

On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
> I wish you would split it.
> 
> one - helper to be called by converted file systems
>       (Which just ignores the ATTR_SIZE)
> second - to be set into .setattr which does the simple_setsize + above.
> 
> More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
> 
> or it's just me?

Yeah, that seems be a lot cleaner.  But let's wait until we got
rid of ->truncate for all filesystems to have the bigger picture.

> 
> Thanks
> Boaz
---end quoted text---

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-12 14:47                         ` Christoph Hellwig
@ 2009-07-12 15:00                           ` Boaz Harrosh
  2009-07-13  6:59                           ` Nick Piggin
  1 sibling, 0 replies; 36+ messages in thread
From: Boaz Harrosh @ 2009-07-12 15:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Nick Piggin, linux-fsdevel, Jan Kara, LKML, linux-mm

On 07/12/2009 05:47 PM, Christoph Hellwig wrote:
> On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
>> I wish you would split it.
>>
>> one - helper to be called by converted file systems
>>       (Which just ignores the ATTR_SIZE)
>> second - to be set into .setattr which does the simple_setsize + above.
>>
>> More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
>>
>> or it's just me?
> 
> Yeah, that seems be a lot cleaner.  But let's wait until we got
> rid of ->truncate for all filesystems to have the bigger picture.
> 

I want to convert exofs. do you want that I call inode_setattr clearing
ATTR_SIZE bit, and at second stage remove the clearing and rename inode_setattr
to something else?

When it's time to convert exofs, tel me I'll do it. I have dependent work
on top of that, and I want to cleanup the delete_inode as well as some other
leftovers.

(BTW For none-buffer-heads systems like exofs the new way makes lots of sense)

Thanks
Boaz

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-12 14:47                         ` Christoph Hellwig
  2009-07-12 15:00                           ` Boaz Harrosh
@ 2009-07-13  6:59                           ` Nick Piggin
  2009-07-13  8:54                             ` Boaz Harrosh
  2009-07-13 13:53                             ` Christoph Hellwig
  1 sibling, 2 replies; 36+ messages in thread
From: Nick Piggin @ 2009-07-13  6:59 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Boaz Harrosh, linux-fsdevel, Jan Kara, LKML, linux-mm

On Sun, Jul 12, 2009 at 10:47:18AM -0400, Christoph Hellwig wrote:
> On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
> > I wish you would split it.
> > 
> > one - helper to be called by converted file systems
> >       (Which just ignores the ATTR_SIZE)
> > second - to be set into .setattr which does the simple_setsize + above.
> > 
> > More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
> > 
> > or it's just me?
> 
> Yeah, that seems be a lot cleaner.  But let's wait until we got
> rid of ->truncate for all filesystems to have the bigger picture.

Agreed, if it is a common sequence / requirement for filesystems
then of course I will not object to a helper to make things clearer
or share code.

I would like to see inode_setattr renamed into simple_setattr, and
then also .setattr made mandatory, so I don't like to cut code out
of inode_setattr which makes it unable to be the simple_setattr
after the old truncate code is removed.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13  6:59                           ` Nick Piggin
@ 2009-07-13  8:54                             ` Boaz Harrosh
  2009-07-13  9:00                               ` Nick Piggin
  2009-07-13 13:53                             ` Christoph Hellwig
  1 sibling, 1 reply; 36+ messages in thread
From: Boaz Harrosh @ 2009-07-13  8:54 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On 07/13/2009 09:59 AM, Nick Piggin wrote:
> On Sun, Jul 12, 2009 at 10:47:18AM -0400, Christoph Hellwig wrote:
>> On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
>>> I wish you would split it.
>>>
>>> one - helper to be called by converted file systems
>>>       (Which just ignores the ATTR_SIZE)
>>> second - to be set into .setattr which does the simple_setsize + above.
>>>
>>> More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
>>>
>>> or it's just me?
>> Yeah, that seems be a lot cleaner.  But let's wait until we got
>> rid of ->truncate for all filesystems to have the bigger picture.
> 
> Agreed, if it is a common sequence / requirement for filesystems
> then of course I will not object to a helper to make things clearer
> or share code.
> 
> I would like to see inode_setattr renamed into simple_setattr, and
> then also .setattr made mandatory, so I don't like to cut code out
> of inode_setattr which makes it unable to be the simple_setattr
> after the old truncate code is removed.
> 

I thought you meant inode_setattr will go away. There will
only be simple_setattr() and inode_setattr_nosize()

For the time been simple_setattr() will also take care
of old ->truncate FSs. And in the absence of .setattr
simple_setattr() is called. Have I miss-understood?

again please tell me when all this is in effect I want
to do the conversion in exofs.

[BTW these changes are a life saver for me in regard to
the kind of things I need to do for pNFS-exports]

Thanks
Boaz

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13  8:54                             ` Boaz Harrosh
@ 2009-07-13  9:00                               ` Nick Piggin
  2009-07-13 11:17                                 ` Boaz Harrosh
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-13  9:00 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Mon, Jul 13, 2009 at 11:54:15AM +0300, Boaz Harrosh wrote:
> On 07/13/2009 09:59 AM, Nick Piggin wrote:
> > On Sun, Jul 12, 2009 at 10:47:18AM -0400, Christoph Hellwig wrote:
> >> On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
> >>> I wish you would split it.
> >>>
> >>> one - helper to be called by converted file systems
> >>>       (Which just ignores the ATTR_SIZE)
> >>> second - to be set into .setattr which does the simple_setsize + above.
> >>>
> >>> More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
> >>>
> >>> or it's just me?
> >> Yeah, that seems be a lot cleaner.  But let's wait until we got
> >> rid of ->truncate for all filesystems to have the bigger picture.
> > 
> > Agreed, if it is a common sequence / requirement for filesystems
> > then of course I will not object to a helper to make things clearer
> > or share code.
> > 
> > I would like to see inode_setattr renamed into simple_setattr, and
> > then also .setattr made mandatory, so I don't like to cut code out
> > of inode_setattr which makes it unable to be the simple_setattr
> > after the old truncate code is removed.
> > 
> 
> I thought you meant inode_setattr will go away. There will
> only be simple_setattr() and inode_setattr_nosize()
> 
> For the time been simple_setattr() will also take care
> of old ->truncate FSs. And in the absence of .setattr
> simple_setattr() is called. Have I miss-understood?
> 
> again please tell me when all this is in effect I want
> to do the conversion in exofs.

AFAIKS inode_setattr basically is simple_setattr, so I think at some
point it should just get renamed to simple_setattr. Adding
simple_setattr_nosize or similar helper would be fine too. I don't
care much about the exact details... But anyway these things are not
so important to this truncate patchset at the moment.


> [BTW these changes are a life saver for me in regard to
> the kind of things I need to do for pNFS-exports]

You mean the truncate patches? Well that's nice to know. I
guess it has always been possible just to redefine your own
setattr, but now it should be a bit nicer with the truncate
helpers...

Thanks,
Nick

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13  9:00                               ` Nick Piggin
@ 2009-07-13 11:17                                 ` Boaz Harrosh
  2009-07-13 11:32                                   ` Nick Piggin
  0 siblings, 1 reply; 36+ messages in thread
From: Boaz Harrosh @ 2009-07-13 11:17 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On 07/13/2009 12:00 PM, Nick Piggin wrote:
> On Mon, Jul 13, 2009 at 11:54:15AM +0300, Boaz Harrosh wrote:
>> On 07/13/2009 09:59 AM, Nick Piggin wrote:
>>> On Sun, Jul 12, 2009 at 10:47:18AM -0400, Christoph Hellwig wrote:
>>>> On Sun, Jul 12, 2009 at 11:55:51AM +0300, Boaz Harrosh wrote:
>>>>> I wish you would split it.
>>>>>
>>>>> one - helper to be called by converted file systems
>>>>>       (Which just ignores the ATTR_SIZE)
>>>>> second - to be set into .setattr which does the simple_setsize + above.
>>>>>
>>>>> More clear for FS users like me (and that ugly unmask of ATTR_SIZE)
>>>>>
>>>>> or it's just me?
>>>> Yeah, that seems be a lot cleaner.  But let's wait until we got
>>>> rid of ->truncate for all filesystems to have the bigger picture.
>>> Agreed, if it is a common sequence / requirement for filesystems
>>> then of course I will not object to a helper to make things clearer
>>> or share code.
>>>
>>> I would like to see inode_setattr renamed into simple_setattr, and
>>> then also .setattr made mandatory, so I don't like to cut code out
>>> of inode_setattr which makes it unable to be the simple_setattr
>>> after the old truncate code is removed.
>>>
>> I thought you meant inode_setattr will go away. There will
>> only be simple_setattr() and inode_setattr_nosize()
>>
>> For the time been simple_setattr() will also take care
>> of old ->truncate FSs. And in the absence of .setattr
>> simple_setattr() is called. Have I miss-understood?
>>
>> again please tell me when all this is in effect I want
>> to do the conversion in exofs.
> 
> AFAIKS inode_setattr basically is simple_setattr, so I think at some
> point it should just get renamed to simple_setattr. Adding
> simple_setattr_nosize or similar helper would be fine too. I don't
> care much about the exact details... But anyway these things are not
> so important to this truncate patchset at the moment.
> 

I see. So what is the schedule? when are we to convert all FSs?

> 
>> [BTW these changes are a life saver for me in regard to
>> the kind of things I need to do for pNFS-exports]
> 
> You mean the truncate patches? Well that's nice to know. I
> guess it has always been possible just to redefine your own
> setattr, but now it should be a bit nicer with the truncate
> helpers...
> 

OK, yes redefine .setattr, do the right thing in write_begin/end
and the helpers do help a lot, to the point that it was not safe to
open-code all this work. The situation is much better after your
patchset.

> Thanks,
> Nick

Thanks
Boaz

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13 11:17                                 ` Boaz Harrosh
@ 2009-07-13 11:32                                   ` Nick Piggin
  0 siblings, 0 replies; 36+ messages in thread
From: Nick Piggin @ 2009-07-13 11:32 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Christoph Hellwig, linux-fsdevel, Jan Kara, LKML, linux-mm

On Mon, Jul 13, 2009 at 02:17:55PM +0300, Boaz Harrosh wrote:
> On 07/13/2009 12:00 PM, Nick Piggin wrote:
> > AFAIKS inode_setattr basically is simple_setattr, so I think at some
> > point it should just get renamed to simple_setattr. Adding
> > simple_setattr_nosize or similar helper would be fine too. I don't
> > care much about the exact details... But anyway these things are not
> > so important to this truncate patchset at the moment.
> > 
> 
> I see. So what is the schedule? when are we to convert all FSs?

Well the changes can be back compatible, and there is not too
much complexity to support the current system, so I guess it
will just be done as soon as somebody writes the patches.

But probably it is a good idea to convert any filesystem using
->truncate to the new sequence at the same time (ie. don't
just simply add .setattr = simple_setattr, and rely on its calling
vmtruncate, but actually DTRT and remove .truncate at the
same time).

Any patches would be welcome, for any filesystem. Probably it
won't exactly be a rapid process, judging by past experience.


> >> [BTW these changes are a life saver for me in regard to
> >> the kind of things I need to do for pNFS-exports]
> > 
> > You mean the truncate patches? Well that's nice to know. I
> > guess it has always been possible just to redefine your own
> > setattr, but now it should be a bit nicer with the truncate
> > helpers...
> > 
> 
> OK, yes redefine .setattr, do the right thing in write_begin/end
> and the helpers do help a lot, to the point that it was not safe to
> open-code all this work. The situation is much better after your
> patchset.

OK good.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13  6:59                           ` Nick Piggin
  2009-07-13  8:54                             ` Boaz Harrosh
@ 2009-07-13 13:53                             ` Christoph Hellwig
  2009-07-13 14:05                               ` Nick Piggin
  1 sibling, 1 reply; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-13 13:53 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Christoph Hellwig, Boaz Harrosh, linux-fsdevel, Jan Kara, LKML,
	linux-mm

On Mon, Jul 13, 2009 at 08:59:17AM +0200, Nick Piggin wrote:
> Agreed, if it is a common sequence / requirement for filesystems
> then of course I will not object to a helper to make things clearer
> or share code.
> 
> I would like to see inode_setattr renamed into simple_setattr, and
> then also .setattr made mandatory, so I don't like to cut code out
> of inode_setattr which makes it unable to be the simple_setattr
> after the old truncate code is removed.

But inode_setattr isn't anything like simple_setattr.  Except for
the truncate special case it's really just a helper to copy values
into the inode.  It doesn't even even have the same prototype as
->setattr.

A simple_setattr would look like the following:

int simple_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, iattr);
        if (error)
                return error;

	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
		if (vfs_dq_transfer(inode, iattr))
			return -EDQUOT;
	}

	if (iattr->ia_valid & ATTR_ATTR_SIZE &&
	    iattr->ia_size !== i_size_read(inode) &&
	    inode->i_op->new_truncate) {
		error = simple_setsize(inode, attr->ia_size);
		if (error)
			return error;
	}

	return inode_setattr(inode, attr);
}

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13 13:53                             ` Christoph Hellwig
@ 2009-07-13 14:05                               ` Nick Piggin
  2009-07-13 14:10                                 ` Christoph Hellwig
  0 siblings, 1 reply; 36+ messages in thread
From: Nick Piggin @ 2009-07-13 14:05 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Boaz Harrosh, linux-fsdevel, Jan Kara, LKML, linux-mm

On Mon, Jul 13, 2009 at 09:53:24AM -0400, Christoph Hellwig wrote:
> On Mon, Jul 13, 2009 at 08:59:17AM +0200, Nick Piggin wrote:
> > Agreed, if it is a common sequence / requirement for filesystems
> > then of course I will not object to a helper to make things clearer
> > or share code.
> > 
> > I would like to see inode_setattr renamed into simple_setattr, and
> > then also .setattr made mandatory, so I don't like to cut code out
> > of inode_setattr which makes it unable to be the simple_setattr
> > after the old truncate code is removed.
> 
> But inode_setattr isn't anything like simple_setattr.  Except for
> the truncate special case it's really just a helper to copy values
> into the inode.  It doesn't even even have the same prototype as
> ->setattr.
> 
> A simple_setattr would look like the following:

OK that's kind of what I imagine inode_setattr becomes, but now
that you make me look at it in that perspective, it is better to
say inode_setattr returns to a plain helper to copy values into
the inode once we move the truncate code out of there.

It would be good to add your simple_setattr and factor it out
from fnotify_change, then. I guess this is what you plan to do
after my patchset?

> 
> int simple_setattr(struct dentry *dentry, struct iattr *iattr)
> {
> 	struct inode *inode = dentry->d_inode;
> 	int error;
> 
> 	error = inode_change_ok(inode, iattr);
>         if (error)
>                 return error;
> 
> 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
> 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
> 		if (vfs_dq_transfer(inode, iattr))
> 			return -EDQUOT;
> 	}
> 
> 	if (iattr->ia_valid & ATTR_ATTR_SIZE &&
> 	    iattr->ia_size !== i_size_read(inode) &&
> 	    inode->i_op->new_truncate) {
> 		error = simple_setsize(inode, attr->ia_size);
> 		if (error)
> 			return error;
> 	}
> 
> 	return inode_setattr(inode, attr);
> }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [rfc][patch 3/4] fs: new truncate sequence
  2009-07-13 14:05                               ` Nick Piggin
@ 2009-07-13 14:10                                 ` Christoph Hellwig
  0 siblings, 0 replies; 36+ messages in thread
From: Christoph Hellwig @ 2009-07-13 14:10 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Christoph Hellwig, Boaz Harrosh, linux-fsdevel, Jan Kara, LKML,
	linux-mm

On Mon, Jul 13, 2009 at 04:05:15PM +0200, Nick Piggin wrote:
> OK that's kind of what I imagine inode_setattr becomes, but now
> that you make me look at it in that perspective, it is better to
> say inode_setattr returns to a plain helper to copy values into
> the inode once we move the truncate code out of there.
> 
> It would be good to add your simple_setattr and factor it out
> from fnotify_change, then. I guess this is what you plan to do
> after my patchset?

Exactly.  Maybe we can even fold it into your patchset, but I want
to see a few more if not all conversions before going ahead.


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2009-07-13 14:10 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-07 14:44 [rfc][patch 1/4] fs: new truncate helpers Nick Piggin
2009-07-07 14:46 ` [rfc][patch 2/4] fs: use " Nick Piggin
2009-07-07 14:53   ` Christoph Hellwig
2009-07-07 14:48 ` [rfc][patch 3/4] fs: new truncate sequence Nick Piggin
2009-07-07 14:58   ` Christoph Hellwig
2009-07-07 15:02     ` Nick Piggin
2009-07-07 15:07       ` Christoph Hellwig
2009-07-07 15:48         ` Nick Piggin
2009-07-07 16:30           ` Christoph Hellwig
2009-07-08  6:32             ` Nick Piggin
2009-07-08 10:47               ` Christoph Hellwig
2009-07-08 12:34                 ` Nick Piggin
2009-07-08 12:40                   ` Christoph Hellwig
2009-07-08 12:48                     ` Nick Piggin
2009-07-08 16:07                   ` Boaz Harrosh
2009-07-09  7:51                     ` Nick Piggin
2009-07-12  8:55                       ` Boaz Harrosh
2009-07-12 14:47                         ` Christoph Hellwig
2009-07-12 15:00                           ` Boaz Harrosh
2009-07-13  6:59                           ` Nick Piggin
2009-07-13  8:54                             ` Boaz Harrosh
2009-07-13  9:00                               ` Nick Piggin
2009-07-13 11:17                                 ` Boaz Harrosh
2009-07-13 11:32                                   ` Nick Piggin
2009-07-13 13:53                             ` Christoph Hellwig
2009-07-13 14:05                               ` Nick Piggin
2009-07-13 14:10                                 ` Christoph Hellwig
2009-07-07 14:48 ` [rfc][patch 1/4] fs: new truncate helpers Christoph Hellwig
2009-07-07 14:49 ` [rfc][patch 4/4] fs: tmpfs, ext2 use new truncate Nick Piggin
2009-07-07 16:38   ` Christoph Hellwig
2009-07-08  6:53     ` Nick Piggin
2009-07-08 11:14       ` Jan Kara
2009-07-08 12:22         ` Nick Piggin
2009-07-08 12:32           ` Christoph Hellwig
2009-07-08 12:39             ` Nick Piggin
2009-07-08 13:49               ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).