All of lore.kernel.org
 help / color / mirror / Atom feed
From: npiggin@suse.de
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org,
	Mark Fasheh <mark.fasheh@oracle.com>,
	hirofumi@mail.parknet.co.jp
Subject: [patch 20/41] fs: new cont helpers
Date: Fri, 25 May 2007 22:22:04 +1000	[thread overview]
Message-ID: <20070524053156.299232000@linux.local0.net> (raw)
In-Reply-To: 20070524052844.860329000@suse.de

[-- Attachment #1: fs-cont-aops.patch --]
[-- Type: text/plain, Size: 10832 bytes --]

Rework the generic block "cont" routines to handle the new aops.
Supporting cont_prepare_write would take quite a lot of code to support,
so remove it instead (and we later convert all filesystems to use it).

write_begin gets passed AOP_FLAG_CONT_EXPAND when called from
generic_cont_expand, so filesystems can avoid the old hacks they used.

Cc: hirofumi@mail.parknet.co.jp
Cc: Linux Filesystems <linux-fsdevel@vger.kernel.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>

 fs/buffer.c                 |  198 +++++++++++++++++++++-----------------------
 include/linux/buffer_head.h |    5 -
 include/linux/fs.h          |    1 
 mm/filemap.c                |    5 +
 4 files changed, 105 insertions(+), 104 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -2156,14 +2156,14 @@ int block_read_full_page(struct page *pa
 }
 
 /* utility function for filesystems that need to do work on expanding
- * truncates.  Uses prepare/commit_write to allow the filesystem to
+ * truncates.  Uses filesystem pagecache writes to allow the filesystem to
  * deal with the hole.  
  */
-static int __generic_cont_expand(struct inode *inode, loff_t size,
-				 pgoff_t index, unsigned int offset)
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
+	void *fsdata;
 	unsigned long limit;
 	int err;
 
@@ -2176,140 +2176,134 @@ static int __generic_cont_expand(struct 
 	if (size > inode->i_sb->s_maxbytes)
 		goto out;
 
-	err = -ENOMEM;
-	page = grab_cache_page(mapping, index);
-	if (!page)
-		goto out;
-	err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-	if (err) {
-		/*
-		 * ->prepare_write() may have instantiated a few blocks
-		 * outside i_size.  Trim these off again.
-		 */
-		unlock_page(page);
-		page_cache_release(page);
-		vmtruncate(inode, inode->i_size);
+	err = pagecache_write_begin(NULL, mapping, size, 0,
+				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
+				&page, &fsdata);
+	if (err)
 		goto out;
-	}
 
-	err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+	BUG_ON(err > 0);
 
-	unlock_page(page);
-	page_cache_release(page);
-	if (err > 0)
-		err = 0;
 out:
 	return err;
 }
 
 int generic_cont_expand(struct inode *inode, loff_t size)
 {
-	pgoff_t index;
 	unsigned int offset;
 
 	offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
 
 	/* ugh.  in prepare/commit_write, if from==to==start of block, we
-	** skip the prepare.  make sure we never send an offset for the start
-	** of a block
-	*/
+	 * skip the prepare.  make sure we never send an offset for the start
+	 * of a block.
+	 * XXX: actually, this should be handled in those filesystems by
+	 * checking for the AOP_FLAG_CONT_EXPAND flag.
+	 */
 	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
 		/* caller must handle this extra byte. */
-		offset++;
+		size++;
 	}
-	index = size >> PAGE_CACHE_SHIFT;
-
-	return __generic_cont_expand(inode, size, index, offset);
-}
-
-int generic_cont_expand_simple(struct inode *inode, loff_t size)
-{
-	loff_t pos = size - 1;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
-
-	/* prepare/commit_write can handle even if from==to==start of block. */
-	return __generic_cont_expand(inode, size, index, offset);
+	return generic_cont_expand_simple(inode, size);
 }
 
-/*
- * For moronic filesystems that do not allow holes in file.
- * We may have to extend the file.
- */
-
-int cont_prepare_write(struct page *page, unsigned offset,
-		unsigned to, get_block_t *get_block, loff_t *bytes)
+int cont_expand_zero(struct file *file, struct address_space *mapping,
+			loff_t pos, loff_t *bytes)
 {
-	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	struct page *new_page;
-	pgoff_t pgpos;
-	long status;
-	unsigned zerofrom;
 	unsigned blocksize = 1 << inode->i_blkbits;
+	struct page *page;
+	void *fsdata;
+	pgoff_t index, curidx;
+	loff_t curpos;
+	unsigned zerofrom, offset, len;
+	int err = 0;
 
-	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
-		status = -ENOMEM;
-		new_page = grab_cache_page(mapping, pgpos);
-		if (!new_page)
-			goto out;
-		/* we might sleep */
-		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
-			unlock_page(new_page);
-			page_cache_release(new_page);
-			continue;
-		}
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+	index = pos >> PAGE_CACHE_SHIFT;
+	offset = pos & ~PAGE_CACHE_MASK;
+
+	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
+		zerofrom = curpos & ~PAGE_CACHE_MASK;
 		if (zerofrom & (blocksize-1)) {
 			*bytes |= (blocksize-1);
 			(*bytes)++;
 		}
-		status = __block_prepare_write(inode, new_page, zerofrom,
-						PAGE_CACHE_SIZE, get_block);
-		if (status)
-			goto out_unmap;
-		zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
-				KM_USER0);
-		generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
-		unlock_page(new_page);
-		page_cache_release(new_page);
-	}
+		len = PAGE_CACHE_SIZE - zerofrom;
 
-	if (page->index < pgpos) {
-		/* completely inside the area */
-		zerofrom = offset;
-	} else {
-		/* page covers the boundary, find the boundary offset */
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+		err = pagecache_write_begin(file, mapping, curpos, len,
+						AOP_FLAG_UNINTERRUPTIBLE,
+						&page, &fsdata);
+		if (err)
+			goto out;
+		zero_user_page(page, zerofrom, len, KM_USER0);
+		err = pagecache_write_end(file, mapping, curpos, len, len,
+						page, fsdata);
+		if (err < 0)
+			goto out;
+		BUG_ON(err != len);
+		err = 0;
+	}
 
+	/* page covers the boundary, find the boundary offset */
+	if (index == curidx) {
+		zerofrom = curpos & ~PAGE_CACHE_MASK;
 		/* if we will expand the thing last block will be filled */
-		if (to > zerofrom && (zerofrom & (blocksize-1))) {
+		if (offset <= zerofrom) {
+			goto out;
+		}
+		if (zerofrom & (blocksize-1)) {
 			*bytes |= (blocksize-1);
 			(*bytes)++;
 		}
+		len = offset - zerofrom;
 
-		/* starting below the boundary? Nothing to zero out */
-		if (offset <= zerofrom)
-			zerofrom = offset;
-	}
-	status = __block_prepare_write(inode, page, zerofrom, to, get_block);
-	if (status)
-		goto out1;
-	if (zerofrom < offset) {
-		zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
-		__block_commit_write(inode, page, zerofrom, offset);
+		err = pagecache_write_begin(file, mapping, curpos, len,
+						AOP_FLAG_UNINTERRUPTIBLE,
+						&page, &fsdata);
+		if (err)
+			goto out;
+		zero_user_page(page, zerofrom, len, KM_USER0);
+		err = pagecache_write_end(file, mapping, curpos, len, len,
+						page, fsdata);
+		if (err < 0)
+			goto out;
+		BUG_ON(err != len);
+		err = 0;
 	}
-	return 0;
-out1:
-	ClearPageUptodate(page);
-	return status;
-
-out_unmap:
-	ClearPageUptodate(new_page);
-	unlock_page(new_page);
-	page_cache_release(new_page);
 out:
-	return status;
+	return err;
+}
+
+/*
+ * For moronic filesystems that do not allow holes in file.
+ * We may have to extend the file.
+ */
+int cont_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
+			get_block_t *get_block, loff_t *bytes)
+{
+	struct inode *inode = mapping->host;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned zerofrom;
+	int err;
+
+	err = cont_expand_zero(file, mapping, pos, bytes);
+	if (err)
+		goto out;
+
+	zerofrom = *bytes & ~PAGE_CACHE_MASK;
+	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
+		*bytes |= (blocksize-1);
+		(*bytes)++;
+	}
+
+	*pagep = NULL;
+	err = block_write_begin(file, mapping, pos, len,
+				flags, pagep, fsdata, get_block);
+out:
+	return err;
 }
 
 int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -3134,7 +3128,7 @@ EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
-EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
Index: linux-2.6/include/linux/buffer_head.h
===================================================================
--- linux-2.6.orig/include/linux/buffer_head.h
+++ linux-2.6/include/linux/buffer_head.h
@@ -214,8 +214,9 @@ int generic_write_end(struct file *, str
 				struct page *, void *);
 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
-				loff_t *);
+int cont_write_begin(struct file *, struct address_space *, loff_t,
+			unsigned, unsigned, struct page **, void **,
+			get_block_t *, loff_t *);
 int generic_cont_expand(struct inode *inode, loff_t size);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -410,6 +410,7 @@ enum positive_aop_returns {
 };
 
 #define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
+#define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
 
 /*
  * oh the beauties of C type declarations.
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -1756,6 +1756,7 @@ size_t iov_iter_copy_from_user_atomic(st
 
 	return copied;
 }
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
 /*
  * This has the same sideeffects and return value as
@@ -1782,6 +1783,7 @@ size_t iov_iter_copy_from_user(struct pa
 	kunmap(page);
 	return copied;
 }
+EXPORT_SYMBOL(iov_iter_copy_from_user);
 
 static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
 {
@@ -1813,12 +1815,14 @@ void iov_iter_advance(struct iov_iter *i
 	__iov_iter_advance_iov(i, bytes);
 	i->count -= bytes;
 }
+EXPORT_SYMBOL(iov_iter_advance);
 
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
 	char __user *buf = i->iov->iov_base + i->iov_offset;
 	return fault_in_pages_readable(buf, bytes);
 }
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
 /*
  * Return the count of just the current iov_iter segment.
@@ -1831,6 +1835,7 @@ size_t iov_iter_single_seg_count(struct 
 	else
 		return min(i->count, iov->iov_len - i->iov_offset);
 }
+EXPORT_SYMBOL(iov_iter_single_seg_count);
 
 /*
  * Performs necessary checks before doing a write

-- 


  parent reply	other threads:[~2007-05-25 12:37 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-25 12:21 [patch 00/41] Buffered write deadlock fix and new aops for 2.6.22-rc2-mm1 npiggin
2007-05-25 12:21 ` [patch 01/41] mm: revert KERNEL_DS buffered write optimisation npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 02/41] Revert 81b0c8713385ce1b1b9058e916edcf9561ad76d6 npiggin
2007-05-25 12:21   ` npiggin, Andrew Morton
2007-05-25 12:21 ` [patch 03/41] Revert 6527c2bdf1f833cc18e8f42bd97973d583e4aa83 npiggin
2007-05-25 12:21   ` npiggin, Andrew Morton
2007-05-25 12:21 ` [patch 04/41] mm: clean up buffered write code npiggin
2007-05-25 12:21   ` npiggin, Andrew Morton
2007-05-25 12:21 ` [patch 05/41] mm: debug write deadlocks npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 06/41] mm: trim more holes npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 07/41] mm: buffered write cleanup npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 08/41] mm: write iovec cleanup npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 09/41] mm: fix pagecache write deadlocks npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 10/41] mm: buffered write iterator npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 11/41] fs: fix data-loss on error npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 12/41] fs: introduce write_begin, write_end, and perform_write aops npiggin
2007-05-25 12:21   ` npiggin
2007-05-31  4:30   ` Andrew Morton
2007-05-31  4:30     ` Andrew Morton
2007-05-31  4:43     ` Nick Piggin
2007-05-31  4:43       ` Nick Piggin
2007-05-31  4:52       ` Andrew Morton
2007-05-31  4:52         ` Andrew Morton
2007-05-31  4:57         ` Nick Piggin
2007-05-31  4:57           ` Nick Piggin
2007-05-31  5:11           ` Andrew Morton
2007-05-31  5:11             ` Andrew Morton
2007-05-31  5:15             ` Nick Piggin
2007-05-31  5:15               ` Nick Piggin
2007-05-31  7:05               ` Andrew Morton
2007-05-31  7:05                 ` Andrew Morton
2007-06-01  1:18                 ` Nick Piggin
2007-06-01  1:18                   ` Nick Piggin
2007-05-25 12:21 ` [patch 13/41] mm: restore KERNEL_DS optimisations npiggin
2007-05-25 12:21   ` npiggin
2007-05-25 12:21 ` [patch 14/41] implement simple fs aops npiggin
2007-05-25 12:21 ` [patch 15/41] block_dev convert to new aops npiggin
2007-05-25 12:22 ` [patch 16/41] ext2 " npiggin
2007-05-25 12:22 ` [patch 17/41] ext3 " npiggin
2007-05-25 12:22 ` [patch 18/41] ext4 " npiggin
2007-05-25 12:22 ` [patch 19/41] xfs " npiggin
2007-05-25 12:22 ` npiggin [this message]
2007-05-25 12:22 ` [patch 21/41] fat " npiggin
2007-05-25 12:22 ` [patch 22/41] adfs " npiggin
2007-05-26  9:14   ` Russell King
2007-05-27  1:35     ` Nick Piggin
2007-05-25 12:22 ` [patch 23/41] hfs " npiggin
2007-05-25 12:22 ` [patch 24/41] hfsplus " npiggin
2007-05-25 12:22 ` [patch 25/41] hpfs " npiggin
2007-05-25 12:22 ` [patch 26/41] bfs " npiggin
2007-05-25 12:22 ` [patch 27/41] qnx4 " npiggin
2007-05-25 14:35   ` Anders Larsen
2007-05-26  7:23     ` Nick Piggin
2007-05-25 12:22 ` [patch 28/41] reiserfs use generic write npiggin
2007-05-25 12:22 ` [patch 29/41] reiserfs convert to new aops npiggin
2007-05-25 12:22 ` [patch 30/41] reiserfs use generic_cont_expand_simple npiggin
2007-05-25 12:22 ` [patch 31/41] With reiserfs no longer using the weird generic_cont_expand, remove it completely npiggin
2007-05-25 12:22 ` [patch 32/41] nfs convert to new aops npiggin
2007-05-25 12:22 ` [patch 33/41] smb " npiggin
2007-05-25 12:22 ` [patch 34/41] fuse " npiggin
2007-05-25 12:22 ` [patch 35/41] hostfs " npiggin
2007-05-25 12:22 ` [patch 36/41] jffs2 " npiggin
2007-05-25 12:22 ` [patch 37/41] ufs " npiggin
2007-05-25 12:22 ` [patch 38/41] udf " npiggin
2007-05-25 12:22 ` [patch 39/41] sysv " npiggin
2007-05-25 12:22 ` [patch 40/41] minix " npiggin
2007-05-25 12:22 ` [patch 41/41] jfs " npiggin
2007-05-25 22:55 ` [patch 00/41] Buffered write deadlock fix and new aops for 2.6.22-rc2-mm1 Mark Fasheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070524053156.299232000@linux.local0.net \
    --to=npiggin@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=hirofumi@mail.parknet.co.jp \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=mark.fasheh@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.