From: Nick Piggin <npiggin@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linux Filesystems <linux-fsdevel@vger.kernel.org>,
Mark Fasheh <mark.fasheh@oracle.com>,
hirofumi@mail.parknet.co.jp
Subject: [patch 21/44] fs: new cont helpers
Date: Tue, 24 Apr 2007 11:24:07 +1000 [thread overview]
Message-ID: <20070424013435.704954000@suse.de> (raw)
In-Reply-To: 20070424012346.696840000@suse.de
[-- Attachment #1: fs-cont-aops.patch --]
[-- Type: text/plain, Size: 11579 bytes --]
Rework the generic block "cont" routines to handle the new aops.
Continuing to support cont_prepare_write would take quite a lot of code,
so remove it instead (later patches in this series convert all filesystems
that used it to cont_write_begin).
write_begin gets passed AOP_FLAG_CONT_EXPAND when called from
generic_cont_expand, so filesystems can avoid the old hacks they used.
Cc: hirofumi@mail.parknet.co.jp
Cc: Linux Filesystems <linux-fsdevel@vger.kernel.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>
fs/buffer.c | 204 +++++++++++++++++++++-----------------------
include/linux/buffer_head.h | 5 -
include/linux/fs.h | 1
mm/filemap.c | 5 +
4 files changed, 110 insertions(+), 105 deletions(-)
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -2027,6 +2027,7 @@ int generic_write_end(struct file *file,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct inode *inode = mapping->host;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
unlock_page(page);
@@ -2041,6 +2042,8 @@ int generic_write_end(struct file *file,
i_size_write(inode, pos+copied);
mark_inode_dirty(inode);
}
+
+ return copied;
}
EXPORT_SYMBOL(generic_write_end);
@@ -2142,14 +2145,14 @@ int block_read_full_page(struct page *pa
}
/* utility function for filesystems that need to do work on expanding
- * truncates. Uses prepare/commit_write to allow the filesystem to
+ * truncates. Uses filesystem pagecache writes to allow the filesystem to
* deal with the hole.
*/
-static int __generic_cont_expand(struct inode *inode, loff_t size,
- pgoff_t index, unsigned int offset)
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
+ void *fsdata;
unsigned long limit;
int err;
@@ -2162,146 +2165,141 @@ static int __generic_cont_expand(struct
if (size > inode->i_sb->s_maxbytes)
goto out;
- err = -ENOMEM;
- page = grab_cache_page(mapping, index);
- if (!page)
- goto out;
- err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
- if (err) {
- /*
- * ->prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again.
- */
- unlock_page(page);
- page_cache_release(page);
- vmtruncate(inode, inode->i_size);
+ err = pagecache_write_begin(NULL, mapping, size, 0,
+ AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
+ &page, &fsdata);
+ if (err)
goto out;
- }
- err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+ err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+ BUG_ON(err > 0);
- unlock_page(page);
- page_cache_release(page);
- if (err > 0)
- err = 0;
out:
return err;
}
int generic_cont_expand(struct inode *inode, loff_t size)
{
- pgoff_t index;
unsigned int offset;
offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
/* ugh. in prepare/commit_write, if from==to==start of block, we
- ** skip the prepare. make sure we never send an offset for the start
- ** of a block
- */
+ * skip the prepare. make sure we never send an offset for the start
+ * of a block.
+ * XXX: actually, this should be handled in those filesystems by
+ * checking for the AOP_FLAG_CONT_EXPAND flag.
+ */
if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
/* caller must handle this extra byte. */
- offset++;
+ size++;
}
- index = size >> PAGE_CACHE_SHIFT;
-
- return __generic_cont_expand(inode, size, index, offset);
+ return generic_cont_expand_simple(inode, size);
}
-int generic_cont_expand_simple(struct inode *inode, loff_t size)
+int cont_expand_zero(struct file *file, struct address_space *mapping,
+ loff_t pos, loff_t *bytes)
{
- loff_t pos = size - 1;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
-
- /* prepare/commit_write can handle even if from==to==start of block. */
- return __generic_cont_expand(inode, size, index, offset);
-}
-
-/*
- * For moronic filesystems that do not allow holes in file.
- * We may have to extend the file.
- */
-
-int cont_prepare_write(struct page *page, unsigned offset,
- unsigned to, get_block_t *get_block, loff_t *bytes)
-{
- struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
- struct page *new_page;
- pgoff_t pgpos;
- long status;
- unsigned zerofrom;
unsigned blocksize = 1 << inode->i_blkbits;
+ struct page *page;
+ void *fsdata;
+ pgoff_t index, curidx;
+ loff_t curpos;
+ unsigned zerofrom, offset, len;
void *kaddr;
+ int err = 0;
- while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
- status = -ENOMEM;
- new_page = grab_cache_page(mapping, pgpos);
- if (!new_page)
- goto out;
- /* we might sleep */
- if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
- unlock_page(new_page);
- page_cache_release(new_page);
- continue;
- }
- zerofrom = *bytes & ~PAGE_CACHE_MASK;
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & ~PAGE_CACHE_MASK;
+
+ while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
+ zerofrom = curpos & ~PAGE_CACHE_MASK;
if (zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
- status = __block_prepare_write(inode, new_page, zerofrom,
- PAGE_CACHE_SIZE, get_block);
- if (status)
- goto out_unmap;
- kaddr = kmap_atomic(new_page, KM_USER0);
- memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
- flush_dcache_page(new_page);
+ len = PAGE_CACHE_SIZE - zerofrom;
+
+ err = pagecache_write_begin(file, mapping, curpos, len,
+ AOP_FLAG_UNINTERRUPTIBLE,
+ &page, &fsdata);
+ if (err)
+ goto out;
+ kaddr = kmap_atomic(page, KM_USER0);
+ memset(kaddr+zerofrom, 0, len);
+ flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
- generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
- unlock_page(new_page);
- page_cache_release(new_page);
+ err = pagecache_write_end(file, mapping, curpos, len, len,
+ page, fsdata);
+ if (err < 0)
+ goto out;
+ BUG_ON(err != len);
+ err = 0;
}
- if (page->index < pgpos) {
- /* completely inside the area */
- zerofrom = offset;
- } else {
- /* page covers the boundary, find the boundary offset */
- zerofrom = *bytes & ~PAGE_CACHE_MASK;
-
+ /* page covers the boundary, find the boundary offset */
+ if (index == curidx) {
+ zerofrom = curpos & ~PAGE_CACHE_MASK;
/* if we will expand the thing last block will be filled */
- if (to > zerofrom && (zerofrom & (blocksize-1))) {
+ if (offset <= zerofrom) {
+ goto out;
+ }
+ if (zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
+ len = offset - zerofrom;
- /* starting below the boundary? Nothing to zero out */
- if (offset <= zerofrom)
- zerofrom = offset;
- }
- status = __block_prepare_write(inode, page, zerofrom, to, get_block);
- if (status)
- goto out1;
- if (zerofrom < offset) {
+ err = pagecache_write_begin(file, mapping, curpos, len,
+ AOP_FLAG_UNINTERRUPTIBLE,
+ &page, &fsdata);
+ if (err)
+ goto out;
kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr+zerofrom, 0, offset-zerofrom);
+ memset(kaddr+zerofrom, 0, len);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
- __block_commit_write(inode, page, zerofrom, offset);
+ err = pagecache_write_end(file, mapping, curpos, len, len,
+ page, fsdata);
+ if (err < 0)
+ goto out;
+ BUG_ON(err != len);
+ err = 0;
}
- return 0;
-out1:
- ClearPageUptodate(page);
- return status;
-
-out_unmap:
- ClearPageUptodate(new_page);
- unlock_page(new_page);
- page_cache_release(new_page);
out:
- return status;
+ return err;
+}
+
+/*
+ * For moronic filesystems that do not allow holes in file.
+ * We may have to extend the file.
+ */
+int cont_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block, loff_t *bytes)
+{
+ struct inode *inode = mapping->host;
+ unsigned blocksize = 1 << inode->i_blkbits;
+ unsigned zerofrom;
+ int err;
+
+ err = cont_expand_zero(file, mapping, pos, bytes);
+ if (err)
+ goto out;
+
+ zerofrom = *bytes & ~PAGE_CACHE_MASK;
+ if (pos+len > *bytes && zerofrom & (blocksize-1)) {
+ *bytes |= (blocksize-1);
+ (*bytes)++;
+ }
+
+ *pagep = NULL;
+ err = block_write_begin(file, mapping, pos, len,
+ flags, pagep, fsdata, get_block);
+out:
+ return err;
}
int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -3160,7 +3158,7 @@ EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(block_write_full_page);
-EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(cont_write_begin);
EXPORT_SYMBOL(end_buffer_read_sync);
EXPORT_SYMBOL(end_buffer_write_sync);
EXPORT_SYMBOL(file_fsync);
Index: linux-2.6/include/linux/buffer_head.h
===================================================================
--- linux-2.6.orig/include/linux/buffer_head.h
+++ linux-2.6/include/linux/buffer_head.h
@@ -213,8 +213,9 @@ int generic_write_end(struct file *, str
struct page *, void *);
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
- loff_t *);
+int cont_write_begin(struct file *, struct address_space *, loff_t,
+ unsigned, unsigned, struct page **, void **,
+ get_block_t *, loff_t *);
int generic_cont_expand(struct inode *inode, loff_t size);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
int block_commit_write(struct page *page, unsigned from, unsigned to);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -392,6 +392,7 @@ enum positive_aop_returns {
};
#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
+#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
/*
* oh the beauties of C type declarations.
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -1789,6 +1789,7 @@ size_t iov_iter_copy_from_user_atomic(st
return copied;
}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
/*
* This has the same sideeffects and return value as
@@ -1815,6 +1816,7 @@ size_t iov_iter_copy_from_user(struct pa
kunmap(page);
return copied;
}
+EXPORT_SYMBOL(iov_iter_copy_from_user);
static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
{
@@ -1846,6 +1848,7 @@ void iov_iter_advance(struct iov_iter *i
__iov_iter_advance_iov(i, bytes);
i->count -= bytes;
}
+EXPORT_SYMBOL(iov_iter_advance);
int iov_iter_fault_in_readable(struct iov_iter *i)
{
@@ -1853,6 +1856,7 @@ int iov_iter_fault_in_readable(struct io
char __user *buf = i->iov->iov_base + i->iov_offset;
return fault_in_pages_readable(buf, seglen);
}
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
/*
* Return the count of just the current iov_iter segment.
@@ -1865,6 +1869,7 @@ size_t iov_iter_single_seg_count(struct
else
return min(i->count, iov->iov_len - i->iov_offset);
}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
/*
* Performs necessary checks before doing a write
--
next prev parent reply other threads:[~2007-04-24 5:21 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-24 1:23 [patch 00/44] Buffered write deadlock fix and new aops for 2.6.21-rc6-mm1 Nick Piggin
2007-04-24 1:23 ` [patch 01/44] mm: revert KERNEL_DS buffered write optimisation Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 02/44] Revert 81b0c8713385ce1b1b9058e916edcf9561ad76d6 Nick Piggin
2007-04-24 1:23 ` Nick Piggin, Andrew Morton
2007-04-24 1:23 ` [patch 03/44] Revert 6527c2bdf1f833cc18e8f42bd97973d583e4aa83 Nick Piggin
2007-04-24 1:23 ` Nick Piggin, Andrew Morton
2007-04-24 1:23 ` [patch 04/44] mm: clean up buffered write code Nick Piggin
2007-04-24 1:23 ` Nick Piggin, Andrew Morton
2007-04-24 1:23 ` [patch 05/44] mm: debug write deadlocks Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 06/44] mm: trim more holes Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 6:07 ` Neil Brown
2007-04-24 6:07 ` Neil Brown
2007-04-24 6:17 ` Nick Piggin
2007-04-24 6:17 ` Nick Piggin
2007-04-24 1:23 ` [patch 07/44] mm: buffered write cleanup Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 08/44] mm: write iovec cleanup Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 09/44] mm: fix pagecache write deadlocks Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 10/44] mm: buffered write iterator Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 11/44] fs: fix data-loss on error Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 1:23 ` [patch 12/44] fs: introduce write_begin, write_end, and perform_write aops Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 6:59 ` Neil Brown
2007-04-24 6:59 ` Neil Brown
2007-04-24 7:23 ` Nick Piggin
2007-04-24 7:23 ` Nick Piggin
2007-04-24 7:49 ` Neil Brown
2007-04-24 7:49 ` Neil Brown
2007-04-24 10:37 ` Nick Piggin
2007-04-24 10:37 ` Nick Piggin
2007-04-24 1:23 ` [patch 13/44] mm: restore KERNEL_DS optimisations Nick Piggin
2007-04-24 1:23 ` Nick Piggin
2007-04-24 10:43 ` Christoph Hellwig
2007-04-24 10:43 ` Christoph Hellwig
2007-04-24 11:03 ` Nick Piggin
2007-04-24 11:03 ` Nick Piggin
2007-04-24 1:24 ` [patch 14/44] implement simple fs aops Nick Piggin
2007-04-24 1:24 ` [patch 15/44] block_dev convert to new aops Nick Piggin
2007-04-24 1:24 ` [patch 16/44] rd " Nick Piggin
2007-04-24 10:46 ` Christoph Hellwig
2007-04-24 11:05 ` Nick Piggin
2007-04-24 11:11 ` Christoph Hellwig
2007-04-24 11:16 ` Nick Piggin
2007-04-24 11:18 ` Christoph Hellwig
2007-04-24 11:20 ` Nick Piggin
2007-04-24 11:42 ` Neil Brown
2007-04-24 1:24 ` [patch 17/44] ext2 " Nick Piggin
2007-04-24 1:24 ` [patch 18/44] ext3 " Nick Piggin
2007-04-24 1:24 ` [patch 19/44] ext4 " Nick Piggin
2007-04-24 1:24 ` [patch 20/44] xfs " Nick Piggin
2007-04-24 1:24 ` Nick Piggin [this message]
2007-04-24 1:24 ` [patch 22/44] fat " Nick Piggin
2007-04-24 1:24 ` [patch 23/44] adfs " Nick Piggin
2007-04-24 1:24 ` [patch 24/44] affs " Nick Piggin
2007-04-24 1:24 ` [patch 25/44] hfs " Nick Piggin
2007-04-24 1:24 ` [patch 26/44] hfsplus " Nick Piggin
2007-04-24 1:24 ` [patch 27/44] hpfs " Nick Piggin
2007-04-24 1:24 ` [patch 28/44] bfs " Nick Piggin
2007-04-24 1:24 ` [patch 29/44] qnx4 " Nick Piggin
2007-04-24 1:24 ` [patch 30/44] nfs " Nick Piggin
2007-04-24 1:24 ` [patch 31/44] smb " Nick Piggin
2007-04-24 1:24 ` [patch 32/44] ocfs2: " Nick Piggin
2007-04-24 1:24 ` [patch 33/44] gfs2 " Nick Piggin
2007-04-24 1:24 ` [patch 34/44] fs: no AOP_TRUNCATED_PAGE for writes Nick Piggin
2007-04-24 1:24 ` [patch 35/44] ecryptfs convert to new aops Nick Piggin
2007-04-24 1:24 ` [patch 36/44] fuse " Nick Piggin
2007-04-24 1:24 ` [patch 37/44] hostfs " Nick Piggin
2007-04-27 16:11 ` Jeff Dike
2007-04-24 1:24 ` [patch 38/44] jffs2 " Nick Piggin
2007-04-24 1:24 ` [patch 39/44] cifs " Nick Piggin
2007-04-24 1:24 ` [patch 40/44] ufs " Nick Piggin
2007-04-24 1:24 ` [patch 41/44] udf " Nick Piggin
2007-04-24 1:24 ` [patch 42/44] sysv " Nick Piggin
2007-04-24 1:24 ` [patch 43/44] minix " Nick Piggin
2007-04-24 1:24 ` [patch 44/44] jfs " Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070424013435.704954000@suse.de \
--to=npiggin@suse.de \
--cc=akpm@linux-foundation.org \
--cc=hirofumi@mail.parknet.co.jp \
--cc=linux-fsdevel@vger.kernel.org \
--cc=mark.fasheh@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.