* [patch] fs: remove prepare_write/commit_write
@ 2008-09-24 23:41 Nick Piggin
2008-09-24 23:48 ` Christoph Hellwig
2008-10-17 17:34 ` Christoph Hellwig
0 siblings, 2 replies; 20+ messages in thread
From: Nick Piggin @ 2008-09-24 23:41 UTC (permalink / raw)
To: Badari Pulavarty, linux-fsdevel, akpm
Just FYI (I'll resend for -mm after the last conversions get in), this shows
how much cruft we shed after the old aops go away...
Index: linux-2.6/drivers/block/loop.c
===================================================================
--- linux-2.6.orig/drivers/block/loop.c
+++ linux-2.6/drivers/block/loop.c
@@ -40,8 +40,7 @@
* Heinz Mauelshagen <mge@sistina.com>, Feb 2002
*
* Support for falling back on the write file operation when the address space
- * operations prepare_write and/or commit_write are not available on the
- * backing filesystem.
+ * operations write_begin is not available on the backing filesystem.
* Anton Altaparmakov, 16 Feb 2005
*
* Still To Fix:
@@ -766,7 +765,7 @@ static int loop_set_fd(struct loop_devic
*/
if (!file->f_op->splice_read)
goto out_putf;
- if (aops->prepare_write || aops->write_begin)
+ if (aops->write_begin)
lo_flags |= LO_FLAGS_USE_AOPS;
if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
lo_flags |= LO_FLAGS_READ_ONLY;
Index: linux-2.6/fs/fat/inode.c
===================================================================
--- linux-2.6.orig/fs/fat/inode.c
+++ linux-2.6/fs/fat/inode.c
@@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, str
if (rw == WRITE) {
/*
- * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+ * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
* so we need to update the ->mmu_private to block boundary.
*
* But we must fill the remaining area or hole by nul for
Index: linux-2.6/fs/ocfs2/file.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/file.c
+++ linux-2.6/fs/ocfs2/file.c
@@ -837,8 +837,7 @@ leave:
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
- * worry about recursive locking in ->prepare_write() and
- * ->commit_write(). */
+ * worry about recursive locking in ->write_begin() and ->write_end(). */
static int ocfs2_write_zero_page(struct inode *inode,
u64 size)
{
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c
+++ linux-2.6/fs/splice.c
@@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_ino
};
/*
- * The actor worker might be calling ->prepare_write and
- * ->commit_write. Most of the time, these expect i_mutex to
+ * The actor worker might be calling ->write_begin and
+ * ->write_end. Most of the time, these expect i_mutex to
* be held. Since this may result in an ABBA deadlock with
* pipe->inode, we have to order lock acquiry here.
*/
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -479,13 +479,6 @@ struct address_space_operations {
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- /*
- * ext3 requires that a successful prepare_write() call be followed
- * by a commit_write() call - they must be balanced
- */
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -2016,48 +2016,8 @@ int pagecache_write_begin(struct file *f
{
const struct address_space_operations *aops = mapping->a_ops;
- if (aops->write_begin) {
- return aops->write_begin(file, mapping, pos, len, flags,
+ return aops->write_begin(file, mapping, pos, len, flags,
pagep, fsdata);
- } else {
- int ret;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- struct inode *inode = mapping->host;
- struct page *page;
-again:
- page = __grab_cache_page(mapping, index);
- *pagep = page;
- if (!page)
- return -ENOMEM;
-
- if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
- /*
- * There is no way to resolve a short write situation
- * for a !Uptodate page (except by double copying in
- * the caller done by generic_perform_write_2copy).
- *
- * Instead, we have to bring it uptodate here.
- */
- ret = aops->readpage(file, page);
- page_cache_release(page);
- if (ret) {
- if (ret == AOP_TRUNCATED_PAGE)
- goto again;
- return ret;
- }
- goto again;
- }
-
- ret = aops->prepare_write(file, page, offset, offset+len);
- if (ret) {
- unlock_page(page);
- page_cache_release(page);
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
- }
- return ret;
- }
}
EXPORT_SYMBOL(pagecache_write_begin);
@@ -2068,28 +2028,8 @@ int pagecache_write_end(struct file *fil
const struct address_space_operations *aops = mapping->a_ops;
int ret;
- if (aops->write_end) {
- mark_page_accessed(page);
- ret = aops->write_end(file, mapping, pos, len, copied,
- page, fsdata);
- } else {
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- struct inode *inode = mapping->host;
-
- flush_dcache_page(page);
- ret = aops->commit_write(file, page, offset, offset+len);
- unlock_page(page);
- mark_page_accessed(page);
- page_cache_release(page);
-
- if (ret < 0) {
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
- } else if (ret > 0)
- ret = min_t(size_t, copied, ret);
- else
- ret = copied;
- }
+ mark_page_accessed(page);
+ ret = aops->write_end(file, mapping, pos, len, copied, page, fsdata);
return ret;
}
@@ -2213,174 +2153,6 @@ repeat:
}
EXPORT_SYMBOL(__grab_cache_page);
-static ssize_t generic_perform_write_2copy(struct file *file,
- struct iov_iter *i, loff_t pos)
-{
- struct address_space *mapping = file->f_mapping;
- const struct address_space_operations *a_ops = mapping->a_ops;
- struct inode *inode = mapping->host;
- long status = 0;
- ssize_t written = 0;
-
- do {
- struct page *src_page;
- struct page *page;
- pgoff_t index; /* Pagecache index for current page */
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
- size_t copied; /* Bytes copied from user */
-
- offset = (pos & (PAGE_CACHE_SIZE - 1));
- index = pos >> PAGE_CACHE_SHIFT;
- bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
- iov_iter_count(i));
-
- /*
- * a non-NULL src_page indicates that we're doing the
- * copy via get_user_pages and kmap.
- */
- src_page = NULL;
-
- /*
- * Bring in the user page that we will copy from _first_.
- * Otherwise there's a nasty deadlock on copying from the
- * same page as we're writing to, without it being marked
- * up-to-date.
- *
- * Not only is this an optimisation, but it is also required
- * to check that the address is actually valid, when atomic
- * usercopies are used, below.
- */
- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
- status = -EFAULT;
- break;
- }
-
- page = __grab_cache_page(mapping, index);
- if (!page) {
- status = -ENOMEM;
- break;
- }
-
- /*
- * non-uptodate pages cannot cope with short copies, and we
- * cannot take a pagefault with the destination page locked.
- * So pin the source page to copy it.
- */
- if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
- unlock_page(page);
-
- src_page = alloc_page(GFP_KERNEL);
- if (!src_page) {
- page_cache_release(page);
- status = -ENOMEM;
- break;
- }
-
- /*
- * Cannot get_user_pages with a page locked for the
- * same reason as we can't take a page fault with a
- * page locked (as explained below).
- */
- copied = iov_iter_copy_from_user(src_page, i,
- offset, bytes);
- if (unlikely(copied == 0)) {
- status = -EFAULT;
- page_cache_release(page);
- page_cache_release(src_page);
- break;
- }
- bytes = copied;
-
- lock_page(page);
- /*
- * Can't handle the page going uptodate here, because
- * that means we would use non-atomic usercopies, which
- * zero out the tail of the page, which can cause
- * zeroes to become transiently visible. We could just
- * use a non-zeroing copy, but the APIs aren't too
- * consistent.
- */
- if (unlikely(!page->mapping || PageUptodate(page))) {
- unlock_page(page);
- page_cache_release(page);
- page_cache_release(src_page);
- continue;
- }
- }
-
- status = a_ops->prepare_write(file, page, offset, offset+bytes);
- if (unlikely(status))
- goto fs_write_aop_error;
-
- if (!src_page) {
- /*
- * Must not enter the pagefault handler here, because
- * we hold the page lock, so we might recursively
- * deadlock on the same lock, or get an ABBA deadlock
- * against a different lock, or against the mmap_sem
- * (which nests outside the page lock). So increment
- * preempt count, and use _atomic usercopies.
- *
- * The page is uptodate so we are OK to encounter a
- * short copy: if unmodified parts of the page are
- * marked dirty and written out to disk, it doesn't
- * really matter.
- */
- pagefault_disable();
- copied = iov_iter_copy_from_user_atomic(page, i,
- offset, bytes);
- pagefault_enable();
- } else {
- void *src, *dst;
- src = kmap_atomic(src_page, KM_USER0);
- dst = kmap_atomic(page, KM_USER1);
- memcpy(dst + offset, src + offset, bytes);
- kunmap_atomic(dst, KM_USER1);
- kunmap_atomic(src, KM_USER0);
- copied = bytes;
- }
- flush_dcache_page(page);
-
- status = a_ops->commit_write(file, page, offset, offset+bytes);
- if (unlikely(status < 0))
- goto fs_write_aop_error;
- if (unlikely(status > 0)) /* filesystem did partial write */
- copied = min_t(size_t, copied, status);
-
- unlock_page(page);
- mark_page_accessed(page);
- page_cache_release(page);
- if (src_page)
- page_cache_release(src_page);
-
- iov_iter_advance(i, copied);
- pos += copied;
- written += copied;
-
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- continue;
-
-fs_write_aop_error:
- unlock_page(page);
- page_cache_release(page);
- if (src_page)
- page_cache_release(src_page);
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (pos + bytes > inode->i_size)
- vmtruncate(inode, inode->i_size);
- break;
- } while (iov_iter_count(i));
-
- return written ? written : status;
-}
-
static ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
{
@@ -2481,10 +2253,7 @@ generic_file_buffered_write(struct kiocb
struct iov_iter i;
iov_iter_init(&i, iov, nr_segs, count, written);
- if (a_ops->write_begin)
- status = generic_perform_write(file, &i, pos);
- else
- status = generic_perform_write_2copy(file, &i, pos);
+ status = generic_perform_write(file, &i, pos);
if (likely(status >= 0)) {
written += status;
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking
+++ linux-2.6/Documentation/filesystems/Locking
@@ -161,8 +161,12 @@ prototypes:
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+ int (*write_begin)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+ int (*write_end)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
int (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, int);
@@ -180,8 +184,6 @@ sync_page: no maybe
writepages: no
set_page_dirty no no
readpages: no
-prepare_write: no yes yes
-commit_write: no yes yes
write_begin: no locks the page yes
write_end: no yes, unlocks yes
perform_write: no n/a yes
@@ -191,7 +193,7 @@ releasepage: no yes
direct_IO: no
launder_page: no yes
- ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
+ ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
may be called from the request handler (/dev/loop).
->readpage() unlocks the page, either synchronously or via I/O
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6/Documentation/filesystems/vfs.txt
@@ -492,7 +492,7 @@ written-back to storage typically in who
address_space has finer control of write sizes.
The read process essentially only requires 'readpage'. The write
-process is more complicated and uses prepare_write/commit_write or
+process is more complicated and uses write_begin/write_end or
set_page_dirty to write data into the address_space, and writepage,
sync_page, and writepages to writeback data to storage.
@@ -521,8 +521,6 @@ struct address_space_operations {
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
@@ -598,37 +596,7 @@ struct address_space_operations {
readpages is only used for read-ahead, so read errors are
ignored. If anything goes wrong, feel free to give up.
- prepare_write: called by the generic write path in VM to set up a write
- request for a page. This indicates to the address space that
- the given range of bytes is about to be written. The
- address_space should check that the write will be able to
- complete, by allocating space if necessary and doing any other
- internal housekeeping. If the write will update parts of
- any basic-blocks on storage, then those blocks should be
- pre-read (if they haven't been read already) so that the
- updated blocks can be written out properly.
- The page will be locked.
-
- Note: the page _must not_ be marked uptodate in this function
- (or anywhere else) unless it actually is uptodate right now. As
- soon as a page is marked uptodate, it is possible for a concurrent
- read(2) to copy it to userspace.
-
- commit_write: If prepare_write succeeds, new data will be copied
- into the page and then commit_write will be called. It will
- typically update the size of the file (if appropriate) and
- mark the inode as dirty, and do any other related housekeeping
- operations. It should avoid returning an error if possible -
- errors should have been handled by prepare_write.
-
- write_begin: This is intended as a replacement for prepare_write. The
- key differences being that:
- - it returns a locked page (in *pagep) rather than being
- given a pre locked page;
- - it must be able to cope with short writes (where the
- length passed to write_begin is greater than the number
- of bytes copied into the page).
-
+ write_begin:
Called by the generic buffered write code to ask the filesystem to
prepare to write len bytes at the given offset in the file. The
address_space should check that the write will be able to complete,
@@ -640,6 +608,9 @@ struct address_space_operations {
The filesystem must return the locked pagecache page for the specified
offset, in *pagep, for the caller to write into.
+ It must be able to cope with short writes (where the length passed to
+ write_begin is greater than the number of bytes copied into the page).
+
flags is a field for AOP_FLAG_xxx flags, described in
include/linux/fs.h.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-09-24 23:41 [patch] fs: remove prepare_write/commit_write Nick Piggin
@ 2008-09-24 23:48 ` Christoph Hellwig
2008-09-24 23:54 ` Nick Piggin
2008-10-17 17:34 ` Christoph Hellwig
1 sibling, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2008-09-24 23:48 UTC (permalink / raw)
To: Nick Piggin; +Cc: Badari Pulavarty, linux-fsdevel, akpm
On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> --- linux-2.6.orig/mm/filemap.c
> +++ linux-2.6/mm/filemap.c
> @@ -2016,48 +2016,8 @@ int pagecache_write_begin(struct file *f
> {
> const struct address_space_operations *aops = mapping->a_ops;
>
> + return aops->write_begin(file, mapping, pos, len, flags,
> pagep, fsdata);
> }
> EXPORT_SYMBOL(pagecache_write_begin);
Please kill pagecache_write_begin/pagecache_write_end, too.
> static ssize_t generic_perform_write(struct file *file,
> struct iov_iter *i, loff_t pos)
> {
> @@ -2481,10 +2253,7 @@ generic_file_buffered_write(struct kiocb
> struct iov_iter i;
>
> iov_iter_init(&i, iov, nr_segs, count, written);
> - if (a_ops->write_begin)
> - status = generic_perform_write(file, &i, pos);
> - else
> - status = generic_perform_write_2copy(file, &i, pos);
> + status = generic_perform_write(file, &i, pos);
>
> if (likely(status >= 0)) {
> written += status;
Can we merge generic_perform_write back into
generic_file_buffered_write? generic_file_buffered_write is left as a
rather useless wrapper after this.
> - ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
> + ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
> may be called from the request handler (/dev/loop).
This comment can go away, all file I/O done by the loop device now is
from a kthread.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-09-24 23:48 ` Christoph Hellwig
@ 2008-09-24 23:54 ` Nick Piggin
0 siblings, 0 replies; 20+ messages in thread
From: Nick Piggin @ 2008-09-24 23:54 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Badari Pulavarty, linux-fsdevel, akpm
On Wed, Sep 24, 2008 at 07:48:19PM -0400, Christoph Hellwig wrote:
> On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> > --- linux-2.6.orig/mm/filemap.c
> > +++ linux-2.6/mm/filemap.c
> > @@ -2016,48 +2016,8 @@ int pagecache_write_begin(struct file *f
> > {
> > const struct address_space_operations *aops = mapping->a_ops;
> >
> > + return aops->write_begin(file, mapping, pos, len, flags,
> > pagep, fsdata);
> > }
> > EXPORT_SYMBOL(pagecache_write_begin);
>
> Please kill pagecache_write_begin/pagecache_write_end, too.
Well.... we *could* have it handle writes for filesystems that don't
implement write_begin/write_end, couldn't we?
> > static ssize_t generic_perform_write(struct file *file,
> > struct iov_iter *i, loff_t pos)
> > {
> > @@ -2481,10 +2253,7 @@ generic_file_buffered_write(struct kiocb
> > struct iov_iter i;
> >
> > iov_iter_init(&i, iov, nr_segs, count, written);
> > - if (a_ops->write_begin)
> > - status = generic_perform_write(file, &i, pos);
> > - else
> > - status = generic_perform_write_2copy(file, &i, pos);
> > + status = generic_perform_write(file, &i, pos);
> >
> > if (likely(status >= 0)) {
> > written += status;
>
> Can we merge generic_perform_write back into
> generic_file_buffered_write? generic_file_buffered_write is left as a
> rather useless wrapper after this.
Yeah that makes sense.
> > - ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
> > + ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
> > may be called from the request handler (/dev/loop).
>
> This comment can go away, all file I/O done by the loop device now is
> from a kthread.
Feel like sending a patch to kill that now?
Thanks,
Nick
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-09-24 23:41 [patch] fs: remove prepare_write/commit_write Nick Piggin
2008-09-24 23:48 ` Christoph Hellwig
@ 2008-10-17 17:34 ` Christoph Hellwig
2008-10-17 18:45 ` Badari Pulavarty
2008-10-18 1:57 ` [patch] fs: remove prepare_write/commit_write Nick Piggin
1 sibling, 2 replies; 20+ messages in thread
From: Christoph Hellwig @ 2008-10-17 17:34 UTC (permalink / raw)
To: Nick Piggin; +Cc: Badari Pulavarty, linux-fsdevel, akpm
On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> Just FYI (I'll resend for -mm after the last conversions get in), this shows
> how much cruft we shed after the old aops go away...
All conversions are in mainline now, so can we get this patch in, too
for 2.6.28?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-17 17:34 ` Christoph Hellwig
@ 2008-10-17 18:45 ` Badari Pulavarty
2008-10-21 6:20 ` Nick Piggin
2008-10-18 1:57 ` [patch] fs: remove prepare_write/commit_write Nick Piggin
1 sibling, 1 reply; 20+ messages in thread
From: Badari Pulavarty @ 2008-10-17 18:45 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Nick Piggin, linux-fsdevel, akpm
On Fri, 2008-10-17 at 13:34 -0400, Christoph Hellwig wrote:
> On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> > Just FYI (I'll resend for -mm after the last conversions get in), this shows
> > how much cruft we shed after the old aops go away...
>
> All conversions are in mainline now, so can we get this patch in, too
> for 2.6.28?
>
This patch applied cleanly against 2.6.27-git7, compiled and booted
fine.
FYI.
Thanks,
Badari
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-17 17:34 ` Christoph Hellwig
2008-10-17 18:45 ` Badari Pulavarty
@ 2008-10-18 1:57 ` Nick Piggin
1 sibling, 0 replies; 20+ messages in thread
From: Nick Piggin @ 2008-10-18 1:57 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Badari Pulavarty, linux-fsdevel, akpm
On Fri, Oct 17, 2008 at 01:34:45PM -0400, Christoph Hellwig wrote:
> On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> > Just FYI (I'll resend for -mm after the last conversions get in), this shows
> > how much cruft we shed after the old aops go away...
>
> All conversions are in mainline now, so can we get this patch in, too
> for 2.6.28?
Ahh, good. Yes it should go upstream definitely. I'll update the patch and
resend with your feedback incorporated.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [patch] fs: remove prepare_write/commit_write
2008-10-17 18:45 ` Badari Pulavarty
@ 2008-10-21 6:20 ` Nick Piggin
2008-10-21 21:35 ` Andrew Morton
2008-10-22 0:43 ` Andrew Morton
0 siblings, 2 replies; 20+ messages in thread
From: Nick Piggin @ 2008-10-21 6:20 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Christoph Hellwig, linux-fsdevel, akpm, Badari Pulavarty
On Fri, Oct 17, 2008 at 11:45:28AM -0700, Badari Pulavarty wrote:
> On Fri, 2008-10-17 at 13:34 -0400, Christoph Hellwig wrote:
> > On Thu, Sep 25, 2008 at 01:41:16AM +0200, Nick Piggin wrote:
> > > Just FYI (I'll resend for -mm after the last conversions get in), this shows
> > > how much cruft we shed after the old aops go away...
> >
> > All conversions are in mainline now, so can we get this patch in, too
> > for 2.6.28?
> >
>
> This patch applied cleanly against 2.6.27-git7, compiled and booted
> fine.
>
> FYI.
Hi Linus,
Please apply. Subsequent patch to fold generic_perform_write into
generic_file_buffered_write is not completely trivial, so I'll send
that one to -mm. This one is more obviously OK as prepare_write should
always be NULL now.
--
Nothing uses prepare_write or commit_write. Remove them from the tree
completely.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
Index: linux-2.6/drivers/block/loop.c
===================================================================
--- linux-2.6.orig/drivers/block/loop.c
+++ linux-2.6/drivers/block/loop.c
@@ -40,8 +40,7 @@
* Heinz Mauelshagen <mge@sistina.com>, Feb 2002
*
* Support for falling back on the write file operation when the address space
- * operations prepare_write and/or commit_write are not available on the
- * backing filesystem.
+ * operations write_begin is not available on the backing filesystem.
* Anton Altaparmakov, 16 Feb 2005
*
* Still To Fix:
@@ -766,7 +765,7 @@ static int loop_set_fd(struct loop_devic
*/
if (!file->f_op->splice_read)
goto out_putf;
- if (aops->prepare_write || aops->write_begin)
+ if (aops->write_begin)
lo_flags |= LO_FLAGS_USE_AOPS;
if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
lo_flags |= LO_FLAGS_READ_ONLY;
Index: linux-2.6/fs/fat/inode.c
===================================================================
--- linux-2.6.orig/fs/fat/inode.c
+++ linux-2.6/fs/fat/inode.c
@@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, str
if (rw == WRITE) {
/*
- * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+ * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
* so we need to update the ->mmu_private to block boundary.
*
* But we must fill the remaining area or hole by nul for
Index: linux-2.6/fs/ocfs2/file.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/file.c
+++ linux-2.6/fs/ocfs2/file.c
@@ -679,8 +679,7 @@ leave:
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
- * worry about recursive locking in ->prepare_write() and
- * ->commit_write(). */
+ * worry about recursive locking in ->write_begin() and ->write_end(). */
static int ocfs2_write_zero_page(struct inode *inode,
u64 size)
{
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c
+++ linux-2.6/fs/splice.c
@@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_ino
};
/*
- * The actor worker might be calling ->prepare_write and
- * ->commit_write. Most of the time, these expect i_mutex to
+ * The actor worker might be calling ->write_begin and
+ * ->write_end. Most of the time, these expect i_mutex to
* be held. Since this may result in an ABBA deadlock with
* pipe->inode, we have to order lock acquiry here.
*/
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -484,13 +484,6 @@ struct address_space_operations {
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- /*
- * ext3 requires that a successful prepare_write() call be followed
- * by a commit_write() call - they must be balanced
- */
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *f
{
const struct address_space_operations *aops = mapping->a_ops;
- if (aops->write_begin) {
- return aops->write_begin(file, mapping, pos, len, flags,
+ return aops->write_begin(file, mapping, pos, len, flags,
pagep, fsdata);
- } else {
- int ret;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- struct inode *inode = mapping->host;
- struct page *page;
-again:
- page = __grab_cache_page(mapping, index);
- *pagep = page;
- if (!page)
- return -ENOMEM;
-
- if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
- /*
- * There is no way to resolve a short write situation
- * for a !Uptodate page (except by double copying in
- * the caller done by generic_perform_write_2copy).
- *
- * Instead, we have to bring it uptodate here.
- */
- ret = aops->readpage(file, page);
- page_cache_release(page);
- if (ret) {
- if (ret == AOP_TRUNCATED_PAGE)
- goto again;
- return ret;
- }
- goto again;
- }
-
- ret = aops->prepare_write(file, page, offset, offset+len);
- if (ret) {
- unlock_page(page);
- page_cache_release(page);
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
- }
- return ret;
- }
}
EXPORT_SYMBOL(pagecache_write_begin);
@@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *fil
struct page *page, void *fsdata)
{
const struct address_space_operations *aops = mapping->a_ops;
- int ret;
-
- if (aops->write_end) {
- mark_page_accessed(page);
- ret = aops->write_end(file, mapping, pos, len, copied,
- page, fsdata);
- } else {
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
- struct inode *inode = mapping->host;
-
- flush_dcache_page(page);
- ret = aops->commit_write(file, page, offset, offset+len);
- unlock_page(page);
- mark_page_accessed(page);
- page_cache_release(page);
- if (ret < 0) {
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
- } else if (ret > 0)
- ret = min_t(size_t, copied, ret);
- else
- ret = copied;
- }
-
- return ret;
+ mark_page_accessed(page);
+ return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);
@@ -2226,174 +2163,6 @@ repeat:
}
EXPORT_SYMBOL(__grab_cache_page);
-static ssize_t generic_perform_write_2copy(struct file *file,
- struct iov_iter *i, loff_t pos)
-{
- struct address_space *mapping = file->f_mapping;
- const struct address_space_operations *a_ops = mapping->a_ops;
- struct inode *inode = mapping->host;
- long status = 0;
- ssize_t written = 0;
-
- do {
- struct page *src_page;
- struct page *page;
- pgoff_t index; /* Pagecache index for current page */
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
- size_t copied; /* Bytes copied from user */
-
- offset = (pos & (PAGE_CACHE_SIZE - 1));
- index = pos >> PAGE_CACHE_SHIFT;
- bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
- iov_iter_count(i));
-
- /*
- * a non-NULL src_page indicates that we're doing the
- * copy via get_user_pages and kmap.
- */
- src_page = NULL;
-
- /*
- * Bring in the user page that we will copy from _first_.
- * Otherwise there's a nasty deadlock on copying from the
- * same page as we're writing to, without it being marked
- * up-to-date.
- *
- * Not only is this an optimisation, but it is also required
- * to check that the address is actually valid, when atomic
- * usercopies are used, below.
- */
- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
- status = -EFAULT;
- break;
- }
-
- page = __grab_cache_page(mapping, index);
- if (!page) {
- status = -ENOMEM;
- break;
- }
-
- /*
- * non-uptodate pages cannot cope with short copies, and we
- * cannot take a pagefault with the destination page locked.
- * So pin the source page to copy it.
- */
- if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
- unlock_page(page);
-
- src_page = alloc_page(GFP_KERNEL);
- if (!src_page) {
- page_cache_release(page);
- status = -ENOMEM;
- break;
- }
-
- /*
- * Cannot get_user_pages with a page locked for the
- * same reason as we can't take a page fault with a
- * page locked (as explained below).
- */
- copied = iov_iter_copy_from_user(src_page, i,
- offset, bytes);
- if (unlikely(copied == 0)) {
- status = -EFAULT;
- page_cache_release(page);
- page_cache_release(src_page);
- break;
- }
- bytes = copied;
-
- lock_page(page);
- /*
- * Can't handle the page going uptodate here, because
- * that means we would use non-atomic usercopies, which
- * zero out the tail of the page, which can cause
- * zeroes to become transiently visible. We could just
- * use a non-zeroing copy, but the APIs aren't too
- * consistent.
- */
- if (unlikely(!page->mapping || PageUptodate(page))) {
- unlock_page(page);
- page_cache_release(page);
- page_cache_release(src_page);
- continue;
- }
- }
-
- status = a_ops->prepare_write(file, page, offset, offset+bytes);
- if (unlikely(status))
- goto fs_write_aop_error;
-
- if (!src_page) {
- /*
- * Must not enter the pagefault handler here, because
- * we hold the page lock, so we might recursively
- * deadlock on the same lock, or get an ABBA deadlock
- * against a different lock, or against the mmap_sem
- * (which nests outside the page lock). So increment
- * preempt count, and use _atomic usercopies.
- *
- * The page is uptodate so we are OK to encounter a
- * short copy: if unmodified parts of the page are
- * marked dirty and written out to disk, it doesn't
- * really matter.
- */
- pagefault_disable();
- copied = iov_iter_copy_from_user_atomic(page, i,
- offset, bytes);
- pagefault_enable();
- } else {
- void *src, *dst;
- src = kmap_atomic(src_page, KM_USER0);
- dst = kmap_atomic(page, KM_USER1);
- memcpy(dst + offset, src + offset, bytes);
- kunmap_atomic(dst, KM_USER1);
- kunmap_atomic(src, KM_USER0);
- copied = bytes;
- }
- flush_dcache_page(page);
-
- status = a_ops->commit_write(file, page, offset, offset+bytes);
- if (unlikely(status < 0))
- goto fs_write_aop_error;
- if (unlikely(status > 0)) /* filesystem did partial write */
- copied = min_t(size_t, copied, status);
-
- unlock_page(page);
- mark_page_accessed(page);
- page_cache_release(page);
- if (src_page)
- page_cache_release(src_page);
-
- iov_iter_advance(i, copied);
- pos += copied;
- written += copied;
-
- balance_dirty_pages_ratelimited(mapping);
- cond_resched();
- continue;
-
-fs_write_aop_error:
- unlock_page(page);
- page_cache_release(page);
- if (src_page)
- page_cache_release(src_page);
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (pos + bytes > inode->i_size)
- vmtruncate(inode, inode->i_size);
- break;
- } while (iov_iter_count(i));
-
- return written ? written : status;
-}
-
static ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
{
@@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb
struct iov_iter i;
iov_iter_init(&i, iov, nr_segs, count, written);
- if (a_ops->write_begin)
- status = generic_perform_write(file, &i, pos);
- else
- status = generic_perform_write_2copy(file, &i, pos);
+ status = generic_perform_write(file, &i, pos);
if (likely(status >= 0)) {
written += status;
Index: linux-2.6/Documentation/filesystems/Locking
===================================================================
--- linux-2.6.orig/Documentation/filesystems/Locking
+++ linux-2.6/Documentation/filesystems/Locking
@@ -161,8 +161,12 @@ prototypes:
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+ int (*write_begin)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+ int (*write_end)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
int (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, int);
@@ -180,8 +184,6 @@ sync_page: no maybe
writepages: no
set_page_dirty no no
readpages: no
-prepare_write: no yes yes
-commit_write: no yes yes
write_begin: no locks the page yes
write_end: no yes, unlocks yes
perform_write: no n/a yes
@@ -191,7 +193,7 @@ releasepage: no yes
direct_IO: no
launder_page: no yes
- ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
+ ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
may be called from the request handler (/dev/loop).
->readpage() unlocks the page, either synchronously or via I/O
Index: linux-2.6/Documentation/filesystems/vfs.txt
===================================================================
--- linux-2.6.orig/Documentation/filesystems/vfs.txt
+++ linux-2.6/Documentation/filesystems/vfs.txt
@@ -492,7 +492,7 @@ written-back to storage typically in who
address_space has finer control of write sizes.
The read process essentially only requires 'readpage'. The write
-process is more complicated and uses prepare_write/commit_write or
+process is more complicated and uses write_begin/write_end or
set_page_dirty to write data into the address_space, and writepage,
sync_page, and writepages to writeback data to storage.
@@ -521,8 +521,6 @@ struct address_space_operations {
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
- int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
- int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
@@ -598,37 +596,7 @@ struct address_space_operations {
readpages is only used for read-ahead, so read errors are
ignored. If anything goes wrong, feel free to give up.
- prepare_write: called by the generic write path in VM to set up a write
- request for a page. This indicates to the address space that
- the given range of bytes is about to be written. The
- address_space should check that the write will be able to
- complete, by allocating space if necessary and doing any other
- internal housekeeping. If the write will update parts of
- any basic-blocks on storage, then those blocks should be
- pre-read (if they haven't been read already) so that the
- updated blocks can be written out properly.
- The page will be locked.
-
- Note: the page _must not_ be marked uptodate in this function
- (or anywhere else) unless it actually is uptodate right now. As
- soon as a page is marked uptodate, it is possible for a concurrent
- read(2) to copy it to userspace.
-
- commit_write: If prepare_write succeeds, new data will be copied
- into the page and then commit_write will be called. It will
- typically update the size of the file (if appropriate) and
- mark the inode as dirty, and do any other related housekeeping
- operations. It should avoid returning an error if possible -
- errors should have been handled by prepare_write.
-
- write_begin: This is intended as a replacement for prepare_write. The
- key differences being that:
- - it returns a locked page (in *pagep) rather than being
- given a pre locked page;
- - it must be able to cope with short writes (where the
- length passed to write_begin is greater than the number
- of bytes copied into the page).
-
+ write_begin:
Called by the generic buffered write code to ask the filesystem to
prepare to write len bytes at the given offset in the file. The
address_space should check that the write will be able to complete,
@@ -640,6 +608,9 @@ struct address_space_operations {
The filesystem must return the locked pagecache page for the specified
offset, in *pagep, for the caller to write into.
+ It must be able to cope with short writes (where the length passed to
+ write_begin is greater than the number of bytes copied into the page).
+
flags is a field for AOP_FLAG_xxx flags, described in
include/linux/fs.h.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 6:20 ` Nick Piggin
@ 2008-10-21 21:35 ` Andrew Morton
2008-10-21 21:36 ` Christoph Hellwig
2008-10-22 9:24 ` Nick Piggin
2008-10-22 0:43 ` Andrew Morton
1 sibling, 2 replies; 20+ messages in thread
From: Andrew Morton @ 2008-10-21 21:35 UTC (permalink / raw)
To: Nick Piggin; +Cc: torvalds, hch, linux-fsdevel, pbadari
On Tue, 21 Oct 2008 08:20:20 +0200
Nick Piggin <npiggin@suse.de> wrote:
> Nothing uses prepare_write or commit_write. Remove them from the tree
> completely.
We could withdraw simple_prepare_write() too, I guess. But there's
some risk that out-of-tree filesystems might be using it within their
write_begin() implementations.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 21:35 ` Andrew Morton
@ 2008-10-21 21:36 ` Christoph Hellwig
2008-10-21 21:45 ` Andrew Morton
2008-10-22 9:24 ` Nick Piggin
1 sibling, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2008-10-21 21:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: Nick Piggin, torvalds, hch, linux-fsdevel, pbadari
On Tue, Oct 21, 2008 at 02:35:18PM -0700, Andrew Morton wrote:
> On Tue, 21 Oct 2008 08:20:20 +0200
> Nick Piggin <npiggin@suse.de> wrote:
>
> > Nothing uses prepare_write or commit_write. Remove them from the tree
> > completely.
>
> We could withdraw simple_prepare_write() too, I guess. But there's
> some risk that out-of-tree filesystems might be using it within their
> write_begin() implementations.
So? That's their risk for beeing out of tree. Nuke it.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 21:36 ` Christoph Hellwig
@ 2008-10-21 21:45 ` Andrew Morton
2008-10-21 21:49 ` Christoph Hellwig
0 siblings, 1 reply; 20+ messages in thread
From: Andrew Morton @ 2008-10-21 21:45 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: npiggin, torvalds, hch, linux-fsdevel, pbadari
On Tue, 21 Oct 2008 17:36:35 -0400
Christoph Hellwig <hch@infradead.org> wrote:
> On Tue, Oct 21, 2008 at 02:35:18PM -0700, Andrew Morton wrote:
> > On Tue, 21 Oct 2008 08:20:20 +0200
> > Nick Piggin <npiggin@suse.de> wrote:
> >
> > > Nothing uses prepare_write or commit_write. Remove them from the tree
> > > completely.
> >
> > We could withdraw simple_prepare_write() too, I guess. But there's
> > some risk that out-of-tree filesystems might be using it within their
> > write_begin() implementations.
>
> So? That's their risk for beeing out of tree. Nuke it.
Obviously, multiple repetition of the reasons why this is a poor idea
didn't sink in, so I won't trouble you with another repetition.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 21:45 ` Andrew Morton
@ 2008-10-21 21:49 ` Christoph Hellwig
2008-10-21 22:01 ` Andrew Morton
0 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2008-10-21 21:49 UTC (permalink / raw)
To: Andrew Morton
Cc: Christoph Hellwig, npiggin, torvalds, linux-fsdevel, pbadari
On Tue, Oct 21, 2008 at 02:45:05PM -0700, Andrew Morton wrote:
> Obviously, multiple repetition of the reasons why this is a poor idea
> didn't sink in, so I won't trouble you with another repetition.
Sorry, but there's absolutely no point of keeping dead code around for
out of tree code. It just means we keep dead weight around, that we
can't test and thus won't fix. It's not actually doing a service to
anyone.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 21:49 ` Christoph Hellwig
@ 2008-10-21 22:01 ` Andrew Morton
0 siblings, 0 replies; 20+ messages in thread
From: Andrew Morton @ 2008-10-21 22:01 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: hch, npiggin, torvalds, linux-fsdevel, pbadari
On Tue, 21 Oct 2008 17:49:29 -0400
Christoph Hellwig <hch@infradead.org> wrote:
> On Tue, Oct 21, 2008 at 02:45:05PM -0700, Andrew Morton wrote:
> > Obviously, multiple repetition of the reasons why this is a poor idea
> > didn't sink in, so I won't trouble you with another repetition.
>
> Sorry, but there's absolutely no point of keeping dead code around for
> out of tree code.
yes, there _are_ reasons. Two of which are:
- we inconvenience the maintainers and the users of that out-of-tree code
- we drive away testers. People who are dependent upon out-of-tree
code cannot test kernel.org kernels. This happens! People have
reported it!
> It just means we keep dead weight around, that we
> can't test and thus won't fix. It's not actually doing a service to
> anyone.
We mark it EXPORT_UNUSED_SYMBOL() in 2.6.28.
We remove it from 2.6.29.
That solves the above problems and nothing could be simpler to do.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 6:20 ` Nick Piggin
2008-10-21 21:35 ` Andrew Morton
@ 2008-10-22 0:43 ` Andrew Morton
2008-10-22 11:57 ` Edward Shishkin
2008-11-22 10:14 ` [patch 1/2] reiser4: adjust to the new aops Edward Shishkin
1 sibling, 2 replies; 20+ messages in thread
From: Andrew Morton @ 2008-10-22 0:43 UTC (permalink / raw)
To: Nick Piggin; +Cc: torvalds, hch, linux-fsdevel, pbadari, Edward Shishkin
On Tue, 21 Oct 2008 08:20:20 +0200
Nick Piggin <npiggin@suse.de> wrote:
> Nothing uses prepare_write or commit_write. Remove them from the tree
> completely.
argh, reiser4 broke.
Edward, I'll disable it in config for now.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-21 21:35 ` Andrew Morton
2008-10-21 21:36 ` Christoph Hellwig
@ 2008-10-22 9:24 ` Nick Piggin
1 sibling, 0 replies; 20+ messages in thread
From: Nick Piggin @ 2008-10-22 9:24 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, hch, linux-fsdevel, pbadari
On Tue, Oct 21, 2008 at 02:35:18PM -0700, Andrew Morton wrote:
> On Tue, 21 Oct 2008 08:20:20 +0200
> Nick Piggin <npiggin@suse.de> wrote:
>
> > Nothing uses prepare_write or commit_write. Remove them from the tree
> > completely.
>
> We could withdraw simple_prepare_write() too, I guess. But there's
> some risk that out-of-tree filesystems might be using it within their
> write_begin() implementations.
That's true. A number of the helper functions are still write_begin
implemented by calling into prepare_write... Of course, after this patch,
the bare prepare_write functions won't be any use to external modules
anymore. At least the code still gets exercised and no hacks, though.
With the perform_write_2copy stuff, it almost becomes crazy for us to
support given that very few in-kernel users (and now one) ever test it.
Anyway... this has taken long enough that I don't much care exactly how
it gets merged. Whatever you think best.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch] fs: remove prepare_write/commit_write
2008-10-22 0:43 ` Andrew Morton
@ 2008-10-22 11:57 ` Edward Shishkin
2008-11-22 10:14 ` [patch 1/2] reiser4: adjust to the new aops Edward Shishkin
1 sibling, 0 replies; 20+ messages in thread
From: Edward Shishkin @ 2008-10-22 11:57 UTC (permalink / raw)
To: Andrew Morton; +Cc: Nick Piggin, torvalds, hch, linux-fsdevel, pbadari
Andrew Morton wrote:
> On Tue, 21 Oct 2008 08:20:20 +0200
> Nick Piggin <npiggin@suse.de> wrote:
>
>
>> Nothing uses prepare_write or commit_write. Remove them from the tree
>> completely.
>>
>
> argh, reiser4 broke.
>
> Edward, I'll disable it in config for now.
>
>
Ok, no probs.
Will be fixed up a bit later..
^ permalink raw reply [flat|nested] 20+ messages in thread
* [patch 1/2] reiser4: adjust to the new aops
2008-10-22 0:43 ` Andrew Morton
2008-10-22 11:57 ` Edward Shishkin
@ 2008-11-22 10:14 ` Edward Shishkin
2008-11-27 10:34 ` Nick Piggin
1 sibling, 1 reply; 20+ messages in thread
From: Edward Shishkin @ 2008-11-22 10:14 UTC (permalink / raw)
To: Andrew Morton
Cc: Nick Piggin, torvalds, hch, linux-fsdevel, pbadari,
Reiserfs mailing list
[-- Attachment #1: Type: text/plain, Size: 409 bytes --]
Andrew Morton wrote:
> On Tue, 21 Oct 2008 08:20:20 +0200
> Nick Piggin <npiggin@suse.de> wrote:
>
>
>> Nothing uses prepare_write or commit_write. Remove them from the tree
>> completely.
>>
>
> argh, reiser4 broke.
>
> Edward, I'll disable it in config for now.
>
Here is the fixup plus support of loop devices
over compressed files (an old to-do issue).
Andrew, please apply.
Thanks,
Edward.
[-- Attachment #2: reiser4-adjust-to-new-aops.patch --]
[-- Type: text/x-patch, Size: 16926 bytes --]
. adjust reiser4 to the new aops (->write_begin, ->write_end)
. add support of loop devices over cryptcompress files.
Signed-off-by: Edward Shishkin<edward.shishkin@gmail.com>
---
linux-2.6.28-rc2-mm1/fs/reiser4/as_ops.c | 18 --
linux-2.6.28-rc2-mm1/fs/reiser4/page_cache.c | 4
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/cryptcompress.c | 56 +++++-
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.c | 61 +------
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.h | 31 ++-
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c | 83 ++++++++++
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file_ops.c | 47 -----
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.c | 16 -
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.h | 3
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/plugin.h | 8
10 files changed, 175 insertions(+), 152 deletions(-)
--- linux-2.6.28-rc2-mm1/fs/reiser4/as_ops.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/as_ops.c
@@ -347,24 +347,6 @@ int reiser4_writepages(struct address_sp
return inode_file_plugin(mapping->host)->writepages(mapping, wbc);
}
-int reiser4_prepare_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- return inode_file_plugin(file->f_dentry->d_inode)->prepare_write(file,
- page,
- from,
- to);
-}
-
-int reiser4_commit_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- return inode_file_plugin(file->f_dentry->d_inode)->commit_write(file,
- page,
- from,
- to);
-}
-
/* Make Linus happy.
Local variables:
c-indentation-style: "K&R"
--- linux-2.6.28-rc2-mm1/fs/reiser4/page_cache.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/page_cache.c
@@ -560,8 +560,8 @@ static struct address_space_operations f
.set_page_dirty = formatted_set_page_dirty,
/* used for read-ahead. Not applicable */
.readpages = NULL,
- .prepare_write = NULL,
- .commit_write = NULL,
+ .write_begin = NULL,
+ .write_end = NULL,
.bmap = NULL,
/* called just before page is being detached from inode mapping and
removed from memory. Called on truncate, cut/squeeze, and
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/cryptcompress.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/cryptcompress.c
@@ -3405,11 +3405,12 @@ static int cryptcompress_truncate(struct
return result;
}
-/* Capture an anonymous pager cluster. (Page cluser is
- * anonymous if it contains at least one anonymous page
+/**
+ * Capture a pager cluster.
+ * @clust must be set up by a caller.
*/
-static int capture_anon_page_cluster(struct cluster_handle * clust,
- struct inode * inode)
+static int capture_page_cluster(struct cluster_handle * clust,
+ struct inode * inode)
{
int result;
@@ -3420,6 +3421,7 @@ static int capture_anon_page_cluster(str
result = prepare_logical_cluster(inode, 0, 0, clust, LC_APPOV);
if (result)
return result;
+
set_cluster_pages_dirty(clust, inode);
result = checkin_logical_cluster(clust, inode);
put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
@@ -3502,7 +3504,7 @@ static int capture_anon_pages(struct add
break;
}
move_cluster_forward(&clust, inode, pages[0]->index);
- result = capture_anon_page_cluster(&clust, inode);
+ result = capture_page_cluster(&clust, inode);
put_found_pages(pages, found); /* find_anon_page_cluster */
if (result)
@@ -3743,18 +3745,48 @@ int release_cryptcompress(struct inode *
}
/* plugin->prepare_write */
-int prepare_write_cryptcompress(struct file *file, struct page *page,
- unsigned from, unsigned to)
+int write_begin_cryptcompress(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
- return -EINVAL;
+ return do_prepare_write(file, page, from, to);
}
/* plugin->commit_write */
-int commit_write_cryptcompress(struct file *file, struct page *page,
- unsigned from, unsigned to)
+int write_end_cryptcompress(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
- BUG();
- return 0;
+ int ret;
+ hint_t *hint;
+ lock_handle *lh;
+ struct inode * inode;
+ struct cluster_handle clust;
+
+ unlock_page(page);
+
+ inode = page->mapping->host;
+ hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
+ if (hint == NULL)
+ return RETERR(-ENOMEM);
+ hint_init_zero(hint);
+ lh = &hint->lh;
+
+ cluster_init_read(&clust, NULL);
+ clust.hint = hint;
+
+ ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
+ if (ret)
+ goto out;
+ clust.index = pg_to_clust(page->index, inode);
+ ret = capture_page_cluster(&clust, inode);
+ if (ret)
+ warning("edward-1557",
+ "Capture failed (inode %llu, result=%i)",
+ (unsigned long long)get_inode_oid(inode), ret);
+ out:
+ done_lh(lh);
+ kfree(hint);
+ put_cluster_handle(&clust);
+ return ret;
}
/* plugin->bmap */
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.c
@@ -889,36 +889,12 @@ static int capture_page_and_create_exten
return result;
}
-/* this is implementation of method commit_write of struct
- address_space_operations for unix file plugin */
-int
-commit_write_unix_file(struct file *file, struct page *page,
- unsigned from, unsigned to)
+/* plugin->write_end() */
+int write_end_unix_file(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
- reiser4_context *ctx;
- struct inode *inode;
- int result;
-
- assert("umka-3101", file != NULL);
- assert("umka-3102", page != NULL);
- assert("umka-3093", PageLocked(page));
-
- SetPageUptodate(page);
-
- inode = page->mapping->host;
- ctx = reiser4_init_context(page->mapping->host->i_sb);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
- page_cache_get(page);
unlock_page(page);
- result = capture_page_and_create_extent(page);
- lock_page(page);
- page_cache_release(page);
-
- /* don't commit transaction under inode semaphore */
- context_set_commit_async(ctx);
- reiser4_exit_context(ctx);
- return result;
+ return capture_page_and_create_extent(page);
}
/*
@@ -2687,32 +2663,23 @@ int delete_object_unix_file(struct inode
return reiser4_delete_object_common(inode);
}
-int
-prepare_write_unix_file(struct file *file, struct page *page,
- unsigned from, unsigned to)
+/* plugin->write_begin() */
+int write_begin_unix_file(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
- reiser4_context *ctx;
- struct unix_file_info *uf_info;
int ret;
+ struct unix_file_info *info;
- ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- uf_info = unix_file_inode_data(file->f_dentry->d_inode);
- get_exclusive_access(uf_info);
- ret = find_file_state(file->f_dentry->d_inode, uf_info);
- if (ret == 0) {
- if (uf_info->container == UF_CONTAINER_TAILS)
+ info = unix_file_inode_data(file->f_dentry->d_inode);
+ get_exclusive_access(info);
+ ret = find_file_state(file->f_dentry->d_inode, info);
+ if (likely(ret == 0)) {
+ if (info->container == UF_CONTAINER_TAILS)
ret = -EINVAL;
else
ret = do_prepare_write(file, page, from, to);
}
- drop_exclusive_access(uf_info);
-
- /* don't commit transaction under inode semaphore */
- context_set_commit_async(ctx);
- reiser4_exit_context(ctx);
+ drop_exclusive_access(info);
return ret;
}
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.h.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file.h
@@ -59,10 +59,14 @@ int reiser4_readpage(struct file *, stru
int reiser4_readpages(struct file*, struct address_space*, struct list_head*,
unsigned);
int reiser4_writepages(struct address_space *, struct writeback_control *);
-int reiser4_prepare_write(struct file *, struct page *, unsigned from,
- unsigned to);
-int reiser4_commit_write(struct file *, struct page *, unsigned from,
- unsigned to);
+int reiser4_write_begin_careful(struct file *file,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+int reiser4_write_end_careful(struct file *file,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
sector_t reiser4_bmap_careful(struct address_space *, sector_t lblock);
/*
@@ -87,12 +91,13 @@ int release_unix_file(struct inode *, st
/* private address space operations */
int readpage_unix_file(struct file *, struct page *);
-int readpages_unix_file(struct file*, struct address_space*, struct list_head*, unsigned);
+int readpages_unix_file(struct file*, struct address_space*, struct list_head*,
+ unsigned);
int writepages_unix_file(struct address_space *, struct writeback_control *);
-int prepare_write_unix_file(struct file *, struct page *, unsigned from,
- unsigned to);
-int commit_write_unix_file(struct file *, struct page *, unsigned from,
- unsigned to);
+int write_begin_unix_file(struct file *file, struct page *page,
+ unsigned from, unsigned to);
+int write_end_unix_file(struct file *file, struct page *page,
+ unsigned from, unsigned to);
sector_t bmap_unix_file(struct address_space *, sector_t lblock);
/* other private methods */
@@ -129,10 +134,10 @@ int readpages_cryptcompress(struct file*
struct list_head*, unsigned);
int writepages_cryptcompress(struct address_space *,
struct writeback_control *);
-int prepare_write_cryptcompress(struct file *, struct page *, unsigned from,
- unsigned to);
-int commit_write_cryptcompress(struct file *, struct page *, unsigned from,
- unsigned to);
+int write_begin_cryptcompress(struct file *file, struct page *page,
+ unsigned from, unsigned to);
+int write_end_cryptcompress(struct file *file, struct page *page,
+ unsigned from, unsigned to);
sector_t bmap_cryptcompress(struct address_space *, sector_t lblock);
/* other private methods */
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c
@@ -667,6 +667,89 @@ sector_t reiser4_bmap_careful(struct add
return PROT_PASSIVE(sector_t, bmap, (mapping, lblock));
}
+int reiser4_write_begin_careful(struct file *file,
+ struct address_space *mapping,
+ loff_t pos,
+ unsigned len,
+ unsigned flags,
+ struct page **pagep,
+ void **fsdata)
+{
+ int ret = 0;
+ unsigned start, end;
+ struct page *page;
+ pgoff_t index;
+ reiser4_context *ctx;
+ struct inode * inode = file->f_dentry->d_inode;
+
+ index = pos >> PAGE_CACHE_SHIFT;
+ start = pos & (PAGE_CACHE_SIZE - 1);
+ end = start + len;
+
+ page = __grab_cache_page(mapping, index);
+ *pagep = page;
+ if (!page)
+ return -ENOMEM;
+
+ ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto out;
+ }
+ ret = PROT_PASSIVE(int, write_begin, (file, page, start, end));
+
+ /* don't commit transaction under inode semaphore */
+ context_set_commit_async(ctx);
+ reiser4_exit_context(ctx);
+ out:
+ if (unlikely(ret)) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ return ret;
+}
+
+int reiser4_write_end_careful(struct file *file,
+ struct address_space *mapping,
+ loff_t pos,
+ unsigned len,
+ unsigned copied,
+ struct page *page,
+ void *fsdata)
+{
+ int ret;
+ reiser4_context *ctx;
+ unsigned start, end;
+ struct inode *inode = page->mapping->host;
+
+ assert("umka-3101", file != NULL);
+ assert("umka-3102", page != NULL);
+ assert("umka-3093", PageLocked(page));
+
+ start = pos & (PAGE_CACHE_SIZE - 1);
+ end = start + len;
+
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+
+ ctx = reiser4_init_context(page->mapping->host->i_sb);
+ if (IS_ERR(ctx)){
+ unlock_page(page);
+ ret = PTR_ERR(ctx);
+ goto out;
+ }
+ ret = PROT_PASSIVE(int, write_end, (file, page, start, end));
+
+ /* don't commit transaction under inode semaphore */
+ context_set_commit_async(ctx);
+ reiser4_exit_context(ctx);
+ out:
+ page_cache_release(page);
+ if (!ret)
+ ret = copied;
+ return ret;
+}
+
/*
* Wrappers without protection for:
*
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file_ops.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file_ops.c
@@ -95,55 +95,12 @@ int reiser4_sync_file_common(struct file
return 0;
}
-/* this is common implementation of vfs's sendfile method of struct
- file_operations
-
- Reads @count bytes from @file and calls @actor for every page read. This is
- needed for loop back devices support.
-*/
-#if 0
-ssize_t
-sendfile_common(struct file *file, loff_t *ppos, size_t count,
- read_actor_t actor, void *target)
-{
- reiser4_context *ctx;
- ssize_t result;
-
- ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
- result = generic_file_sendfile(file, ppos, count, actor, target);
- reiser4_exit_context(ctx);
- return result;
-}
-#endif /* 0 */
/* address space operations */
-/* this is common implementation of vfs's prepare_write method of struct
- address_space_operations
-*/
-int
-prepare_write_common(struct file *file, struct page *page, unsigned from,
- unsigned to)
-{
- reiser4_context *ctx;
- int result;
- ctx = reiser4_init_context(page->mapping->host->i_sb);
- result = do_prepare_write(file, page, from, to);
-
- /* don't commit transaction under inode semaphore */
- context_set_commit_async(ctx);
- reiser4_exit_context(ctx);
-
- return result;
-}
-
-/* this is helper for prepare_write_common and prepare_write_unix_file
- */
-int
-do_prepare_write(struct file *file, struct page *page, unsigned from,
+/* this is helper for plugin->write_begin() */
+int do_prepare_write(struct file *file, struct page *page, unsigned from,
unsigned to)
{
int result;
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.c
@@ -114,8 +114,8 @@ static struct address_space_operations r
.writepages = reiser4_writepages,
.set_page_dirty = reiser4_set_page_dirty,
.readpages = reiser4_readpages,
- .prepare_write = reiser4_prepare_write,
- .commit_write = reiser4_commit_write,
+ .write_begin = reiser4_write_begin_careful,
+ .write_end = reiser4_write_end_careful,
.bmap = reiser4_bmap_careful,
.invalidatepage = reiser4_invalidatepage,
.releasepage = reiser4_releasepage
@@ -165,8 +165,8 @@ static struct address_space_operations d
.writepages = dummyop,
.set_page_dirty = bugop,
.readpages = bugop,
- .prepare_write = bugop,
- .commit_write = bugop,
+ .write_begin = bugop,
+ .write_end = bugop,
.bmap = bugop,
.invalidatepage = bugop,
.releasepage = bugop
@@ -209,8 +209,8 @@ file_plugin file_plugins[LAST_FILE_PLUGI
.readpage = readpage_unix_file,
.readpages = readpages_unix_file,
.writepages = writepages_unix_file,
- .prepare_write = prepare_write_unix_file,
- .commit_write = commit_write_unix_file,
+ .write_begin = write_begin_unix_file,
+ .write_end = write_end_unix_file,
/*
* private a_ops
*/
@@ -403,8 +403,8 @@ file_plugin file_plugins[LAST_FILE_PLUGI
.readpage = readpage_cryptcompress,
.readpages = readpages_cryptcompress,
.writepages = writepages_cryptcompress,
- .prepare_write = prepare_write_cryptcompress,
- .commit_write = commit_write_cryptcompress,
+ .write_begin = write_begin_cryptcompress,
+ .write_end = write_end_cryptcompress,
.bmap = bmap_cryptcompress,
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.h.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/object.h
@@ -36,9 +36,6 @@ int reiser4_readdir_common(struct file *
int reiser4_release_dir_common(struct inode *, struct file *);
int reiser4_sync_common(struct file *, struct dentry *, int datasync);
-/* common implementations of address space operations */
-int prepare_write_common(struct file *, struct page *, unsigned from,
- unsigned to);
/* file plugin operations: common implementations */
int write_sd_by_inode_common(struct inode *);
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/plugin.h.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/plugin.h
@@ -248,10 +248,10 @@ typedef struct file_plugin {
struct list_head *pages, unsigned nr_pages);
int (*writepages)(struct address_space *mapping,
struct writeback_control *wbc);
- int (*prepare_write)(struct file *file, struct page *page,
- unsigned from, unsigned to);
- int (*commit_write)(struct file *file, struct page *page,
- unsigned from, unsigned to);
+ int (*write_begin)(struct file *file, struct page *page,
+ unsigned from, unsigned to);
+ int (*write_end)(struct file *file, struct page *page,
+ unsigned from, unsigned to);
sector_t (*bmap) (struct address_space * mapping, sector_t lblock);
/* other private methods */
/* save inode cached stat-data onto disk. It was called
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch 1/2] reiser4: adjust to the new aops
2008-11-22 10:14 ` [patch 1/2] reiser4: adjust to the new aops Edward Shishkin
@ 2008-11-27 10:34 ` Nick Piggin
2008-11-27 14:34 ` Edward Shishkin
0 siblings, 1 reply; 20+ messages in thread
From: Nick Piggin @ 2008-11-27 10:34 UTC (permalink / raw)
To: Edward Shishkin
Cc: Andrew Morton, torvalds, hch, linux-fsdevel, pbadari,
Reiserfs mailing list
On Sat, Nov 22, 2008 at 01:14:02PM +0300, Edward Shishkin wrote:
> Andrew Morton wrote:
> > On Tue, 21 Oct 2008 08:20:20 +0200
> > Nick Piggin <npiggin@suse.de> wrote:
> >
> >
> >> Nothing uses prepare_write or commit_write. Remove them from the tree
> >> completely.
> >>
> >
> > argh, reiser4 broke.
> >
> > Edward, I'll disable it in config for now.
> >
>
> Here is the fixup plus support of loop devices
> over compressed files (an old to-do issue).
> Andrew, please apply.
>
> Thanks,
> Edward.
>
Thanks Edward, I appreciate your help with this. I don't know the
reiser4 code at all, but just remember that you have to be able to
tolerate a short-write in write_end (eg. the page may not actually
have all or any of the memory initialized in the range (pos, pos+len),
only the range (pos, pos+copied) (ie. copied may be < len).
One thing that many block based filesystems have to be careful of is
to ensure that the uncopied range (which might contain garbage) doesn't
get written back to the filesystem.
I can't immediately see whether you handle that or not, but you're not
using 'copied' anywhere, so that might flag a problem.
Thanks,
Nick
> . adjust reiser4 to the new aops (->write_begin, ->write_end)
> . add support of loop devices over cryptcompress files.
>
> Signed-off-by: Edward Shishkin<edward.shishkin@gmail.com>
...
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch 1/2] reiser4: adjust to the new aops
2008-11-27 10:34 ` Nick Piggin
@ 2008-11-27 14:34 ` Edward Shishkin
2008-11-27 14:48 ` Nick Piggin
0 siblings, 1 reply; 20+ messages in thread
From: Edward Shishkin @ 2008-11-27 14:34 UTC (permalink / raw)
To: Nick Piggin
Cc: Andrew Morton, torvalds, hch, linux-fsdevel, pbadari,
Reiserfs mailing list
Nick Piggin wrote:
> On Sat, Nov 22, 2008 at 01:14:02PM +0300, Edward Shishkin wrote:
>
>> Andrew Morton wrote:
>>
>>> On Tue, 21 Oct 2008 08:20:20 +0200
>>> Nick Piggin <npiggin@suse.de> wrote:
>>>
>>>
>>>
>>>> Nothing uses prepare_write or commit_write. Remove them from the tree
>>>> completely.
>>>>
>>>>
>>> argh, reiser4 broke.
>>>
>>> Edward, I'll disable it in config for now.
>>>
>>>
>> Here is the fixup plus support of loop devices
>> over compressed files (an old to-do issue).
>> Andrew, please apply.
>>
>> Thanks,
>> Edward.
>>
>>
>
> Thanks Edward, I appreciate your help with this.
Hi Nick,
I am really happy that somebody looks at my patches..
> I don't know the
> reiser4 code at all, but just remember that you have to be able to
> tolerate a short-write in write_end (eg. the page may not actually
> have all or any of the memory initialized in the range (pos, pos+len),
> only the range (pos, pos+copied) (ie. copied may be < len).
>
> One thing that many block based filesystems have to be careful of is
> to ensure that the uncopied range (which might contain garbage) doesn't
> get written back to the filesystem.
>
> I can't immediately see whether you handle that or not, but you're not
> using 'copied' anywhere, so that might flag a problem.
>
reiser4_write_{begin,end} works only in splice.c (i.e. only for
loopback functionality) in the chunk of code which looks like this:
pagecache_write_begin();
...
memcpy(.., to_page);
...
pagecache_write_end(.., to_page, ..);
i.e. there can not be short writes and everything is uptodate.
reiser4_write() has its own means to fight with short writes.
Not everything is excellent here though, but this is another topic..
Thanks,
Edward.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [patch 1/2] reiser4: adjust to the new aops
2008-11-27 14:34 ` Edward Shishkin
@ 2008-11-27 14:48 ` Nick Piggin
2008-12-26 23:33 ` [patch 2/2] reiser4: adjust to the new aops fixup Edward Shishkin
0 siblings, 1 reply; 20+ messages in thread
From: Nick Piggin @ 2008-11-27 14:48 UTC (permalink / raw)
To: Edward Shishkin
Cc: Andrew Morton, torvalds, hch, linux-fsdevel, pbadari,
Reiserfs mailing list
On Thu, Nov 27, 2008 at 05:34:26PM +0300, Edward Shishkin wrote:
> reiser4_write_{begin,end} works only in splice.c (i.e. only for
> loopback functionality) in the chunk of code which looks like this:
>
> pagecache_write_begin();
> ...
> memcpy(.., to_page);
> ...
> pagecache_write_end(.., to_page, ..);
>
> i.e. there can not be short writes and everything is uptodate.
Ah OK, indeed you are right. Although that's not to say that some
other kernel code may not be able to start doing interruptible
copies, although it would be fairly unlikely. Maybe if you just have
a BUG_ON(!(flags & AOP_FLAG_UNINTERRUPTIBLE)); or something to just
make sure?
> reiser4_write() has its own means to fight with short writes.
> Not everything is excellent here though, but this is another topic..
OK, well in that case, I don't see any problem with your patch.
Thanks,
Nick
^ permalink raw reply [flat|nested] 20+ messages in thread
* [patch 2/2] reiser4: adjust to the new aops fixup
2008-11-27 14:48 ` Nick Piggin
@ 2008-12-26 23:33 ` Edward Shishkin
0 siblings, 0 replies; 20+ messages in thread
From: Edward Shishkin @ 2008-12-26 23:33 UTC (permalink / raw)
To: Nick Piggin, Andrew Morton
Cc: torvalds, hch, linux-fsdevel, pbadari, Reiserfs mailing list
[-- Attachment #1: Type: text/plain, Size: 756 bytes --]
Nick Piggin wrote:
> On Thu, Nov 27, 2008 at 05:34:26PM +0300, Edward Shishkin wrote:
>
>> reiser4_write_{begin,end} works only in splice.c (i.e. only for
>> loopback functionality) in the chunk of code which looks like this:
>>
>> pagecache_write_begin();
>> ...
>> memcpy(.., to_page);
>> ...
>> pagecache_write_end(.., to_page, ..);
>>
>> i.e. there can not be short writes and everything is uptodate.
>>
>
> Ah OK, indeed you are right. Although that's not to say that some
> other kernel code may not be able to start doing interruptible
> copies, although it would be fairly unlikely. Maybe if you just have
> a BUG_ON(!(flags & AOP_FLAG_UNINTERRUPTIBLE)); or something to just
> make sure?
Yup, I would say, it's a must..
Thanks,
Edward.
[-- Attachment #2: reiser4-adjust-to-the-new-aops-fixup.patch --]
[-- Type: text/x-patch, Size: 751 bytes --]
Make sure that reiser4_write_begin()
is not called for interruptible copies.
Signed-off-by: Edward Shishkin<edward.shishkin@gmail.com>
---
linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c | 6 ++++++
1 file changed, 6 insertions(+)
--- linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c.orig
+++ linux-2.6.28-rc2-mm1/fs/reiser4/plugin/file/file_conversion.c
@@ -682,6 +682,12 @@ int reiser4_write_begin_careful(struct f
reiser4_context *ctx;
struct inode * inode = file->f_dentry->d_inode;
+ /**
+ * reiser4_write_end() can not cope with
+ * short writes for now
+ */
+ BUG_ON(!(flags & AOP_FLAG_UNINTERRUPTIBLE));
+
index = pos >> PAGE_CACHE_SHIFT;
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + len;
^ permalink raw reply [flat|nested] 20+ messages in thread
end of thread, other threads:[~2008-12-26 23:33 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-24 23:41 [patch] fs: remove prepare_write/commit_write Nick Piggin
2008-09-24 23:48 ` Christoph Hellwig
2008-09-24 23:54 ` Nick Piggin
2008-10-17 17:34 ` Christoph Hellwig
2008-10-17 18:45 ` Badari Pulavarty
2008-10-21 6:20 ` Nick Piggin
2008-10-21 21:35 ` Andrew Morton
2008-10-21 21:36 ` Christoph Hellwig
2008-10-21 21:45 ` Andrew Morton
2008-10-21 21:49 ` Christoph Hellwig
2008-10-21 22:01 ` Andrew Morton
2008-10-22 9:24 ` Nick Piggin
2008-10-22 0:43 ` Andrew Morton
2008-10-22 11:57 ` Edward Shishkin
2008-11-22 10:14 ` [patch 1/2] reiser4: adjust to the new aops Edward Shishkin
2008-11-27 10:34 ` Nick Piggin
2008-11-27 14:34 ` Edward Shishkin
2008-11-27 14:48 ` Nick Piggin
2008-12-26 23:33 ` [patch 2/2] reiser4: adjust to the new aops fixup Edward Shishkin
2008-10-18 1:57 ` [patch] fs: remove prepare_write/commit_write Nick Piggin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).