* [PATCH 1/3] NFS: Fix up the fsync code
@ 2010-07-12 22:09 Trond Myklebust
2010-07-12 22:09 ` [PATCH 2/3] NFS: Clean up the callers of nfs_wb_all() Trond Myklebust
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Trond Myklebust @ 2010-07-12 22:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs
Christoph points out that the VFS will always flush out data before calling
nfs_fsync(), so we can dispense with a full call to nfs_wb_all(), and
replace that with a simpler call to nfs_commit_inode().
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/file.c | 50 +++++++++++++++++++++-----------------------------
fs/nfs/internal.h | 1 +
fs/nfs/write.c | 4 ++--
3 files changed, 24 insertions(+), 31 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 36a5e74..4bf7306 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -202,31 +202,6 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
}
/*
- * Helper for nfs_file_flush() and nfs_file_fsync()
- *
- * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
- * disk, but it retrieves and clears ctx->error after synching, despite
- * the two being set at the same time in nfs_context_set_write_error().
- * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occured, and hence cause it to
- * fall back to doing a synchronous write.
- */
-static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
-{
- int have_error, status;
- int ret = 0;
-
- have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- status = nfs_wb_all(inode);
- have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- if (have_error)
- ret = xchg(&ctx->error, 0);
- if (!ret)
- ret = status;
- return ret;
-}
-
-/*
* Flush all dirty pages, and check for write errors.
*/
static int
@@ -245,7 +220,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
return 0;
/* Flush writes to the server and return any errors */
- return nfs_do_fsync(ctx, inode);
+ return vfs_fsync(file, 0);
}
static ssize_t
@@ -320,6 +295,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
+ *
+ * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
+ * disk, but it retrieves and clears ctx->error after synching, despite
+ * the two being set at the same time in nfs_context_set_write_error().
+ * This is because the former is used to notify the _next_ call to
+ * nfs_file_write() that a write error occured, and hence cause it to
+ * fall back to doing a synchronous write.
*/
static int
nfs_file_fsync(struct file *file, int datasync)
@@ -327,13 +309,23 @@ nfs_file_fsync(struct file *file, int datasync)
struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
+ int have_error, status;
+ int ret = 0;
+
dprintk("NFS: fsync file(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- return nfs_do_fsync(ctx, inode);
+ have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ status = nfs_commit_inode(inode, FLUSH_SYNC);
+ have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
+ if (have_error)
+ ret = xchg(&ctx->error, 0);
+ if (!ret)
+ ret = status;
+ return ret;
}
/*
@@ -639,7 +631,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
/* Return error values for O_DSYNC and IS_SYNC() */
if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
- int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
+ int err = vfs_fsync(iocb->ki_filp, 0);
if (err < 0)
result = err;
}
@@ -675,7 +667,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
written = ret;
if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
- int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
+ int err = vfs_fsync(filp, 0);
if (err < 0)
ret = err;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619..bdaaedf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -251,6 +251,7 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
/* write.c */
+extern int nfs_commit_inode(struct inode *inode, int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5eccea1..8fda8a7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1382,7 +1382,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};
-static int nfs_commit_inode(struct inode *inode, int how)
+int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
int may_wait = how & FLUSH_SYNC;
@@ -1446,7 +1446,7 @@ out_mark_dirty:
return ret;
}
#else
-static int nfs_commit_inode(struct inode *inode, int how)
+int nfs_commit_inode(struct inode *inode, int how)
{
return 0;
}
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/3] NFS: nfs_rename() should not have to flush out writebacks
2010-07-12 22:09 [PATCH 1/3] NFS: Fix up the fsync code Trond Myklebust
2010-07-12 22:09 ` [PATCH 2/3] NFS: Clean up the callers of nfs_wb_all() Trond Myklebust
@ 2010-07-12 22:09 ` Trond Myklebust
2010-07-13 1:13 ` [PATCH 1/3] NFS: Fix up the fsync code Christoph Hellwig
2010-08-13 8:39 ` Christoph Hellwig
3 siblings, 0 replies; 8+ messages in thread
From: Trond Myklebust @ 2010-07-12 22:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs
We don't really support nfs servers that invalidate the file handle after a
rename, so precautions such as flushing out dirty data before renaming the
file are superfluous.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/dir.c | 9 ---------
1 files changed, 0 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431..067a051 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1652,16 +1652,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
- /*
- * ... prune child dentries and writebacks if needed.
- */
- if (atomic_read(&old_dentry->d_count) > 1) {
- if (S_ISREG(old_inode->i_mode))
- nfs_wb_all(old_inode);
- shrink_dcache_parent(old_dentry);
- }
nfs_inode_return_delegation(old_inode);
-
if (new_inode != NULL)
nfs_inode_return_delegation(new_inode);
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/3] NFS: Clean up the callers of nfs_wb_all()
2010-07-12 22:09 [PATCH 1/3] NFS: Fix up the fsync code Trond Myklebust
@ 2010-07-12 22:09 ` Trond Myklebust
2010-07-12 22:09 ` [PATCH 3/3] NFS: nfs_rename() should not have to flush out writebacks Trond Myklebust
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Trond Myklebust @ 2010-07-12 22:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs
There is no need to flush out writes before calling nfs_wb_all().
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/delegation.c | 10 +---------
fs/nfs/inode.c | 4 +---
2 files changed, 2 insertions(+), 12 deletions(-)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index f34f4ac..b9c3c43 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -268,14 +268,6 @@ out:
return status;
}
-/* Sync all data to disk upon delegation return */
-static void nfs_msync_inode(struct inode *inode)
-{
- filemap_fdatawrite(inode->i_mapping);
- nfs_wb_all(inode);
- filemap_fdatawait(inode->i_mapping);
-}
-
/*
* Basic procedure for returning a delegation to the server
*/
@@ -367,7 +359,7 @@ int nfs_inode_return_delegation(struct inode *inode)
delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
spin_unlock(&clp->cl_lock);
if (delegation != NULL) {
- nfs_msync_inode(inode);
+ nfs_wb_all(inode);
err = __nfs_inode_return_delegation(inode, delegation, 1);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ec7a8f9..581d8f0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -413,10 +413,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
/* Write all dirty data */
- if (S_ISREG(inode->i_mode)) {
- filemap_write_and_wait(inode->i_mapping);
+ if (S_ISREG(inode->i_mode))
nfs_wb_all(inode);
- }
fattr = nfs_alloc_fattr();
if (fattr == NULL)
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] NFS: Fix up the fsync code
2010-07-12 22:09 [PATCH 1/3] NFS: Fix up the fsync code Trond Myklebust
2010-07-12 22:09 ` [PATCH 2/3] NFS: Clean up the callers of nfs_wb_all() Trond Myklebust
2010-07-12 22:09 ` [PATCH 3/3] NFS: nfs_rename() should not have to flush out writebacks Trond Myklebust
@ 2010-07-13 1:13 ` Christoph Hellwig
2010-07-13 19:50 ` Trond Myklebust
2010-08-13 8:39 ` Christoph Hellwig
3 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2010-07-13 1:13 UTC (permalink / raw)
To: Trond Myklebust; +Cc: Christoph Hellwig, linux-nfs
> /* Flush writes to the server and return any errors */
> - return nfs_do_fsync(ctx, inode);
> + return vfs_fsync(file, 0);
Either the argument to vfs_fsync or the comment needs some changes.
datasync = 0 means we don't just flush writes but also metadata. I
don't know enough about the NFS CTO guarantees to figure which one you
actually want.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] NFS: Fix up the fsync code
2010-07-13 1:13 ` [PATCH 1/3] NFS: Fix up the fsync code Christoph Hellwig
@ 2010-07-13 19:50 ` Trond Myklebust
2010-07-14 7:00 ` Christoph Hellwig
0 siblings, 1 reply; 8+ messages in thread
From: Trond Myklebust @ 2010-07-13 19:50 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs
On Mon, 2010-07-12 at 21:13 -0400, Christoph Hellwig wrote:
> > /* Flush writes to the server and return any errors */
> > - return nfs_do_fsync(ctx, inode);
> > + return vfs_fsync(file, 0);
>
> Either the argument to vfs_fsync or the comment needs some changes.
> datasync = 0 means we don't just flush writes but also metadata. I
> don't know enough about the NFS CTO guarantees to figure which one you
> actually want.
>
NFS can't distinguish between a datasync and a full sync: a successful
COMMIT operation guarantees that both data+metadata updates are on disk.
For this reason we ignore the 'datasync' parameter in our fsync
implementation.
Would it perhaps help if I added a comment to that effect in
nfs_file_fsync() itself?
Cheers
Trond
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] NFS: Fix up the fsync code
2010-07-13 19:50 ` Trond Myklebust
@ 2010-07-14 7:00 ` Christoph Hellwig
2010-07-14 13:17 ` Trond Myklebust
0 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2010-07-14 7:00 UTC (permalink / raw)
To: Trond Myklebust; +Cc: Christoph Hellwig, linux-nfs
On Tue, Jul 13, 2010 at 03:50:36PM -0400, Trond Myklebust wrote:
> NFS can't distinguish between a datasync and a full sync: a successful
> COMMIT operation guarantees that both data+metadata updates are on disk.
> For this reason we ignore the 'datasync' parameter in our fsync
> implementation.
That's not what the datasync parameter means.
Both fsync and fdatasync will commit data and metadata to disk, the
questions is how much metadata we need to commit. For fdatasync it's
only the metadata requires to locate the file data on disk, an
fsync requires everything (which is the above + timestamps basically).
I suspect for NFS the difference still doesn't matter, I'd just try
to make it clear.
> Would it perhaps help if I added a comment to that effect in
> nfs_file_fsync() itself?
Yes, comments explaining such higher level concepts are always good.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] NFS: Fix up the fsync code
2010-07-14 7:00 ` Christoph Hellwig
@ 2010-07-14 13:17 ` Trond Myklebust
0 siblings, 0 replies; 8+ messages in thread
From: Trond Myklebust @ 2010-07-14 13:17 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs
On Wed, 2010-07-14 at 03:00 -0400, Christoph Hellwig wrote:
> On Tue, Jul 13, 2010 at 03:50:36PM -0400, Trond Myklebust wrote:
> > NFS can't distinguish between a datasync and a full sync: a successful
> > COMMIT operation guarantees that both data+metadata updates are on disk.
> > For this reason we ignore the 'datasync' parameter in our fsync
> > implementation.
>
> That's not what the datasync parameter means.
>
> Both fsync and fdatasync will commit data and metadata to disk, the
> questions is how much metadata we need to commit. For fdatasync it's
> only the metadata requires to locate the file data on disk, an
> fsync requires everything (which is the above + timestamps basically).
>
> I suspect for NFS the difference still doesn't matter, I'd just try
> to make it clear.
Right. My point was that in NFSv3 and NFSv4, COMMIT always acts like
fsync(): it commits data + _all_ metadata (i.e. including size
+timestamps) to disk.
The exception is NFSv4.1 with pNFS, where you have an extra
'LAYOUTCOMMIT' operation that tells the metadata server when to write
the size+timestamps (because the WRITE and COMMIT operations are sent to
the data servers). So we might want to distinguish between fsync() and
fdatasync() when we merge that code.
> > Would it perhaps help if I added a comment to that effect in
> > nfs_file_fsync() itself?
>
> Yes, comments explaining such higher level concepts are always good.
OK. I'll add something to that effect.
Thanks for the review!
Trond
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/3] NFS: Fix up the fsync code
2010-07-12 22:09 [PATCH 1/3] NFS: Fix up the fsync code Trond Myklebust
` (2 preceding siblings ...)
2010-07-13 1:13 ` [PATCH 1/3] NFS: Fix up the fsync code Christoph Hellwig
@ 2010-08-13 8:39 ` Christoph Hellwig
3 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2010-08-13 8:39 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs
On Mon, Jul 12, 2010 at 06:09:32PM -0400, Trond Myklebust wrote:
> Christoph points out that the VFS will always flush out data before calling
> nfs_fsync(), so we can dispense with a full call to nfs_wb_all(), and
> replace that with a simpler call to nfs_commit_inode().
Btw, shouldn't nfs_file_write and nfs_file_splice_write use
vfs_fsync_range to just sync the range previously written?
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2010-08-13 8:39 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-07-12 22:09 [PATCH 1/3] NFS: Fix up the fsync code Trond Myklebust
2010-07-12 22:09 ` [PATCH 2/3] NFS: Clean up the callers of nfs_wb_all() Trond Myklebust
2010-07-12 22:09 ` [PATCH 3/3] NFS: nfs_rename() should not have to flush out writebacks Trond Myklebust
2010-07-13 1:13 ` [PATCH 1/3] NFS: Fix up the fsync code Christoph Hellwig
2010-07-13 19:50 ` Trond Myklebust
2010-07-14 7:00 ` Christoph Hellwig
2010-07-14 13:17 ` Trond Myklebust
2010-08-13 8:39 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).