* [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-11 19:26 [PATCH 0/2] commit_metadata export operation v3 Ben Myers
@ 2010-02-11 19:26 ` Ben Myers
0 siblings, 0 replies; 12+ messages in thread
From: Ben Myers @ 2010-02-11 19:26 UTC (permalink / raw)
To: linux-nfs; +Cc: xfs
- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to sync parent and child inodes most efficiently.
- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh and optional dentry for the child.
- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synched here.
- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
- Add a 'delay_commit' arg to nfsd_setattr so that callers of
nfsd_create_setattr can commit parent and child together avoiding an extra
sync.
Signed-off-by: Ben Myers <bpm@sgi.com>
---
fs/nfsd/nfs3proc.c | 2 -
fs/nfsd/nfs4proc.c | 2 -
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/nfs4state.c | 2 -
fs/nfsd/nfsproc.c | 4 +-
fs/nfsd/vfs.c | 116 ++++++++++++++++++++++------------------------
fs/nfsd/vfs.h | 3 -
include/linux/exportfs.h | 6 ++
8 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 3d68f45..fe3af23 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -76,7 +76,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
- argp->check_guard, argp->guardtime);
+ argp->check_guard, argp->guardtime, 0);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 37514c4..c6e3c7f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -803,7 +803,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time_t)0);
+ 0, 0, 0);
out:
mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
return status;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7..98fb98e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
static void
nfsd4_sync_rec_dir(void)
{
- mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
- nfsd_sync_dir(rec_dir.dentry);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+ vfs_fsync(NULL, rec_dir.dentry, 0);
}
int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3a20c09..f2fc8c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2290,7 +2290,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+ return nfsd_setattr(rqstp, fh, &iattr, 0, 0, 0);
}
static __be32
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a047ad6..f5e8280 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -69,7 +69,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
argp->attrs.ia_valid, (long) argp->attrs.ia_size);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, 0, 0, 0);
return nfsd_return_attrs(nfserr, resp);
}
@@ -326,7 +326,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, 0, 0);
}
out_unlock:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7062925..734a088 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -271,6 +273,38 @@ out:
return err;
}
+/*
+ * Commit metadata changes to stable storage. You may pass NULL for dchild.
+ */
+static int
+commit_metadata(struct svc_fh *fhp, struct dentry *dchild)
+{
+ struct inode *parent = fhp->fh_dentry->d_inode;
+ struct inode *child = NULL;
+ const struct export_operations *export_ops = parent->i_sb->s_export_op;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata only */
+ };
+ int error = 0, error2 = 0;
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+
+ if (dchild)
+ child = dchild->d_inode;
+
+ if (export_ops->commit_metadata) {
+ error = export_ops->commit_metadata(parent, child);
+ } else {
+ if (child)
+ error2 = sync_inode(child, &wbc);
+ error = sync_inode(parent, &wbc);
+ if (error2)
+ error = error2;
+ }
+ return error;
+}
/*
* Set various file attributes.
@@ -278,7 +312,7 @@ out:
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+ int check_guard, time_t guardtime, int delay_commit)
{
struct dentry *dentry;
struct inode *inode;
@@ -415,9 +449,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
}
if (size_change)
put_write_access(inode);
- if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ if (!err && !delay_commit)
+ err = nfserrno(commit_metadata(fhp, NULL));
out:
return err;
@@ -770,28 +803,6 @@ nfsd_close(struct file *filp)
}
/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- * a) we do not have a file struct available
- * b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
- error = filemap_write_and_wait(inode->i_mapping);
- if (!error && inode->i_fop->fsync)
- error = inode->i_fop->fsync(NULL, dentry, 0);
- return error;
-}
-
-/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
@@ -1199,8 +1210,10 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
*/
if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
- if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ if (iap->ia_valid) {
+ return nfsd_setattr(rqstp, resfhp, iap, 0, 0,
+ 1 /* delay commit. our caller does it. */);
+ }
return 0;
}
@@ -1332,12 +1345,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- write_inode_now(dchild->d_inode, 1);
- }
-
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+ err2 = nfserrno(commit_metadata(fhp, dchild));
if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1369,7 +1378,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1464,11 +1472,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (created)
*created = 1;
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- /* setattr will sync the child (or not) */
- }
-
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1483,9 +1486,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
set_attr:
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
- err = err2;
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp, dchild));
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1600,12 +1603,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
- if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
- }
err = nfserrno(host_err);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp, NULL));
+
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1667,11 +1668,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
- if (EX_ISSYNC(ffhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(ddir));
- write_inode_now(dest, 1);
- }
- err = 0;
+ err = nfserrno(commit_metadata(ffhp, dold));
} else {
if (host_err == -EXDEV && rqstp->rq_vers == 2)
err = nfserr_acces;
@@ -1767,10 +1764,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
- host_err = nfsd_sync_dir(tdentry);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp, NULL);
if (!host_err)
- host_err = nfsd_sync_dir(fdentry);
+ host_err = commit_metadata(ffhp, NULL);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1851,12 +1848,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
- if (host_err)
- goto out_drop;
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
+ if (!host_err)
+ host_err = commit_metadata(fhp, NULL);
-out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 4b1de0a..5062afd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -41,7 +41,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct iattr *, int, time_t);
+ struct iattr *, int, time_t, int);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
@@ -91,7 +91,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
int nfsd_notify_change(struct inode *, struct iattr *);
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
-int nfsd_sync_dir(struct dentry *dp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..9102ecf 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
* @fh_to_parent: find the implied object's parent and get a dentry for it
* @get_name: find the name for a given inode in a given directory
* @get_parent: find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/Exporting for details on how to use
* this interface correctly.
@@ -137,6 +138,10 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * commit_metadata:
+ * @commit_metadata should commit metadata changes to stable storage.
+ * Parent or child can be NULL.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_mutex down
* get_name is not (which is possibly inconsistent)
@@ -152,6 +157,7 @@ struct export_operations {
int (*get_name)(struct dentry *parent, char *name,
struct dentry *child);
struct dentry * (*get_parent)(struct dentry *child);
+ int (*commit_metadata)(struct inode *parent, struct inode *child);
};
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-11 22:04 [PATCH 0/2] commit_metadata export operation v4 Ben Myers
@ 2010-02-11 22:05 ` Ben Myers
2010-02-12 14:23 ` Alex Elder
0 siblings, 1 reply; 12+ messages in thread
From: Ben Myers @ 2010-02-11 22:05 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs, xfs
- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to sync parent and child inodes most efficiently.
- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh and optional dentry for the child.
- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synched here.
- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
- Add a 'delay_commit' arg to nfsd_setattr so that callers of
nfsd_create_setattr can commit parent and child together avoiding an extra
sync.
Signed-off-by: Ben Myers <bpm@sgi.com>
---
fs/nfsd/nfs3proc.c | 2 -
fs/nfsd/nfs4proc.c | 2 -
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/nfs4state.c | 2 -
fs/nfsd/nfsproc.c | 4 +-
fs/nfsd/vfs.c | 116 ++++++++++++++++++++++------------------------
fs/nfsd/vfs.h | 3 -
include/linux/exportfs.h | 6 ++
8 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 3d68f45..fe3af23 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -76,7 +76,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
- argp->check_guard, argp->guardtime);
+ argp->check_guard, argp->guardtime, 0);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 37514c4..c6e3c7f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -803,7 +803,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time_t)0);
+ 0, 0, 0);
out:
mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
return status;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7..98fb98e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
static void
nfsd4_sync_rec_dir(void)
{
- mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
- nfsd_sync_dir(rec_dir.dentry);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+ vfs_fsync(NULL, rec_dir.dentry, 0);
}
int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3a20c09..f2fc8c8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2290,7 +2290,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+ return nfsd_setattr(rqstp, fh, &iattr, 0, 0, 0);
}
static __be32
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a047ad6..f5e8280 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -69,7 +69,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
argp->attrs.ia_valid, (long) argp->attrs.ia_size);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, 0, 0, 0);
return nfsd_return_attrs(nfserr, resp);
}
@@ -326,7 +326,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, 0, 0);
}
out_unlock:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed024d3..97474fb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -271,6 +273,38 @@ out:
return err;
}
+/*
+ * Commit metadata changes to stable storage. You may pass NULL for dchild.
+ */
+static int
+commit_metadata(struct svc_fh *fhp, struct dentry *dchild)
+{
+ struct inode *parent = fhp->fh_dentry->d_inode;
+ struct inode *child = NULL;
+ const struct export_operations *export_ops = parent->i_sb->s_export_op;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata only */
+ };
+ int error = 0, error2 = 0;
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+
+ if (dchild)
+ child = dchild->d_inode;
+
+ if (export_ops->commit_metadata) {
+ error = export_ops->commit_metadata(parent, child);
+ } else {
+ if (child)
+ error2 = sync_inode(child, &wbc);
+ error = sync_inode(parent, &wbc);
+ if (error2)
+ error = error2;
+ }
+ return error;
+}
/*
* Set various file attributes.
@@ -278,7 +312,7 @@ out:
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+ int check_guard, time_t guardtime, int delay_commit)
{
struct dentry *dentry;
struct inode *inode;
@@ -415,9 +449,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
}
if (size_change)
put_write_access(inode);
- if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ if (!err && !delay_commit)
+ err = nfserrno(commit_metadata(fhp, NULL));
out:
return err;
@@ -769,28 +802,6 @@ nfsd_close(struct file *filp)
}
/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- * a) we do not have a file struct available
- * b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
- error = filemap_write_and_wait(inode->i_mapping);
- if (!error && inode->i_fop->fsync)
- error = inode->i_fop->fsync(NULL, dentry, 0);
- return error;
-}
-
-/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
@@ -1198,8 +1209,10 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
*/
if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
- if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ if (iap->ia_valid) {
+ return nfsd_setattr(rqstp, resfhp, iap, 0, 0,
+ 1 /* delay commit. our caller does it. */);
+ }
return 0;
}
@@ -1331,12 +1344,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- write_inode_now(dchild->d_inode, 1);
- }
-
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+ err2 = nfserrno(commit_metadata(fhp, dchild));
if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1368,7 +1377,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1463,11 +1471,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (created)
*created = 1;
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- /* setattr will sync the child (or not) */
- }
-
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1482,9 +1485,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
set_attr:
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
- err = err2;
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp, dchild));
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1599,12 +1602,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
- if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
- }
err = nfserrno(host_err);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp, NULL));
+
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1666,11 +1667,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
- if (EX_ISSYNC(ffhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(ddir));
- write_inode_now(dest, 1);
- }
- err = 0;
+ err = nfserrno(commit_metadata(ffhp, dold));
} else {
if (host_err == -EXDEV && rqstp->rq_vers == 2)
err = nfserr_acces;
@@ -1766,10 +1763,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
- host_err = nfsd_sync_dir(tdentry);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp, NULL);
if (!host_err)
- host_err = nfsd_sync_dir(fdentry);
+ host_err = commit_metadata(ffhp, NULL);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1850,12 +1847,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
- if (host_err)
- goto out_drop;
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
+ if (!host_err)
+ host_err = commit_metadata(fhp, NULL);
-out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 4b1de0a..5062afd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -41,7 +41,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct iattr *, int, time_t);
+ struct iattr *, int, time_t, int);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
@@ -91,7 +91,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
int nfsd_notify_change(struct inode *, struct iattr *);
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
-int nfsd_sync_dir(struct dentry *dp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..9102ecf 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
* @fh_to_parent: find the implied object's parent and get a dentry for it
* @get_name: find the name for a given inode in a given directory
* @get_parent: find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/Exporting for details on how to use
* this interface correctly.
@@ -137,6 +138,10 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * commit_metadata:
+ * @commit_metadata should commit metadata changes to stable storage.
+ * Parent or child can be NULL.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_mutex down
* get_name is not (which is possibly inconsistent)
@@ -152,6 +157,7 @@ struct export_operations {
int (*get_name)(struct dentry *parent, char *name,
struct dentry *child);
struct dentry * (*get_parent)(struct dentry *child);
+ int (*commit_metadata)(struct inode *parent, struct inode *child);
};
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-11 22:05 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
@ 2010-02-12 14:23 ` Alex Elder
2010-02-12 17:31 ` Christoph Hellwig
0 siblings, 1 reply; 12+ messages in thread
From: Alex Elder @ 2010-02-12 14:23 UTC (permalink / raw)
To: Ben Myers; +Cc: bfields, linux-nfs, xfs
Generally this looks like a good change, but I do have
some questions embedded below.
-Alex
On Thu, 2010-02-11 at 16:05 -0600, Ben Myers wrote:
> - Add commit_metadata export_operation to allow the underlying filesystem to
> decide how to sync parent and child inodes most efficiently.
>
> - Usage of nfsd_sync_dir and write_inode_now has been replaced with the
> commit_metadata function that takes a svc_fh and optional dentry for the child.
In the cases you pass the child dentry, you will now be syncing
things to disk in a slightly different way/order from before.
I think this way is actually much better, but I thought I'd ask
anyway: are you sure that it's OK to sync them at the same time,
rather than directory first, then child (or the other way around,
depending on the case)?
> - The commit_metadata function calls the commit_metadata export_op if it's
> there, or else falls back to sync_inode instead of fsync and write_inode_now
> because only metadata need be synched here.
>
> - nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
>
> - Add a 'delay_commit' arg to nfsd_setattr so that callers of
> nfsd_create_setattr can commit parent and child together avoiding an extra
> sync.
I think this is a distinct enough change to warrant a separate patch
from the commit_metadata feature.
>
> Signed-off-by: Ben Myers <bpm@sgi.com>
. . .
> index 5a754f7..98fb98e 100644
> --- a/fs/nfsd/nfs4recover.c
> +++ b/fs/nfsd/nfs4recover.c
> @@ -119,9 +119,7 @@ out_no_tfm:
> static void
> nfsd4_sync_rec_dir(void)
> {
> - mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
> - nfsd_sync_dir(rec_dir.dentry);
> - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
> + vfs_fsync(NULL, rec_dir.dentry, 0);
Why do you no longer need to acquire the mutex here?
I see it gets acquired during the ->fsync() call
inside vfs_fsync_range(), but is there a reason
that it needed to be held during the filemap_write_and_wait()
(as is done in the nfsd_sync_dir() code) also?
. . .
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index ed024d3..97474fb 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
. . .
> @@ -415,9 +449,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
> }
> if (size_change)
> put_write_access(inode);
> - if (!err)
> - if (EX_ISSYNC(fhp->fh_export))
> - write_inode_now(inode, 1);
> + if (!err && !delay_commit)
> + err = nfserrno(commit_metadata(fhp, NULL));
Is this sufficient even if the size has changed?
> out:
> return err;
>
. . .
> @@ -1766,10 +1763,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
> goto out_dput_new;
>
> host_err = vfs_rename(fdir, odentry, tdir, ndentry);
> - if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
> - host_err = nfsd_sync_dir(tdentry);
> + if (!host_err) {
> + host_err = commit_metadata(tfhp, NULL);
> if (!host_err)
> - host_err = nfsd_sync_dir(fdentry);
> + host_err = commit_metadata(ffhp, NULL);
It violates the spirit of the "parent" and "child"
nature of its arguments, but it might be nice to
commit both directories' metadata with the same
call here.
. . .
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-12 14:23 ` Alex Elder
@ 2010-02-12 17:31 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-02-12 17:31 UTC (permalink / raw)
To: Alex Elder; +Cc: bfields, Ben Myers, linux-nfs, xfs
> In the cases you pass the child dentry, you will now be syncing
> things to disk in a slightly different way/order from before.
> I think this way is actually much better, but I thought I'd ask
> anyway: are you sure that it's OK to sync them at the same time,
> rather than directory first, then child (or the other way around,
> depending on the case)?
The order is defined by when we commit the transactions, if we do
the log force on the later one first we already write out the first
transaction. Note that in any transactional filesystem the changes
that create/mkdir/link/unlink do to parent and child will be in
the same transaction anyway, which is kinda the point of adding
the transactions to start with. Only for the case where we do
a setattr in addition to the primary transaction we'll actually
have another transaction to deal with and the above applies.
> > nfsd4_sync_rec_dir(void)
> > {
> > - mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
> > - nfsd_sync_dir(rec_dir.dentry);
> > - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
> > + vfs_fsync(NULL, rec_dir.dentry, 0);
>
> Why do you no longer need to acquire the mutex here?
> I see it gets acquired during the ->fsync() call
> inside vfs_fsync_range(), but is there a reason
> that it needed to be held during the filemap_write_and_wait()
> (as is done in the nfsd_sync_dir() code) also?
We don't need i_mutex for filemap_write_and_wait, it's just held
because someone used the nfsd_sync_dir helper where it doesn't
fit very well.
> > @@ -415,9 +449,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
> > }
> > if (size_change)
> > put_write_access(inode);
> > - if (!err)
> > - if (EX_ISSYNC(fhp->fh_export))
> > - write_inode_now(inode, 1);
> > + if (!err && !delay_commit)
> > + err = nfserrno(commit_metadata(fhp, NULL));
>
> Is this sufficient even if the size has changed?
Yes.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 0/2] commit_metadata export operation v5
@ 2010-02-16 21:04 Ben Myers
2010-02-16 21:04 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Ben Myers @ 2010-02-16 21:04 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs, xfs
Hey Bruce,
Here is the latest version of the knfsd sync changes that I've been spamming
you with. I have addressed the latest suggestions provided by Christoph,
Trond, Dave, and Alex. This version of the commit_metadata export operation
takes only one inode as suggested by Christoph and it turns out to be much
cleaner this way. Now we've gone back to committing all of the time in
nfsd_create_setattr and don't bother with the added argument. I also added a
couple of extra comments to explain the commit ordering.
Thanks,
Ben
---
Ben Myers (2):
commit_metadata export operation replacing nfsd_sync_dir
xfs_export_operations.commit_metadata
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/vfs.c | 109 ++++++++++++++++++++---------------------
fs/xfs/linux-2.6/xfs_export.c | 24 +++++++++
include/linux/exportfs.h | 5 ++
4 files changed, 83 insertions(+), 59 deletions(-)
--
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-16 21:04 [PATCH 0/2] commit_metadata export operation v5 Ben Myers
@ 2010-02-16 21:04 ` Ben Myers
2010-02-16 22:06 ` Christoph Hellwig
2010-02-16 21:04 ` [PATCH 2/2] xfs_export_operations.commit_metadata Ben Myers
2010-02-17 2:39 ` [PATCH 0/2] commit_metadata export operation v5 J. Bruce Fields
2 siblings, 1 reply; 12+ messages in thread
From: Ben Myers @ 2010-02-16 21:04 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs, xfs
- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to commit an inode most efficiently.
- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh.
- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synced here.
- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
Signed-off-by: Ben Myers <bpm@sgi.com>
---
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/vfs.c | 109 ++++++++++++++++++++++------------------------
include/linux/exportfs.h | 5 ++
3 files changed, 59 insertions(+), 59 deletions(-)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7..98fb98e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
static void
nfsd4_sync_rec_dir(void)
{
- mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
- nfsd_sync_dir(rec_dir.dentry);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+ vfs_fsync(NULL, rec_dir.dentry, 0);
}
int
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed024d3..cde275b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -271,6 +273,31 @@ out:
return err;
}
+/*
+ * Commit metadata changes to stable storage. You may pass NULL for dchild.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ const struct export_operations *export_ops = inode->i_sb->s_export_op;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata only */
+ };
+ int error = 0;
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+
+ if (export_ops->commit_metadata) {
+ error = export_ops->commit_metadata(inode);
+ } else {
+ error = sync_inode(inode, &wbc);
+ }
+
+ return error;
+}
/*
* Set various file attributes.
@@ -769,28 +796,6 @@ nfsd_close(struct file *filp)
}
/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- * a) we do not have a file struct available
- * b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
- error = filemap_write_and_wait(inode->i_mapping);
- if (!error && inode->i_fop->fsync)
- error = inode->i_fop->fsync(NULL, dentry, 0);
- return error;
-}
-
-/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
@@ -1199,7 +1204,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
if (current_fsuid() != 0)
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return nfsd_setattr(rqstp, resfhp, iap, 0, 0);
return 0;
}
@@ -1331,13 +1336,15 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- write_inode_now(dchild->d_inode, 1);
- }
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
+ /*
+ * nfsd_setattr already committed the child. Transactional filesystems
+ * had a chance to commit changes for both parent and child
+ * simultaneously making the following commit_metadata a noop.
+ */
+ err2 = nfserrno(commit_metadata(fhp));
+ if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1368,7 +1375,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1463,11 +1469,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (created)
*created = 1;
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- /* setattr will sync the child (or not) */
- }
-
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1482,9 +1483,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
set_attr:
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
- err = err2;
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+ /*
+ * nfsd_setattr already committed the child (and possibly also the parent).
+ */
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1599,12 +1604,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
- if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
- }
err = nfserrno(host_err);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1666,11 +1668,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
- if (EX_ISSYNC(ffhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(ddir));
- write_inode_now(dest, 1);
- }
- err = 0;
+ err = nfserrno(commit_metadata(ffhp));
+ if (!err)
+ err = nfserrno(commit_metadata(tfhp));
} else {
if (host_err == -EXDEV && rqstp->rq_vers == 2)
err = nfserr_acces;
@@ -1766,10 +1766,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
- host_err = nfsd_sync_dir(tdentry);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
if (!host_err)
- host_err = nfsd_sync_dir(fdentry);
+ host_err = commit_metadata(ffhp);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1850,12 +1850,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
- if (host_err)
- goto out_drop;
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
+ if (!host_err)
+ host_err = commit_metadata(fhp);
-out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..a9cd507 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
* @fh_to_parent: find the implied object's parent and get a dentry for it
* @get_name: find the name for a given inode in a given directory
* @get_parent: find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/Exporting for details on how to use
* this interface correctly.
@@ -137,6 +138,9 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * commit_metadata:
+ * @commit_metadata should commit metadata changes to stable storage.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_mutex down
* get_name is not (which is possibly inconsistent)
@@ -152,6 +156,7 @@ struct export_operations {
int (*get_name)(struct dentry *parent, char *name,
struct dentry *child);
struct dentry * (*get_parent)(struct dentry *child);
+ int (*commit_metadata)(struct inode *inode);
};
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] xfs_export_operations.commit_metadata
2010-02-16 21:04 [PATCH 0/2] commit_metadata export operation v5 Ben Myers
2010-02-16 21:04 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
@ 2010-02-16 21:04 ` Ben Myers
2010-02-16 22:07 ` Christoph Hellwig
2010-02-17 0:29 ` Dave Chinner
2010-02-17 2:39 ` [PATCH 0/2] commit_metadata export operation v5 J. Bruce Fields
2 siblings, 2 replies; 12+ messages in thread
From: Ben Myers @ 2010-02-16 21:04 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs, xfs
This is the commit_metadata export operation for XFS.
- Takes one inode to be committed.
- Forces the log up to the lsn of the inode.
- Doesn't force the log if the inode doesn't have a pincount.
Signed-off-by: Ben Myers <bpm@sgi.com>
---
fs/xfs/linux-2.6/xfs_export.c | 24 ++++++++++++++++++++++++
1 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 87b8cbd..47a8d1f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,6 +29,7 @@
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
+#include "xfs_inode_item.h"
/*
* Note that we only accept fileids which are long enough rather than allow
@@ -215,9 +216,32 @@ xfs_fs_get_parent(
return d_obtain_alias(VFS_I(cip));
}
+STATIC int
+xfs_fs_nfs_commit_metadata(
+ struct inode *inode)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_lsn_t force_lsn = NULLCOMMITLSN;
+ int error = 0;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_ipincount(ip)) {
+ force_lsn = ip->i_itemp->ili_last_lsn;
+ }
+ if (force_lsn != NULLCOMMITLSN) {
+ error = _xfs_log_force(mp, force_lsn,
+ XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
+ }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ return error;
+}
+
const struct export_operations xfs_export_operations = {
.encode_fh = xfs_fs_encode_fh,
.fh_to_dentry = xfs_fs_fh_to_dentry,
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
+ .commit_metadata = xfs_fs_nfs_commit_metadata,
};
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-16 21:04 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
@ 2010-02-16 22:06 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-02-16 22:06 UTC (permalink / raw)
To: Ben Myers; +Cc: bfields, linux-nfs, xfs
This looks very good to me. A couple of tiny nitpicks below:
> +/*
> + * Commit metadata changes to stable storage. You may pass NULL for dchild.
> + */
The dchild argument is gone in this version.
> + struct writeback_control wbc = {
> + .sync_mode = WB_SYNC_ALL,
> + .nr_to_write = 0, /* metadata only */
> + };
> + int error = 0;
> +
> + if (!EX_ISSYNC(fhp->fh_export))
> + return 0;
> +
> + if (export_ops->commit_metadata) {
> + error = export_ops->commit_metadata(inode);
> + } else {
> + error = sync_inode(inode, &wbc);
> + }
Maybe move the wbc declaration into the else branch here to keep
variables in the smallest possible scope.
> @@ -1199,7 +1204,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
> if (current_fsuid() != 0)
> iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
> if (iap->ia_valid)
> - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
> + return nfsd_setattr(rqstp, resfhp, iap, 0, 0);
While this is a worthwhile cleanup I'd not put it into a patch that is
now entirely unrelated.
> + err = nfsd_create_setattr(rqstp, resfhp, iap);
>
> + /*
> + * nfsd_setattr already committed the child. Transactional filesystems
> + * had a chance to commit changes for both parent and child
> + * simultaneously making the following commit_metadata a noop.
> + */
> + err2 = nfserrno(commit_metadata(fhp));
> + if (err2)
> err = err2;
The if statement above seems rather misindented, possibly due to the
partial use of spaces instead of tabs.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] xfs_export_operations.commit_metadata
2010-02-16 21:04 ` [PATCH 2/2] xfs_export_operations.commit_metadata Ben Myers
@ 2010-02-16 22:07 ` Christoph Hellwig
2010-02-17 0:29 ` Dave Chinner
1 sibling, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-02-16 22:07 UTC (permalink / raw)
To: Ben Myers; +Cc: bfields, linux-nfs, xfs
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] xfs_export_operations.commit_metadata
2010-02-16 21:04 ` [PATCH 2/2] xfs_export_operations.commit_metadata Ben Myers
2010-02-16 22:07 ` Christoph Hellwig
@ 2010-02-17 0:29 ` Dave Chinner
1 sibling, 0 replies; 12+ messages in thread
From: Dave Chinner @ 2010-02-17 0:29 UTC (permalink / raw)
To: Ben Myers; +Cc: bfields, linux-nfs, xfs
On Tue, Feb 16, 2010 at 03:04:18PM -0600, Ben Myers wrote:
> This is the commit_metadata export operation for XFS.
>
> - Takes one inode to be committed.
>
> - Forces the log up to the lsn of the inode.
>
> - Doesn't force the log if the inode doesn't have a pincount.
>
> Signed-off-by: Ben Myers <bpm@sgi.com>
> ---
> fs/xfs/linux-2.6/xfs_export.c | 24 ++++++++++++++++++++++++
> 1 files changed, 24 insertions(+), 0 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
> index 87b8cbd..47a8d1f 100644
> --- a/fs/xfs/linux-2.6/xfs_export.c
> +++ b/fs/xfs/linux-2.6/xfs_export.c
> @@ -29,6 +29,7 @@
> #include "xfs_vnodeops.h"
> #include "xfs_bmap_btree.h"
> #include "xfs_inode.h"
> +#include "xfs_inode_item.h"
>
> /*
> * Note that we only accept fileids which are long enough rather than allow
> @@ -215,9 +216,32 @@ xfs_fs_get_parent(
> return d_obtain_alias(VFS_I(cip));
> }
>
> +STATIC int
> +xfs_fs_nfs_commit_metadata(
> + struct inode *inode)
> +{
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_mount *mp = ip->i_mount;
> + xfs_lsn_t force_lsn = NULLCOMMITLSN;
> + int error = 0;
> +
> + xfs_ilock(ip, XFS_ILOCK_SHARED);
> + if (xfs_ipincount(ip)) {
> + force_lsn = ip->i_itemp->ili_last_lsn;
> + }
> + if (force_lsn != NULLCOMMITLSN) {
> + error = _xfs_log_force(mp, force_lsn,
> + XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
> + }
That could be simplified to:
if (xfs_ipincount(ip))
_xfs_log_force(mp, ip->i_itemp->ili_last_lsn,
XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 0/2] commit_metadata export operation v5
2010-02-16 21:04 [PATCH 0/2] commit_metadata export operation v5 Ben Myers
2010-02-16 21:04 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
2010-02-16 21:04 ` [PATCH 2/2] xfs_export_operations.commit_metadata Ben Myers
@ 2010-02-17 2:39 ` J. Bruce Fields
2 siblings, 0 replies; 12+ messages in thread
From: J. Bruce Fields @ 2010-02-17 2:39 UTC (permalink / raw)
To: Ben Myers; +Cc: linux-nfs, xfs
On Tue, Feb 16, 2010 at 03:04:08PM -0600, Ben Myers wrote:
> Here is the latest version of the knfsd sync changes that I've been spamming
> you with.
Don't worry about spamming--it's good that people have lots of comments!
--b.
> I have addressed the latest suggestions provided by Christoph,
> Trond, Dave, and Alex. This version of the commit_metadata export operation
> takes only one inode as suggested by Christoph and it turns out to be much
> cleaner this way. Now we've gone back to committing all of the time in
> nfsd_create_setattr and don't bother with the added argument. A couple extra
> comments to explain the commit ordering.
>
> Thanks,
> Ben
>
> ---
>
> Ben Myers (2):
> commit_metadata export operation replacing nfsd_sync_dir
> xfs_export_operations.commit_metadata
>
>
> fs/nfsd/nfs4recover.c | 4 --
> fs/nfsd/vfs.c | 109 ++++++++++++++++++++---------------------
> fs/xfs/linux-2.6/xfs_export.c | 24 +++++++++
> include/linux/exportfs.h | 5 ++
> 4 files changed, 83 insertions(+), 59 deletions(-)
>
> --
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir
2010-02-17 20:05 [PATCH 0/2] commit_metadata export operation v6 Ben Myers
@ 2010-02-17 20:05 ` Ben Myers
0 siblings, 0 replies; 12+ messages in thread
From: Ben Myers @ 2010-02-17 20:05 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs, xfs
- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to commit an inode most efficiently.
- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh.
- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synced here.
- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static
Signed-off-by: Ben Myers <bpm@sgi.com>
---
fs/nfsd/nfs4recover.c | 4 --
fs/nfsd/vfs.c | 106 +++++++++++++++++++++++-----------------------
include/linux/exportfs.h | 5 ++
3 files changed, 58 insertions(+), 57 deletions(-)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7..98fb98e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
static void
nfsd4_sync_rec_dir(void)
{
- mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
- nfsd_sync_dir(rec_dir.dentry);
- mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+ vfs_fsync(NULL, rec_dir.dentry, 0);
}
int
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed024d3..8afdba5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
#include <linux/jhash.h>
#include <linux/ima.h>
#include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -271,6 +273,32 @@ out:
return err;
}
+/*
+ * Commit metadata changes to stable storage.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ const struct export_operations *export_ops = inode->i_sb->s_export_op;
+ int error = 0;
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+
+ if (export_ops->commit_metadata) {
+ error = export_ops->commit_metadata(inode);
+ } else {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata only */
+ };
+
+ error = sync_inode(inode, &wbc);
+ }
+
+ return error;
+}
/*
* Set various file attributes.
@@ -769,28 +797,6 @@ nfsd_close(struct file *filp)
}
/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- * a) we do not have a file struct available
- * b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
- error = filemap_write_and_wait(inode->i_mapping);
- if (!error && inode->i_fop->fsync)
- error = inode->i_fop->fsync(NULL, dentry, 0);
- return error;
-}
-
-/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
@@ -1331,12 +1337,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- write_inode_now(dchild->d_inode, 1);
- }
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+ /*
+ * nfsd_setattr already committed the child. Transactional filesystems
+ * had a chance to commit changes for both parent and child
+ * simultaneously making the following commit_metadata a noop.
+ */
+ err2 = nfserrno(commit_metadata(fhp));
if (err2)
err = err2;
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1368,7 +1376,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild = NULL;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
__u32 v_mtime=0, v_atime=0;
@@ -1463,11 +1470,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (created)
*created = 1;
- if (EX_ISSYNC(fhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(dentry));
- /* setattr will sync the child (or not) */
- }
-
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1482,9 +1484,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
set_attr:
- err2 = nfsd_create_setattr(rqstp, resfhp, iap);
- if (err2)
- err = err2;
+ err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+ /*
+ * nfsd_setattr already committed the child (and possibly also the parent).
+ */
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
mnt_drop_write(fhp->fh_export->ex_path.mnt);
/*
@@ -1599,12 +1605,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
- if (!host_err) {
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
- }
err = nfserrno(host_err);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
fh_unlock(fhp);
mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1666,11 +1669,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
- if (EX_ISSYNC(ffhp->fh_export)) {
- err = nfserrno(nfsd_sync_dir(ddir));
- write_inode_now(dest, 1);
- }
- err = 0;
+ err = nfserrno(commit_metadata(ffhp));
+ if (!err)
+ err = nfserrno(commit_metadata(tfhp));
} else {
if (host_err == -EXDEV && rqstp->rq_vers == 2)
err = nfserr_acces;
@@ -1766,10 +1767,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
- if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
- host_err = nfsd_sync_dir(tdentry);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
if (!host_err)
- host_err = nfsd_sync_dir(fdentry);
+ host_err = commit_metadata(ffhp);
}
mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1850,12 +1851,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dput(rdentry);
- if (host_err)
- goto out_drop;
- if (EX_ISSYNC(fhp->fh_export))
- host_err = nfsd_sync_dir(dentry);
+ if (!host_err)
+ host_err = commit_metadata(fhp);
-out_drop:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
out_nfserr:
err = nfserrno(host_err);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..a9cd507 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
* @fh_to_parent: find the implied object's parent and get a dentry for it
* @get_name: find the name for a given inode in a given directory
* @get_parent: find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
*
* See Documentation/filesystems/nfs/Exporting for details on how to use
* this interface correctly.
@@ -137,6 +138,9 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * commit_metadata:
+ * @commit_metadata should commit metadata changes to stable storage.
+ *
* Locking rules:
* get_parent is called with child->d_inode->i_mutex down
* get_name is not (which is possibly inconsistent)
@@ -152,6 +156,7 @@ struct export_operations {
int (*get_name)(struct dentry *parent, char *name,
struct dentry *child);
struct dentry * (*get_parent)(struct dentry *child);
+ int (*commit_metadata)(struct inode *inode);
};
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2010-02-17 20:03 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-16 21:04 [PATCH 0/2] commit_metadata export operation v5 Ben Myers
2010-02-16 21:04 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
2010-02-16 22:06 ` Christoph Hellwig
2010-02-16 21:04 ` [PATCH 2/2] xfs_export_operations.commit_metadata Ben Myers
2010-02-16 22:07 ` Christoph Hellwig
2010-02-17 0:29 ` Dave Chinner
2010-02-17 2:39 ` [PATCH 0/2] commit_metadata export operation v5 J. Bruce Fields
-- strict thread matches above, loose matches on Subject: below --
2010-02-17 20:05 [PATCH 0/2] commit_metadata export operation v6 Ben Myers
2010-02-17 20:05 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
2010-02-11 22:04 [PATCH 0/2] commit_metadata export operation v4 Ben Myers
2010-02-11 22:05 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
2010-02-12 14:23 ` Alex Elder
2010-02-12 17:31 ` Christoph Hellwig
2010-02-11 19:26 [PATCH 0/2] commit_metadata export operation v3 Ben Myers
2010-02-11 19:26 ` [PATCH 1/2] commit_metadata export operation replacing nfsd_sync_dir Ben Myers
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox