* [RFC PATCH 1/2] NFSD: Return the actual stable_how value to clients
2024-11-06 16:57 [RFC PATCH 0/2] Add a "file_sync" export option cel
@ 2024-11-06 16:57 ` cel
2024-11-06 16:57 ` [RFC PATCH 2/2] NFSD: Add a "file_sync" export option cel
1 sibling, 0 replies; 3+ messages in thread
From: cel @ 2024-11-06 16:57 UTC (permalink / raw)
To: Neil Brown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
nfsd_vfs_write() may alter the stable_how value of an NFS WRITE
operation. The current implementation does not convey that change
back to the client, however. Instead it always returns the same
value as the client sent in the request. This might cause a client
to believe a FILE_SYNC WRITE payload was durable when in fact it was
converted to an UNSTABLE WRITE.
In the future, nfsd_vfs_write() might make the converse alteration:
promoting an UNSTABLE WRITE to be a FILE_SYNC WRITE. This would
signal that the client does not need a subsequent COMMIT operation.
Therefore the change to stable_how has to appear in the server's
WRITE response.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/nfs3proc.c | 2 +-
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/nfsproc.c | 3 ++-
fs/nfsd/vfs.c | 49 +++++++++++++++++++++++++++++++++++-----------
fs/nfsd/vfs.h | 8 ++++----
fs/nfsd/xdr3.h | 2 +-
6 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 372bdcf5e07a..64abab971915 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -239,7 +239,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
- resp->committed, resp->verf);
+ &resp->committed, resp->verf);
resp->count = cnt;
resp->status = nfsd3_map_status(resp->status);
return rpc_success;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d32f2dfd148f..51bae11d5d23 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1205,7 +1205,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written,
+ &write->wr_how_written,
(__be32 *)write->wr_verifier.data);
nfsd_file_put(nf);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6dda081eb24c..b9b03ae56a94 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -250,6 +250,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
struct nfsd_writeargs *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
unsigned long cnt = argp->len;
+ u32 committed = NFS_DATA_SYNC;
unsigned int nvecs;
dprintk("nfsd: WRITE %s %u bytes at %d\n",
@@ -260,7 +261,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC, NULL);
+ &cnt, &committed, NULL);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 22325b590e17..cd00d95c997f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1149,11 +1149,27 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
+/**
+ * nfsd_vfs_write - Invoke vfs_write()
+ * @rqstp: RPC execution context
+ * @fhp: verified file handle
+ * @nf: open nfsd_file matching @fhp
+ * @offset: starting byte offset
+ * @vec: array of vectors containing write payload
+ * @vlen: size of @vec
+ * @cnt: count of bytes to write
+ * @stable: IN: requested stable_how value, OUT: stable_how value actually used
+ * @verf: NFS write verifier to be filled in
+ *
+ * Upon return, caller must invoke fh_put() on @fhp.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
- loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable,
- __be32 *verf)
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
+ u32 *stable, __be32 *verf)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
@@ -1190,9 +1206,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
exp = fhp->fh_export;
if (!EX_ISSYNC(exp))
- stable = NFS_UNSTABLE;
+ *stable = NFS_UNSTABLE;
- if (stable && !fhp->fh_use_wgather)
+ if (*stable && !fhp->fh_use_wgather)
flags |= RWF_SYNC;
iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt);
@@ -1211,7 +1227,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (host_err < 0)
goto out_nfserr;
- if (stable && fhp->fh_use_wgather) {
+ if (*stable && fhp->fh_use_wgather) {
host_err = wait_for_concurrent_writes(file);
if (host_err < 0)
commit_reset_write_verifier(nn, rqstp, host_err);
@@ -1293,14 +1309,25 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
return err;
}
-/*
- * Write data to a file.
- * The stable flag requests synchronous writes.
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_write - Write data to a file.
+ * @rqstp: RPC execution context
+ * @fhp: verified file handle
+ * @offset: starting byte offset
+ * @vec: array of vectors containing write payload
+ * @vlen: size of @vec
+ * @cnt: count of bytes to write
+ * @stable: IN: requested stable_how value, OUT: stable_how value actually used
+ * @verf: NFS write verifier to be filled in
+ *
+ * Upon return, caller must invoke fh_put() on @fhp.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ struct kvec *vec, int vlen, unsigned long *cnt, u32 *stable,
__be32 *verf)
{
struct nfsd_file *nf;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3ff146522556..0b002f183046 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -128,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp);
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long *count,
u32 *eof);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *,
- int stable, __be32 *verf);
+__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, struct kvec *vec, int vlen,
+ unsigned long *cnt, u32 *stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file *nf, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int stable, __be32 *verf);
+ u32 *stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 522067b7fd75..c0e443ef3a6b 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -152,7 +152,7 @@ struct nfsd3_writeres {
__be32 status;
struct svc_fh fh;
unsigned long count;
- int committed;
+ u32 committed;
__be32 verf[2];
};
--
2.47.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [RFC PATCH 2/2] NFSD: Add a "file_sync" export option
2024-11-06 16:57 [RFC PATCH 0/2] Add a "file_sync" export option cel
2024-11-06 16:57 ` [RFC PATCH 1/2] NFSD: Return the actual stable_how value to clients cel
@ 2024-11-06 16:57 ` cel
1 sibling, 0 replies; 3+ messages in thread
From: cel @ 2024-11-06 16:57 UTC (permalink / raw)
To: Neil Brown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Introduce the kernel pieces for a "file_sync" export option. This
option would turn every NFS WRITE operation on an export into a
FILE_SYNC WRITE.
There are two primary use cases for this new export option:
1. The exported file system is not backed by persistent storage.
Thus a subsequent COMMIT will be a no-op. To prevent the client
from wasting the extra round-trip on a COMMIT operation, convert
all WRITEs to files on that export to FILE_SYNC.
2. The exported file system is backed by persistent storage that is
faster than the mean network round trip with the client. Waiting
for a separate COMMIT operation would cost more time than just
committing the data during the WRITE operation.
This applies either when the underlying persistent storage is
faster than almost any network fabric (e.g., NVMe), or when the
network connection to the client has very high latency (e.g., a
WAN link).
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfsd/export.c | 1 +
fs/nfsd/nfs4proc.c | 1 +
fs/nfsd/vfs.c | 5 +++--
include/uapi/linux/nfsd/export.h | 3 ++-
4 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c82d8e3e0d4f..11b5337dd0ea 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1297,6 +1297,7 @@ static struct flags {
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
{ NFSEXP_SECURITY_LABEL, {"security_label", ""}},
+ { NFSEXP_FILE_SYNC, {"file_sync", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 51bae11d5d23..7a4ded3ff7c2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1269,6 +1269,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
+ /* cel: check the "file_sync" export option as well */
nfsd_file_put(dst);
nfsd_file_put(src);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index cd00d95c997f..ffa6db6851bd 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1205,9 +1205,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
exp = fhp->fh_export;
- if (!EX_ISSYNC(exp))
+ if (exp->ex_flags & NFSEXP_FILE_SYNC)
+ *stable = NFS_FILE_SYNC;
+ else if (!EX_ISSYNC(exp))
*stable = NFS_UNSTABLE;
-
if (*stable && !fhp->fh_use_wgather)
flags |= RWF_SYNC;
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index a73ca3703abb..45afec454a37 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -53,9 +53,10 @@
*/
#define NFSEXP_V4ROOT 0x10000
#define NFSEXP_PNFS 0x20000
+#define NFSEXP_FILE_SYNC 0x40000
/* All flags that we claim to support. (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS 0x3FEFF
+#define NFSEXP_ALLFLAGS 0x7FEFF
/* The flags that may vary depending on security flavor: */
#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
--
2.47.0
^ permalink raw reply related [flat|nested] 3+ messages in thread