* [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation
@ 2024-07-02 15:19 cel
2024-07-02 15:19 ` [RFC PATCH 1/6] NFS: Fix typo in OFFLOAD_CANCEL comment cel
` (5 more replies)
0 siblings, 6 replies; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
*** COMPILE-TESTED ONLY ***
Not sure I've addressed all of Olga's concerns. This is more like a
progress report. I've more-or-less worked out how to sort through
the results of the operation and return a "bytes copied so far"
count to the caller.
Chuck Lever (6):
NFS: Fix typo in OFFLOAD_CANCEL comment
NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR
NFS: Rename struct nfs4_offloadcancel_data
NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
NFS: Use NFSv4.2's OFFLOAD_STATUS operation
NFS: Refactor trace_nfs4_offload_cancel
fs/nfs/nfs42proc.c | 173 ++++++++++++++++++++++++++++++++++----
fs/nfs/nfs42xdr.c | 88 ++++++++++++++++++-
fs/nfs/nfs4trace.h | 11 ++-
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 5 +-
7 files changed, 260 insertions(+), 20 deletions(-)
--
2.45.2
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC PATCH 1/6] NFS: Fix typo in OFFLOAD_CANCEL comment
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
@ 2024-07-02 15:19 ` cel
2024-07-02 15:19 ` [RFC PATCH 2/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
` (4 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42xdr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 9e3ae53e2205..ef5730c5e704 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -549,7 +549,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
}
/*
- * Encode OFFLOAD_CANEL request
+ * Encode OFFLOAD_CANCEL request
*/
static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
struct xdr_stream *xdr,
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [RFC PATCH 2/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
2024-07-02 15:19 ` [RFC PATCH 1/6] NFS: Fix typo in OFFLOAD_CANCEL comment cel
@ 2024-07-02 15:19 ` cel
2024-07-02 15:19 ` [RFC PATCH 3/6] NFS: Rename struct nfs4_offloadcancel_data cel
` (3 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Add XDR encoding and decoding functions for the NFSv4.2
OFFLOAD_STATUS operation.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42xdr.c | 86 +++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_xdr.h | 5 ++-
4 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index ef5730c5e704..ad3d1293f917 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -35,6 +35,11 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \
+ 2 /* osr_count */ + \
+ 2 /* osr_complete */)
#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* nl4_type */ \
@@ -143,6 +148,14 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_offload_status_maxsz)
+#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_offload_status_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
@@ -343,6 +356,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_offload_status(struct xdr_stream *xdr,
+ const struct nfs42_offload_status_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
static void encode_copy_notify(struct xdr_stream *xdr,
const struct nfs42_copy_notify_args *args,
struct compound_hdr *hdr)
@@ -567,6 +588,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode OFFLOAD_STATUS request
+ */
+static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_offload_status_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->osa_seq_args, &hdr);
+ encode_putfh(xdr, args->osa_src_fh, &hdr);
+ encode_offload_status(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
/*
* Encode COPY_NOTIFY request
*/
@@ -919,6 +959,26 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_offload_status(struct xdr_stream *xdr,
+ struct nfs42_offload_status_res *res)
+{
+ ssize_t result;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS);
+ if (status)
+ return status;
+ /* osr_count */
+ if (xdr_stream_decode_u64(xdr, &res->osr_count) < 0)
+ return -EIO;
+ /* osr_complete<1> */
+ result = xdr_stream_decode_uint32_array(xdr, res->osr_complete, 1);
+ if (result < 0)
+ return -EIO;
+ res->complete_count = result;
+ return 0;
+}
+
static int decode_copy_notify(struct xdr_stream *xdr,
struct nfs42_copy_notify_res *res)
{
@@ -1368,6 +1428,32 @@ static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp,
return status;
}
+/*
+ * Decode OFFLOAD_STATUS response
+ */
+static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_offload_status_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_offload_status(xdr, res);
+
+out:
+ return status;
+}
+
/*
* Decode COPY_NOTIFY response
*/
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1416099dfcd1..bcb7de1c1b44 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7711,6 +7711,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status),
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0d896ce296ce..66e9f2f11fa1 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -681,6 +681,7 @@ enum {
NFSPROC4_CLNT_LISTXATTRS,
NFSPROC4_CLNT_REMOVEXATTR,
NFSPROC4_CLNT_READ_PLUS,
+ NFSPROC4_CLNT_OFFLOAD_STATUS,
};
/* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d09b9773b20c..3372971360d8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1488,8 +1488,9 @@ struct nfs42_offload_status_args {
struct nfs42_offload_status_res {
struct nfs4_sequence_res osr_seq_res;
- uint64_t osr_count;
- int osr_status;
+ u64 osr_count;
+ int complete_count;
+ u32 osr_complete[1];
};
struct nfs42_copy_notify_args {
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [RFC PATCH 3/6] NFS: Rename struct nfs4_offloadcancel_data
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
2024-07-02 15:19 ` [RFC PATCH 1/6] NFS: Fix typo in OFFLOAD_CANCEL comment cel
2024-07-02 15:19 ` [RFC PATCH 2/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
@ 2024-07-02 15:19 ` cel
2024-07-02 15:19 ` [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
` (2 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Refactor: This struct can be used unchanged for the new
OFFLOAD_STATUS implementation, so give it a more generic name.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 28704f924612..869605a0a9d5 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -498,15 +498,15 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
return err;
}
-struct nfs42_offloadcancel_data {
+struct nfs42_offload_data {
struct nfs_server *seq_server;
struct nfs42_offload_status_args args;
struct nfs42_offload_status_res res;
};
-static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
+static void nfs42_offload_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
nfs4_setup_sequence(data->seq_server->nfs_client,
&data->args.osa_seq_args,
@@ -515,7 +515,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
trace_nfs4_offload_cancel(&data->args, task->tk_status);
nfs41_sequence_done(task, &data->res.osr_seq_res);
@@ -525,22 +525,22 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs42_free_offloadcancel_data(void *data)
+static void nfs42_offload_release(void *data)
{
kfree(data);
}
static const struct rpc_call_ops nfs42_offload_cancel_ops = {
- .rpc_call_prepare = nfs42_offload_cancel_prepare,
+ .rpc_call_prepare = nfs42_offload_prepare,
.rpc_call_done = nfs42_offload_cancel_done,
- .rpc_release = nfs42_free_offloadcancel_data,
+ .rpc_release = nfs42_offload_release,
};
static int nfs42_do_offload_cancel_async(struct file *dst,
nfs4_stateid *stateid)
{
struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
- struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs42_offload_data *data = NULL;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct rpc_task *task;
struct rpc_message msg = {
@@ -559,7 +559,7 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
` (2 preceding siblings ...)
2024-07-02 15:19 ` [RFC PATCH 3/6] NFS: Rename struct nfs4_offloadcancel_data cel
@ 2024-07-02 15:19 ` cel
2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 15:19 ` [RFC PATCH 5/6] NFS: Use " cel
2024-07-02 15:19 ` [RFC PATCH 6/6] NFS: Refactor trace_nfs4_offload_cancel cel
5 siblings, 1 reply; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Enable the Linux NFS client to observe the progress of an offloaded
asynchronous COPY operation. This new operation will be put to use
in a subsequent patch.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 113 ++++++++++++++++++++++++++++++++++++++
include/linux/nfs_fs_sb.h | 1 +
2 files changed, 114 insertions(+)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 869605a0a9d5..c55247da8e49 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -21,6 +21,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
+ u64 *copied);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
@@ -582,6 +584,117 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offload_data *data = calldata;
+
+ if (!nfs4_sequence_done(task, &data->res.osr_seq_res))
+ return;
+
+ switch (task->tk_status) {
+ case 0:
+ return;
+ case -NFS4ERR_DELAY:
+ case -NFS4ERR_GRACE:
+ if (nfs4_async_handle_error(task, data->seq_server,
+ NULL, NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+ else
+ task->tk_status = -EIO;
+ break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ task->tk_status = -EBADF;
+ break;
+ case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
+ case -EOPNOTSUPP:
+ data->seq_server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
+ task->tk_status = -EOPNOTSUPP;
+ break;
+ default:
+ task->tk_status = -EIO;
+ }
+}
+
+static const struct rpc_call_ops nfs42_offload_status_ops = {
+ .rpc_call_prepare = nfs42_offload_prepare,
+ .rpc_call_done = nfs42_offload_status_done,
+ .rpc_release = nfs42_offload_release
+};
+
+/*
+ * Return values:
+ * %0: Server returned an NFS4_OK completion status
+ * %-EINPROGRESS: Server returned no completion status
+ * %-EREMOTEIO: Server returned an error completion status
+ * %-EBADF: Server did not recognize the copy stateid
+ * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS
+ *
+ * Other negative errnos indicate the client could not complete the
+ * request.
+ */
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
+ u64 *copied)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct nfs_server *server = NFS_SERVER(file_inode(file));
+ struct nfs42_offload_data *data = NULL;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_offload_status_ops,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
+ };
+ struct rpc_task *task;
+ int status;
+
+ if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->seq_server = server;
+ data->args.osa_src_fh = NFS_FH(file_inode(file));
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
+ 1, 0);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ nfs42_offload_release(data);
+ return PTR_ERR(task);
+ }
+ status = rpc_wait_for_completion_task(task);
+ if (status)
+ goto out;
+
+ *copied = data->res.osr_count;
+ if (!data->res.complete_count) {
+ status = -EINPROGRESS;
+ goto out;
+ }
+ if (data->res.osr_complete[0] != NFS_OK) {
+ status = -EREMOTEIO;
+ goto out;
+ }
+
+out:
+ rpc_put_task(task);
+ return status;
+}
+
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 92de074e63b9..0937e73c4767 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
+#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [RFC PATCH 5/6] NFS: Use NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
` (3 preceding siblings ...)
2024-07-02 15:19 ` [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
@ 2024-07-02 15:19 ` cel
2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 15:19 ` [RFC PATCH 6/6] NFS: Refactor trace_nfs4_offload_cancel cel
5 siblings, 1 reply; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
We've found that there are cases where a transport disconnection
results in the loss of callback RPCs. NFS servers typically do not
retransmit callback operations after a disconnect.
This can be a problem for the Linux NFS client's current
implementation of asynchronous COPY, which waits indefinitely for a
CB_OFFLOAD callback. If a transport disconnect occurs while an async
COPY is running, there's a good chance the client will never get the
completing CB_OFFLOAD.
Fix this by implementing the OFFLOAD_STATUS operation so that the
Linux NFS client can probe the NFS server if it doesn't see a
CB_OFFLOAD in a reasonable amount of time.
This patch implements a simplistic check. As future work, the client
might also be able to detect whether there is no forward progress on
the request asynchronous COPY operation, and CANCEL it.
Suggested-by: Olga Kornievskaia <kolga@netapp.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index c55247da8e49..246534bfc946 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -175,6 +175,11 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
+/* Wait this long before checking progress on a COPY operation */
+enum {
+ NFS42_COPY_TIMEOUT = 5 * HZ,
+};
+
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *dst_server,
struct nfs_server *src_server,
@@ -184,9 +189,10 @@ static int handle_async_copy(struct nfs42_copy_res *res,
bool *restart)
{
struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
- int status = NFS4_OK;
struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
struct nfs_open_context *src_ctx = nfs_file_open_context(src);
+ int status = NFS4_OK;
+ u64 copied;
copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
@@ -224,7 +230,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
spin_unlock(&src_server->nfs_client->cl_lock);
}
- status = wait_for_completion_interruptible(©->completion);
+wait:
+ status = wait_for_completion_interruptible_timeout(©->completion,
+ NFS42_COPY_TIMEOUT);
spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(©->copies);
spin_unlock(&dst_server->nfs_client->cl_lock);
@@ -233,12 +241,17 @@ static int handle_async_copy(struct nfs42_copy_res *res,
list_del_init(©->src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
- if (status == -ERESTARTSYS) {
- goto out_cancel;
- } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
- status = -EAGAIN;
- *restart = true;
+ switch (status) {
+ case 0:
+ goto timeout;
+ case -ERESTARTSYS:
goto out_cancel;
+ default:
+ if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
+ status = -EAGAIN;
+ *restart = true;
+ goto out_cancel;
+ }
}
out:
res->write_res.count = copy->count;
@@ -253,6 +266,19 @@ static int handle_async_copy(struct nfs42_copy_res *res,
if (!nfs42_files_from_same_server(src, dst))
nfs42_do_offload_cancel_async(src, src_stateid);
goto out_free;
+timeout:
+ status = nfs42_proc_offload_status(src, src_stateid, &copied);
+ switch (status) {
+ case 0:
+ case -EREMOTEIO:
+ res->write_res.count = copied;
+ memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf));
+ goto out_free;
+ case -EINPROGRESS:
+ case -EOPNOTSUPP:
+ goto wait;
+ }
+ goto out;
}
static int process_copy_commit(struct file *dst, loff_t pos_dst,
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [RFC PATCH 6/6] NFS: Refactor trace_nfs4_offload_cancel
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
` (4 preceding siblings ...)
2024-07-02 15:19 ` [RFC PATCH 5/6] NFS: Use " cel
@ 2024-07-02 15:19 ` cel
5 siblings, 0 replies; 12+ messages in thread
From: cel @ 2024-07-02 15:19 UTC (permalink / raw)
To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Add a trace_nfs4_offload_status trace point that looks just like
trace_nfs4_offload_cancel. Promote that event to an event class to
avoid duplicating code.
An alternative approach would be to expand trace_nfs4_offload_status
to report more of the actual OFFLOAD_STATUS result.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 2 ++
fs/nfs/nfs4trace.h | 11 ++++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 246534bfc946..fd4e66e9b43d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -614,6 +614,8 @@ static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
{
struct nfs42_offload_data *data = calldata;
+ trace_nfs4_offload_status(&data->args, task->tk_status);
+
if (!nfs4_sequence_done(task, &data->res.osr_seq_res))
return;
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 4de8780a7c48..885da7ef20a9 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2520,7 +2520,7 @@ TRACE_EVENT(nfs4_copy_notify,
)
);
-TRACE_EVENT(nfs4_offload_cancel,
+DECLARE_EVENT_CLASS(nfs4_offload_class,
TP_PROTO(
const struct nfs42_offload_status_args *args,
int error
@@ -2552,6 +2552,15 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+#define DEFINE_NFS4_OFFLOAD_EVENT(name) \
+ DEFINE_EVENT(nfs4_offload_class, name, \
+ TP_PROTO( \
+ const struct nfs42_offload_status_args *args, \
+ int error \
+ ), \
+ TP_ARGS(args, error))
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
--
2.45.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 15:19 ` [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
@ 2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 18:33 ` Chuck Lever III
0 siblings, 1 reply; 12+ messages in thread
From: Olga Kornievskaia @ 2024-07-02 18:30 UTC (permalink / raw)
To: cel; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Chuck Lever
On Tue, Jul 2, 2024 at 11:21 AM <cel@kernel.org> wrote:
>
> From: Chuck Lever <chuck.lever@oracle.com>
>
> Enable the Linux NFS client to observe the progress of an offloaded
> asynchronous COPY operation. This new operation will be put to use
> in a subsequent patch.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> fs/nfs/nfs42proc.c | 113 ++++++++++++++++++++++++++++++++++++++
> include/linux/nfs_fs_sb.h | 1 +
> 2 files changed, 114 insertions(+)
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 869605a0a9d5..c55247da8e49 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -21,6 +21,8 @@
>
> #define NFSDBG_FACILITY NFSDBG_PROC
> static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
> + u64 *copied);
>
> static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
> {
> @@ -582,6 +584,117 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
> return status;
> }
>
> +static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
> +{
> + struct nfs42_offload_data *data = calldata;
> +
> + if (!nfs4_sequence_done(task, &data->res.osr_seq_res))
> + return;
> +
> + switch (task->tk_status) {
> + case 0:
> + return;
> + case -NFS4ERR_DELAY:
> + case -NFS4ERR_GRACE:
> + if (nfs4_async_handle_error(task, data->seq_server,
> + NULL, NULL) == -EAGAIN)
> + rpc_restart_call_prepare(task);
> + else
> + task->tk_status = -EIO;
> + break;
> + case -NFS4ERR_ADMIN_REVOKED:
> + case -NFS4ERR_BAD_STATEID:
> + case -NFS4ERR_OLD_STATEID:
> + task->tk_status = -EBADF;
> + break;
Hm. Shouldn't we be attempting to do state recovery here?
> + case -NFS4ERR_NOTSUPP:
> + case -ENOTSUPP:
> + case -EOPNOTSUPP:
> + data->seq_server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
> + task->tk_status = -EOPNOTSUPP;
> + break;
> + default:
> + task->tk_status = -EIO;
> + }
> +}
> +
> +static const struct rpc_call_ops nfs42_offload_status_ops = {
> + .rpc_call_prepare = nfs42_offload_prepare,
> + .rpc_call_done = nfs42_offload_status_done,
> + .rpc_release = nfs42_offload_release
> +};
> +
> +/*
> + * Return values:
> + * %0: Server returned an NFS4_OK completion status
> + * %-EINPROGRESS: Server returned no completion status
> + * %-EREMOTEIO: Server returned an error completion status
> + * %-EBADF: Server did not recognize the copy stateid
> + * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS
> + *
> + * Other negative errnos indicate the client could not complete the
> + * request.
> + */
> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
> + u64 *copied)
> +{
> + struct nfs_open_context *ctx = nfs_file_open_context(file);
> + struct nfs_server *server = NFS_SERVER(file_inode(file));
> + struct nfs42_offload_data *data = NULL;
> + struct rpc_message msg = {
> + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
> + .rpc_cred = ctx->cred,
> + };
> + struct rpc_task_setup task_setup_data = {
> + .rpc_client = server->client,
> + .rpc_message = &msg,
> + .callback_ops = &nfs42_offload_status_ops,
> + .workqueue = nfsiod_workqueue,
> + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
> + };
> + struct rpc_task *task;
> + int status;
> +
> + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
> + return -EOPNOTSUPP;
> +
> + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
> + if (data == NULL)
> + return -ENOMEM;
> +
> + data->seq_server = server;
> + data->args.osa_src_fh = NFS_FH(file_inode(file));
> + memcpy(&data->args.osa_stateid, stateid,
> + sizeof(data->args.osa_stateid));
> + msg.rpc_argp = &data->args;
> + msg.rpc_resp = &data->res;
> + task_setup_data.callback_data = data;
> + nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
> + 1, 0);
> + task = rpc_run_task(&task_setup_data);
> + if (IS_ERR(task)) {
> + nfs42_offload_release(data);
> + return PTR_ERR(task);
> + }
> + status = rpc_wait_for_completion_task(task);
> + if (status)
> + goto out;
> +
> + *copied = data->res.osr_count;
> + if (!data->res.complete_count) {
> + status = -EINPROGRESS;
> + goto out;
> + }
> + if (data->res.osr_complete[0] != NFS_OK) {
> + status = -EREMOTEIO;
> + goto out;
> + }
> +
> +out:
> + rpc_put_task(task);
> + return status;
> +}
> +
> static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
> struct nfs42_copy_notify_args *args,
> struct nfs42_copy_notify_res *res)
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 92de074e63b9..0937e73c4767 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -278,6 +278,7 @@ struct nfs_server {
> #define NFS_CAP_LGOPEN (1U << 5)
> #define NFS_CAP_CASE_INSENSITIVE (1U << 6)
> #define NFS_CAP_CASE_PRESERVING (1U << 7)
> +#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
> #define NFS_CAP_POSIX_LOCK (1U << 14)
> #define NFS_CAP_UIDGID_NOMAP (1U << 15)
> #define NFS_CAP_STATEID_NFSV41 (1U << 16)
> --
> 2.45.2
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH 5/6] NFS: Use NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 15:19 ` [RFC PATCH 5/6] NFS: Use " cel
@ 2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 18:43 ` Chuck Lever III
2024-07-03 18:52 ` Chuck Lever III
0 siblings, 2 replies; 12+ messages in thread
From: Olga Kornievskaia @ 2024-07-02 18:30 UTC (permalink / raw)
To: cel; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Chuck Lever
On Tue, Jul 2, 2024 at 11:21 AM <cel@kernel.org> wrote:
>
> From: Chuck Lever <chuck.lever@oracle.com>
>
> We've found that there are cases where a transport disconnection
> results in the loss of callback RPCs. NFS servers typically do not
> retransmit callback operations after a disconnect.
>
> This can be a problem for the Linux NFS client's current
> implementation of asynchronous COPY, which waits indefinitely for a
> CB_OFFLOAD callback. If a transport disconnect occurs while an async
> COPY is running, there's a good chance the client will never get the
> completing CB_OFFLOAD.
>
> Fix this by implementing the OFFLOAD_STATUS operation so that the
> Linux NFS client can probe the NFS server if it doesn't see a
> CB_OFFLOAD in a reasonable amount of time.
>
> This patch implements a simplistic check. As future work, the client
> might also be able to detect whether there is no forward progress on
> the request asynchronous COPY operation, and CANCEL it.
>
> Suggested-by: Olga Kornievskaia <kolga@netapp.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> fs/nfs/nfs42proc.c | 40 +++++++++++++++++++++++++++++++++-------
> 1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index c55247da8e49..246534bfc946 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -175,6 +175,11 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
> return err;
> }
>
> +/* Wait this long before checking progress on a COPY operation */
> +enum {
> + NFS42_COPY_TIMEOUT = 5 * HZ,
> +};
> +
> static int handle_async_copy(struct nfs42_copy_res *res,
> struct nfs_server *dst_server,
> struct nfs_server *src_server,
> @@ -184,9 +189,10 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> bool *restart)
> {
> struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
> - int status = NFS4_OK;
> struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
> struct nfs_open_context *src_ctx = nfs_file_open_context(src);
> + int status = NFS4_OK;
> + u64 copied;
>
> copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
> if (!copy)
> @@ -224,7 +230,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> spin_unlock(&src_server->nfs_client->cl_lock);
> }
>
> - status = wait_for_completion_interruptible(©->completion);
> +wait:
> + status = wait_for_completion_interruptible_timeout(©->completion,
> + NFS42_COPY_TIMEOUT);
> spin_lock(&dst_server->nfs_client->cl_lock);
> list_del_init(©->copies);
> spin_unlock(&dst_server->nfs_client->cl_lock);
> @@ -233,12 +241,17 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> list_del_init(©->src_copies);
> spin_unlock(&src_server->nfs_client->cl_lock);
> }
> - if (status == -ERESTARTSYS) {
> - goto out_cancel;
> - } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> - status = -EAGAIN;
> - *restart = true;
> + switch (status) {
> + case 0:
> + goto timeout;
> + case -ERESTARTSYS:
> goto out_cancel;
> + default:
> + if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> + status = -EAGAIN;
> + *restart = true;
> + goto out_cancel;
> + }
> }
> out:
> res->write_res.count = copy->count;
> @@ -253,6 +266,19 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> if (!nfs42_files_from_same_server(src, dst))
> nfs42_do_offload_cancel_async(src, src_stateid);
> goto out_free;
> +timeout:
> + status = nfs42_proc_offload_status(src, src_stateid, &copied);
> + switch (status) {
> + case 0:
> + case -EREMOTEIO:
> + res->write_res.count = copied;
> + memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf));
> + goto out_free;
Setting aside the grouping these 2cases together, I don't understand
why the assumption that if we received a reply from OFFLOAD_STATUS
with some value back means that we should consider copy done? Say the
copy was for 1M, client queried and got back that 500M done, I don't
think the server by replying to the OFFLOAD_STATUS says it's done with
the copy? I think it replies with how much was done but it might still
be inprogress? So shouldn't we check that everything was done and if
not done go back to waiting again?
Again I don't think this approach is going to work because of how
callback and the copy thread are handled (as of now). In
handle_async_copy() when we come out of
wait_for_completion_interruptible() we know the callback has arrived
and it has signalled the copy thread and thus we remove the copy
request from the list. However, on the timeout, we didn't receive the
wake up and thus if we remove the copy from the list then, when the
callback thread asynchronously receives the callback it won't have the
request to match it too. And in case the server does support an
offload_status query I guess that's ok, but imagine it didn't. So now,
we'd send the offload_status and get not supported and we'd go back to
waiting but we'd already missed the callback because it came and
didn't find the matching request is now just dropped on the floor.
When we wake up from wait_for_completion_interruptible() we need to
know if we timed out or got woken. If we timed out, I think we need to
keep the request in.
> + case -EINPROGRESS:
> + case -EOPNOTSUPP:
> + goto wait;
> + }
> + goto out;
> }
>
> static int process_copy_commit(struct file *dst, loff_t pos_dst,
> --
> 2.45.2
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 18:30 ` Olga Kornievskaia
@ 2024-07-02 18:33 ` Chuck Lever III
0 siblings, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2024-07-02 18:33 UTC (permalink / raw)
To: Olga Kornievskaia
Cc: Chuck Lever, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo
> On Jul 2, 2024, at 2:30 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>
> On Tue, Jul 2, 2024 at 11:21 AM <cel@kernel.org> wrote:
>>
>> From: Chuck Lever <chuck.lever@oracle.com>
>>
>> Enable the Linux NFS client to observe the progress of an offloaded
>> asynchronous COPY operation. This new operation will be put to use
>> in a subsequent patch.
>>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfs/nfs42proc.c | 113 ++++++++++++++++++++++++++++++++++++++
>> include/linux/nfs_fs_sb.h | 1 +
>> 2 files changed, 114 insertions(+)
>>
>> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
>> index 869605a0a9d5..c55247da8e49 100644
>> --- a/fs/nfs/nfs42proc.c
>> +++ b/fs/nfs/nfs42proc.c
>> @@ -21,6 +21,8 @@
>>
>> #define NFSDBG_FACILITY NFSDBG_PROC
>> static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
>> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
>> + u64 *copied);
>>
>> static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
>> {
>> @@ -582,6 +584,117 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
>> return status;
>> }
>>
>> +static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
>> +{
>> + struct nfs42_offload_data *data = calldata;
>> +
>> + if (!nfs4_sequence_done(task, &data->res.osr_seq_res))
>> + return;
>> +
>> + switch (task->tk_status) {
>> + case 0:
>> + return;
>> + case -NFS4ERR_DELAY:
>> + case -NFS4ERR_GRACE:
>> + if (nfs4_async_handle_error(task, data->seq_server,
>> + NULL, NULL) == -EAGAIN)
>> + rpc_restart_call_prepare(task);
>> + else
>> + task->tk_status = -EIO;
>> + break;
>> + case -NFS4ERR_ADMIN_REVOKED:
>> + case -NFS4ERR_BAD_STATEID:
>> + case -NFS4ERR_OLD_STATEID:
>> + task->tk_status = -EBADF;
>> + break;
>
> Hm. Shouldn't we be attempting to do state recovery here?
Hard to say. Copy stateids are a little different.
If the server doesn't recognize the stateID in an
OFFLOAD_STATUS request, the spec says that can mean
that the server got a reply to its CB_OFFLOAD --
the server is allowed to delete the Copy stateid
in that case. It's not an error that requires
state recovery.
But I'm not saying this code is correct.
>> + case -NFS4ERR_NOTSUPP:
>> + case -ENOTSUPP:
>> + case -EOPNOTSUPP:
>> + data->seq_server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
>> + task->tk_status = -EOPNOTSUPP;
>> + break;
>> + default:
>> + task->tk_status = -EIO;
>> + }
>> +}
>> +
>> +static const struct rpc_call_ops nfs42_offload_status_ops = {
>> + .rpc_call_prepare = nfs42_offload_prepare,
>> + .rpc_call_done = nfs42_offload_status_done,
>> + .rpc_release = nfs42_offload_release
>> +};
>> +
>> +/*
>> + * Return values:
>> + * %0: Server returned an NFS4_OK completion status
>> + * %-EINPROGRESS: Server returned no completion status
>> + * %-EREMOTEIO: Server returned an error completion status
>> + * %-EBADF: Server did not recognize the copy stateid
>> + * %-EOPNOTSUPP: Server does not support OFFLOAD_STATUS
>> + *
>> + * Other negative errnos indicate the client could not complete the
>> + * request.
>> + */
>> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid,
>> + u64 *copied)
>> +{
>> + struct nfs_open_context *ctx = nfs_file_open_context(file);
>> + struct nfs_server *server = NFS_SERVER(file_inode(file));
>> + struct nfs42_offload_data *data = NULL;
>> + struct rpc_message msg = {
>> + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
>> + .rpc_cred = ctx->cred,
>> + };
>> + struct rpc_task_setup task_setup_data = {
>> + .rpc_client = server->client,
>> + .rpc_message = &msg,
>> + .callback_ops = &nfs42_offload_status_ops,
>> + .workqueue = nfsiod_workqueue,
>> + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
>> + };
>> + struct rpc_task *task;
>> + int status;
>> +
>> + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
>> + return -EOPNOTSUPP;
>> +
>> + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
>> + if (data == NULL)
>> + return -ENOMEM;
>> +
>> + data->seq_server = server;
>> + data->args.osa_src_fh = NFS_FH(file_inode(file));
>> + memcpy(&data->args.osa_stateid, stateid,
>> + sizeof(data->args.osa_stateid));
>> + msg.rpc_argp = &data->args;
>> + msg.rpc_resp = &data->res;
>> + task_setup_data.callback_data = data;
>> + nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
>> + 1, 0);
>> + task = rpc_run_task(&task_setup_data);
>> + if (IS_ERR(task)) {
>> + nfs42_offload_release(data);
>> + return PTR_ERR(task);
>> + }
>> + status = rpc_wait_for_completion_task(task);
>> + if (status)
>> + goto out;
>> +
>> + *copied = data->res.osr_count;
>> + if (!data->res.complete_count) {
>> + status = -EINPROGRESS;
>> + goto out;
>> + }
>> + if (data->res.osr_complete[0] != NFS_OK) {
>> + status = -EREMOTEIO;
>> + goto out;
>> + }
>> +
>> +out:
>> + rpc_put_task(task);
>> + return status;
>> +}
>> +
>> static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
>> struct nfs42_copy_notify_args *args,
>> struct nfs42_copy_notify_res *res)
>> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
>> index 92de074e63b9..0937e73c4767 100644
>> --- a/include/linux/nfs_fs_sb.h
>> +++ b/include/linux/nfs_fs_sb.h
>> @@ -278,6 +278,7 @@ struct nfs_server {
>> #define NFS_CAP_LGOPEN (1U << 5)
>> #define NFS_CAP_CASE_INSENSITIVE (1U << 6)
>> #define NFS_CAP_CASE_PRESERVING (1U << 7)
>> +#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
>> #define NFS_CAP_POSIX_LOCK (1U << 14)
>> #define NFS_CAP_UIDGID_NOMAP (1U << 15)
>> #define NFS_CAP_STATEID_NFSV41 (1U << 16)
>> --
>> 2.45.2
--
Chuck Lever
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH 5/6] NFS: Use NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 18:30 ` Olga Kornievskaia
@ 2024-07-02 18:43 ` Chuck Lever III
2024-07-03 18:52 ` Chuck Lever III
1 sibling, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2024-07-02 18:43 UTC (permalink / raw)
To: Olga Kornievskaia
Cc: Chuck Lever, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo
> On Jul 2, 2024, at 2:30 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>
> On Tue, Jul 2, 2024 at 11:21 AM <cel@kernel.org> wrote:
>>
>> From: Chuck Lever <chuck.lever@oracle.com>
>>
>> We've found that there are cases where a transport disconnection
>> results in the loss of callback RPCs. NFS servers typically do not
>> retransmit callback operations after a disconnect.
>>
>> This can be a problem for the Linux NFS client's current
>> implementation of asynchronous COPY, which waits indefinitely for a
>> CB_OFFLOAD callback. If a transport disconnect occurs while an async
>> COPY is running, there's a good chance the client will never get the
>> completing CB_OFFLOAD.
>>
>> Fix this by implementing the OFFLOAD_STATUS operation so that the
>> Linux NFS client can probe the NFS server if it doesn't see a
>> CB_OFFLOAD in a reasonable amount of time.
>>
>> This patch implements a simplistic check. As future work, the client
>> might also be able to detect whether there is no forward progress on
>> the request asynchronous COPY operation, and CANCEL it.
>>
>> Suggested-by: Olga Kornievskaia <kolga@netapp.com>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfs/nfs42proc.c | 40 +++++++++++++++++++++++++++++++++-------
>> 1 file changed, 33 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
>> index c55247da8e49..246534bfc946 100644
>> --- a/fs/nfs/nfs42proc.c
>> +++ b/fs/nfs/nfs42proc.c
>> @@ -175,6 +175,11 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
>> return err;
>> }
>>
>> +/* Wait this long before checking progress on a COPY operation */
>> +enum {
>> + NFS42_COPY_TIMEOUT = 5 * HZ,
>> +};
>> +
>> static int handle_async_copy(struct nfs42_copy_res *res,
>> struct nfs_server *dst_server,
>> struct nfs_server *src_server,
>> @@ -184,9 +189,10 @@ static int handle_async_copy(struct nfs42_copy_res *res,
>> bool *restart)
>> {
>> struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
>> - int status = NFS4_OK;
>> struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
>> struct nfs_open_context *src_ctx = nfs_file_open_context(src);
>> + int status = NFS4_OK;
>> + u64 copied;
>>
>> copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
>> if (!copy)
>> @@ -224,7 +230,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
>> spin_unlock(&src_server->nfs_client->cl_lock);
>> }
>>
>> - status = wait_for_completion_interruptible(©->completion);
>> +wait:
>> + status = wait_for_completion_interruptible_timeout(©->completion,
>> + NFS42_COPY_TIMEOUT);
>> spin_lock(&dst_server->nfs_client->cl_lock);
>> list_del_init(©->copies);
>> spin_unlock(&dst_server->nfs_client->cl_lock);
>> @@ -233,12 +241,17 @@ static int handle_async_copy(struct nfs42_copy_res *res,
>> list_del_init(©->src_copies);
>> spin_unlock(&src_server->nfs_client->cl_lock);
>> }
>> - if (status == -ERESTARTSYS) {
>> - goto out_cancel;
>> - } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
>> - status = -EAGAIN;
>> - *restart = true;
>> + switch (status) {
>> + case 0:
>> + goto timeout;
>> + case -ERESTARTSYS:
>> goto out_cancel;
>> + default:
>> + if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
>> + status = -EAGAIN;
>> + *restart = true;
>> + goto out_cancel;
>> + }
>> }
>> out:
>> res->write_res.count = copy->count;
>> @@ -253,6 +266,19 @@ static int handle_async_copy(struct nfs42_copy_res *res,
>> if (!nfs42_files_from_same_server(src, dst))
>> nfs42_do_offload_cancel_async(src, src_stateid);
>> goto out_free;
>> +timeout:
>> + status = nfs42_proc_offload_status(src, src_stateid, &copied);
>> + switch (status) {
>> + case 0:
>> + case -EREMOTEIO:
>> + res->write_res.count = copied;
>> + memcpy(&res->write_res.verifier, ©->verf, sizeof(copy->verf));
>> + goto out_free;
>
> Setting aside the grouping these 2cases together, I don't understand
> why the assumption that if we received a reply from OFFLOAD_STATUS
> with some value back means that we should consider copy done? Say the
> copy was for 1M, client queried and got back that 500M done, I don't
> think the server by replying to the OFFLOAD_STATUS says it's done with
> the copy? I think it replies with how much was done but it might still
> be inprogress? So shouldn't we check that everything was done and if
> not done go back to waiting again?
The server responds to OFFLOAD_STATUS in one of the following ways:
1. nfsstat = NFS4_OK
1a. osr_complete has size zero. This means the copy is still in progress.
osr_count contains the number of bytes copied so far.
1b. osr_complete has size one, and contains NFS4_OK. This means the
copy operation has completed. osr_count contains the total
number of bytes copied.
1c. osr_complete has size one, and contains an error status. This means
the copy failed. osr_count contains the total number of byte
that were copied before the failure occurred.
2. nfsstat != NFS4_OK generally means that the server does not
recognize the copy stateid.
So, yes, the client can check, based on this information, whether
it needs to wait, retry, or abort. The code I added is not tested
so the new logic is not totally correct. And we can refine this
logic to match our preferred responses, as you listed above.
> Again I don't think this approach is going to work because of how
> callback and the copy thread are handled (as of now). In
> handle_async_copy() when we come out of
> wait_for_completion_interruptible() we know the callback has arrived
> and it has signalled the copy thread and thus we remove the copy
> request from the list. However, on the timeout, we didn't receive the
> wake up and thus if we remove the copy from the list then, when the
> callback thread asynchronously receives the callback it won't have the
> request to match it too. And in case the server does support an
> offload_status query I guess that's ok, but imagine it didn't. So now,
> we'd send the offload_status and get not supported and we'd go back to
> waiting but we'd already missed the callback because it came and
> didn't find the matching request is now just dropped on the floor.
If the client receives "OFFLOAD_STATUS is not supported" I would say
it should try the COPY again with the synchronous bit set. That's a
broken server, IMO.
> When we wake up from wait_for_completion_interruptible() we need to
> know if we timed out or got woken. If we timed out, I think we need to
> keep the request in.
Fair enough, I will try to remember to fix the list handling. The
wait_for_completion_interruptible does provide a return code that
enables the caller to distinguish between these cases.
>> + case -EINPROGRESS:
>> + case -EOPNOTSUPP:
>> + goto wait;
>> + }
>> + goto out;
>> }
>>
>> static int process_copy_commit(struct file *dst, loff_t pos_dst,
>> --
>> 2.45.2
--
Chuck Lever
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH 5/6] NFS: Use NFSv4.2's OFFLOAD_STATUS operation
2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 18:43 ` Chuck Lever III
@ 2024-07-03 18:52 ` Chuck Lever III
1 sibling, 0 replies; 12+ messages in thread
From: Chuck Lever III @ 2024-07-03 18:52 UTC (permalink / raw)
To: Olga Kornievskaia
Cc: Chuck Lever, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo
> On Jul 2, 2024, at 2:30 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
>
> Again I don't think this approach is going to work because of how
> callback and the copy thread are handled (as of now). In
> handle_async_copy() when we come out of
> wait_for_completion_interruptible() we know the callback has arrived
> and it has signalled the copy thread and thus we remove the copy
> request from the list. However, on the timeout, we didn't receive the
> wake up and thus if we remove the copy from the list then, when the
> callback thread asynchronously receives the callback it won't have the
> request to match it too. And in case the server does support an
> offload_status query I guess that's ok, but imagine it didn't. So now,
> we'd send the offload_status and get not supported and we'd go back to
> waiting but we'd already missed the callback because it came and
> didn't find the matching request is now just dropped on the floor.
If the client reports that it can't find a matching request,
then the server will keep the copy state ID (it's allowed to
delete the copy stateid /only if/ it gets an NFS4_OK in the
CB_OFFLOAD reply, as I read the spec).
The client will wait again, then send another OFFLOAD_STATUS
in a few seconds, and will see that the COPY completed. The
server is then allowed to delete the copy stateid.
---
Again, if a server doesn't support OFFLOAD_STATUS, the only
reliable recourse is for the client to stop using async COPY.
My patch series doesn't implement that, currently. It could,
say, send a dummy OFFLOAD_STATUS before its first COPY
operation to determine whether to use sync or async COPY
going forward.
The block layer folks I talked to at LSF unanimously stated
that there is no way to implement COPY offload reliably
without the ability for the client/initiator to probe copy
status.
IMO the spec should say (but probably does not) that server
MUST implement OFFLOAD_STATUS if it supports async COPY.
Likewise for the client.
--
Chuck Lever
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2024-07-03 18:53 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-02 15:19 [RFC PATCH 0/6] Add a client-side OFFLOAD_STATUS implementation cel
2024-07-02 15:19 ` [RFC PATCH 1/6] NFS: Fix typo in OFFLOAD_CANCEL comment cel
2024-07-02 15:19 ` [RFC PATCH 2/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
2024-07-02 15:19 ` [RFC PATCH 3/6] NFS: Rename struct nfs4_offloadcancel_data cel
2024-07-02 15:19 ` [RFC PATCH 4/6] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 18:33 ` Chuck Lever III
2024-07-02 15:19 ` [RFC PATCH 5/6] NFS: Use " cel
2024-07-02 18:30 ` Olga Kornievskaia
2024-07-02 18:43 ` Chuck Lever III
2024-07-03 18:52 ` Chuck Lever III
2024-07-02 15:19 ` [RFC PATCH 6/6] NFS: Refactor trace_nfs4_offload_cancel cel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox