* [RFC PATCH 1/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR
2024-04-29 15:16 [RFC PATCH 0/4] NFSv4.2 OFFLOAD_STATUS for the Linux NFS client cel
@ 2024-04-29 15:16 ` cel
2024-04-29 15:16 ` [RFC PATCH 2/4] NFS: Refactor trace_nfs4_offload_cancel cel
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: cel @ 2024-04-29 15:16 UTC (permalink / raw)
To: Olga Kornievskaia, Dai Ngo; +Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Add XDR encoding and decoding functions for NFSv4.2 OFFLOAD_STATUS
operation.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42xdr.c | 101 +++++++++++++++++++++++++++++++++++++++-
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_xdr.h | 1 +
4 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 9e3ae53e2205..bafa0005d038 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -35,6 +35,11 @@
#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
+#define encode_offload_status_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_status_maxsz (op_decode_hdr_maxsz + \
+ 2 /* osr_count */ + \
+ 2 /* osr_complete */)
#define encode_copy_notify_maxsz (op_encode_hdr_maxsz + \
XDR_QUADLEN(NFS4_STATEID_SIZE) + \
1 + /* nl4_type */ \
@@ -143,6 +148,14 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_offload_cancel_maxsz)
+#define NFS4_enc_offload_status_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_offload_status_maxsz)
+#define NFS4_dec_offload_status_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_offload_status_maxsz)
#define NFS4_enc_copy_notify_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_copy_notify_maxsz)
@@ -343,6 +356,14 @@ static void encode_offload_cancel(struct xdr_stream *xdr,
encode_nfs4_stateid(xdr, &args->osa_stateid);
}
+static void encode_offload_status(struct xdr_stream *xdr,
+ const struct nfs42_offload_status_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_OFFLOAD_STATUS, decode_offload_status_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
static void encode_copy_notify(struct xdr_stream *xdr,
const struct nfs42_copy_notify_args *args,
struct compound_hdr *hdr)
@@ -549,7 +570,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
}
/*
- * Encode OFFLOAD_CANEL request
+ * Encode OFFLOAD_CANCEL request
*/
static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
struct xdr_stream *xdr,
@@ -567,6 +588,25 @@ static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode OFFLOAD_STATUS request
+ */
+static void nfs4_xdr_enc_offload_status(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_offload_status_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->osa_seq_args, &hdr);
+ encode_putfh(xdr, args->osa_src_fh, &hdr);
+ encode_offload_status(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
/*
* Encode COPY_NOTIFY request
*/
@@ -919,6 +959,39 @@ static int decode_offload_cancel(struct xdr_stream *xdr,
return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
}
+static int decode_offload_status(struct xdr_stream *xdr,
+ struct nfs42_offload_status_res *res)
+{
+ int status, count;
+ __be32 *p;
+
+ status = decode_op_hdr(xdr, OP_OFFLOAD_STATUS);
+ if (status)
+ return status;
+
+ /* osr_count */
+ p = xdr_inline_decode(xdr, 12);
+ if (unlikely(!p))
+ return -EIO;
+ p = xdr_decode_hyper(p, &res->osr_count);
+
+ res->completed = false;
+ count = be32_to_cpup(p);
+ if (unlikely(count > 1))
+ return -EIO;
+
+ if (count) {
+ /* osr_status */
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ res->completed = true;
+ res->osr_status = be32_to_cpup(p);
+ }
+ return 0;
+}
+
static int decode_copy_notify(struct xdr_stream *xdr,
struct nfs42_copy_notify_res *res)
{
@@ -1368,6 +1441,32 @@ static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp,
return status;
}
+/*
+ * Decode OFFLOAD_STATUS response
+ */
+static int nfs4_xdr_dec_offload_status(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_offload_status_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_offload_status(xdr, res);
+
+out:
+ return status;
+}
+
/*
* Decode COPY_NOTIFY response
*/
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1416099dfcd1..bcb7de1c1b44 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7711,6 +7711,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
+ PROC42(OFFLOAD_STATUS, enc_offload_status, dec_offload_status),
PROC42(COPY_NOTIFY, enc_copy_notify, dec_copy_notify),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
PROC42(LAYOUTERROR, enc_layouterror, dec_layouterror),
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ef8d2d618d5b..89ed7dd29a9e 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -681,6 +681,7 @@ enum {
NFSPROC4_CLNT_LISTXATTRS,
NFSPROC4_CLNT_REMOVEXATTR,
NFSPROC4_CLNT_READ_PLUS,
+ NFSPROC4_CLNT_OFFLOAD_STATUS,
};
/* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d09b9773b20c..7b55a4e506e1 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1490,6 +1490,7 @@ struct nfs42_offload_status_res {
struct nfs4_sequence_res osr_seq_res;
uint64_t osr_count;
int osr_status;
+ bool completed;
};
struct nfs42_copy_notify_args {
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 2/4] NFS: Refactor trace_nfs4_offload_cancel
2024-04-29 15:16 [RFC PATCH 0/4] NFSv4.2 OFFLOAD_STATUS for the Linux NFS client cel
2024-04-29 15:16 ` [RFC PATCH 1/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
@ 2024-04-29 15:16 ` cel
2024-04-29 15:16 ` [RFC PATCH 3/4] NFS: Rename struct nfs4_offloadcancel_data cel
2024-04-29 15:16 ` [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
3 siblings, 0 replies; 7+ messages in thread
From: cel @ 2024-04-29 15:16 UTC (permalink / raw)
To: Olga Kornievskaia, Dai Ngo; +Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
I'm about to add a trace_nfs4_offload_status trace point that looks
just like this one, so promote trace_nfs4_offload_cancel to a trace
class. A subsequent patch adds the new trace_nfs4_offload_status
tracepoint.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs4trace.h | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 10985a4b8259..8f32dbf9c91d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2524,7 +2524,7 @@ TRACE_EVENT(nfs4_copy_notify,
)
);
-TRACE_EVENT(nfs4_offload_cancel,
+DECLARE_EVENT_CLASS(nfs4_offload_class,
TP_PROTO(
const struct nfs42_offload_status_args *args,
int error
@@ -2556,6 +2556,14 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+#define DEFINE_NFS4_OFFLOAD_EVENT(name) \
+ DEFINE_EVENT(nfs4_offload_class, name, \
+ TP_PROTO( \
+ const struct nfs42_offload_status_args *args, \
+ int error \
+ ), \
+ TP_ARGS(args, error))
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 3/4] NFS: Rename struct nfs4_offloadcancel_data
2024-04-29 15:16 [RFC PATCH 0/4] NFSv4.2 OFFLOAD_STATUS for the Linux NFS client cel
2024-04-29 15:16 ` [RFC PATCH 1/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
2024-04-29 15:16 ` [RFC PATCH 2/4] NFS: Refactor trace_nfs4_offload_cancel cel
@ 2024-04-29 15:16 ` cel
2024-04-29 15:16 ` [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
3 siblings, 0 replies; 7+ messages in thread
From: cel @ 2024-04-29 15:16 UTC (permalink / raw)
To: Olga Kornievskaia, Dai Ngo; +Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Refactor: This struct can be used unchanged for OFFLOAD_STATUS, so
give it a more generic name.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 28704f924612..7656d7c103fa 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -498,7 +498,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
return err;
}
-struct nfs42_offloadcancel_data {
+struct nfs42_offload_data {
struct nfs_server *seq_server;
struct nfs42_offload_status_args args;
struct nfs42_offload_status_res res;
@@ -506,7 +506,7 @@ struct nfs42_offloadcancel_data {
static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
nfs4_setup_sequence(data->seq_server->nfs_client,
&data->args.osa_seq_args,
@@ -515,7 +515,7 @@ static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
{
- struct nfs42_offloadcancel_data *data = calldata;
+ struct nfs42_offload_data *data = calldata;
trace_nfs4_offload_cancel(&data->args, task->tk_status);
nfs41_sequence_done(task, &data->res.osr_seq_res);
@@ -525,7 +525,7 @@ static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
}
-static void nfs42_free_offloadcancel_data(void *data)
+static void nfs42_free_offload_data(void *data)
{
kfree(data);
}
@@ -533,14 +533,14 @@ static void nfs42_free_offloadcancel_data(void *data)
static const struct rpc_call_ops nfs42_offload_cancel_ops = {
.rpc_call_prepare = nfs42_offload_cancel_prepare,
.rpc_call_done = nfs42_offload_cancel_done,
- .rpc_release = nfs42_free_offloadcancel_data,
+ .rpc_release = nfs42_free_offload_data,
};
static int nfs42_do_offload_cancel_async(struct file *dst,
nfs4_stateid *stateid)
{
struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
- struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs42_offload_data *data = NULL;
struct nfs_open_context *ctx = nfs_file_open_context(dst);
struct rpc_task *task;
struct rpc_message msg = {
@@ -559,7 +559,7 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-04-29 15:16 [RFC PATCH 0/4] NFSv4.2 OFFLOAD_STATUS for the Linux NFS client cel
` (2 preceding siblings ...)
2024-04-29 15:16 ` [RFC PATCH 3/4] NFS: Rename struct nfs4_offloadcancel_data cel
@ 2024-04-29 15:16 ` cel
2024-04-29 15:35 ` Olga Kornievskaia
3 siblings, 1 reply; 7+ messages in thread
From: cel @ 2024-04-29 15:16 UTC (permalink / raw)
To: Olga Kornievskaia, Dai Ngo; +Cc: linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
We've found that there are cases where a transport disconnection
results in the loss of callback RPCs. NFS servers typically do not
retransmit callback operations after a disconnect.
This can be a problem for the Linux NFS client's implementation of
asynchronous COPY, which waits indefinitely for a CB_OFFLOAD
callback. If a transport disconnect occurs while an async COPY is
running, there's a good chance the client will never get the
matching CB_OFFLOAD.
Fix this by implementing the OFFLOAD_STATUS operation so that the
Linux NFS client can probe the NFS server if it doesn't see a
CB_OFFLOAD in a reasonable amount of time.
This patch implements a simplistic check. As future work, the client
might also be able to detect whether there is no forward progress on
the requested asynchronous COPY operation, and CANCEL it.
Suggested-by: Olga Kornievskaia <kolga@netapp.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 100 +++++++++++++++++++++++++++++++++++---
fs/nfs/nfs4trace.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
3 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7656d7c103fa..224fb3b8696a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -21,6 +21,7 @@
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
@@ -173,6 +174,9 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
+/* Wait this long before checking progress on a COPY operation */
+#define NFS42_COPY_TIMEOUT (7 * HZ)
+
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *dst_server,
struct nfs_server *src_server,
@@ -222,7 +226,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
spin_unlock(&src_server->nfs_client->cl_lock);
}
- status = wait_for_completion_interruptible(&copy->completion);
+wait:
+ status = wait_for_completion_interruptible_timeout(&copy->completion,
+ NFS42_COPY_TIMEOUT);
spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(&copy->copies);
spin_unlock(&dst_server->nfs_client->cl_lock);
@@ -231,12 +237,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
list_del_init(&copy->src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
- if (status == -ERESTARTSYS) {
- goto out_cancel;
- } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
- status = -EAGAIN;
- *restart = true;
+ switch (status) {
+ case 0:
+ status = nfs42_proc_offload_status(src, src_stateid);
+ if (status && status != -EOPNOTSUPP)
+ goto wait;
+ break;
+ case -ERESTARTSYS:
goto out_cancel;
+ default:
+ if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
+ status = -EAGAIN;
+ *restart = true;
+ goto out_cancel;
+ }
}
out:
res->write_res.count = copy->count;
@@ -582,6 +596,80 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static void nfs42_offload_status_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offload_data *data = calldata;
+
+ nfs4_setup_sequence(data->seq_server->nfs_client,
+ &data->args.osa_seq_args,
+ &data->res.osr_seq_res, task);
+}
+
+static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offload_data *data = calldata;
+
+ trace_nfs4_offload_status(&data->args, task->tk_status);
+ nfs41_sequence_done(task, &data->res.osr_seq_res);
+ if (task->tk_status &&
+ nfs4_async_handle_error(task, data->seq_server, NULL,
+ NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+}
+
+static const struct rpc_call_ops nfs42_offload_status_ops = {
+ .rpc_call_prepare = nfs42_offload_status_prepare,
+ .rpc_call_done = nfs42_offload_status_done,
+ .rpc_release = nfs42_free_offload_data,
+};
+
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct nfs_server *server = NFS_SERVER(file_inode(file));
+ struct nfs42_offload_data *data = NULL;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_offload_status_ops,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
+ };
+ int status;
+
+ if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->seq_server = server;
+ data->args.osa_src_fh = NFS_FH(file_inode(file));
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
+ 1, 0);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ nfs42_free_offload_data(data);
+ return PTR_ERR(task);
+ }
+ status = rpc_wait_for_completion_task(task);
+ if (status == -ENOTSUPP)
+ server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
+ rpc_put_task(task);
+ return status;
+}
+
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 8f32dbf9c91d..9bcc525c71d1 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2564,6 +2564,7 @@ DECLARE_EVENT_CLASS(nfs4_offload_class,
), \
TP_ARGS(args, error))
DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 92de074e63b9..0937e73c4767 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
+#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
--
2.44.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-04-29 15:16 ` [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation cel
@ 2024-04-29 15:35 ` Olga Kornievskaia
2024-04-29 16:12 ` Chuck Lever
0 siblings, 1 reply; 7+ messages in thread
From: Olga Kornievskaia @ 2024-04-29 15:35 UTC (permalink / raw)
To: cel; +Cc: Olga Kornievskaia, Dai Ngo, linux-nfs, Chuck Lever
On Mon, Apr 29, 2024 at 11:22 AM <cel@kernel.org> wrote:
>
> From: Chuck Lever <chuck.lever@oracle.com>
>
> We've found that there are cases where a transport disconnection
> results in the loss of callback RPCs. NFS servers typically do not
> retransmit callback operations after a disconnect.
>
> This can be a problem for the Linux NFS client's implementation of
> asynchronous COPY, which waits indefinitely for a CB_OFFLOAD
> callback. If a transport disconnect occurs while an async COPY is
> running, there's a good chance the client will never get the
> matching CB_OFFLOAD.
>
> Fix this by implementing the OFFLOAD_STATUS operation so that the
> Linux NFS client can probe the NFS server if it doesn't see a
> CB_OFFLOAD in a reasonable amount of time.
>
> This patch implements a simplistic check. As future work, the client
> might also be able to detect whether there is no forward progress on
> the request asynchronous COPY operation, and CANCEL it.
I think this patch series needs a bit more nuances
(1) if we know that server doesn't support offload_status we might as
well wait uninterrupted perhaps? but I can see how as you mentioned we
might want to measure no forward progress and cancel the copy and
fallback to read/write.
(2) we can't really go back to the "wait" after failing a
offload_status as the cb_offload callback might have already arrived
and I think we need to walk the pending_cb_callbacks to make sure we
haven't received it before waiting again (otherwise, we'd wait
forever).
(3) then also there is the case where we woke up and sent the
offload_status and got a 'copy finished' reply but we also got the
cb_callback reply as well and the copy things need to be cleaned up
now.
>
> Suggested-by: Olga Kornievskaia <kolga@netapp.com>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> fs/nfs/nfs42proc.c | 100 +++++++++++++++++++++++++++++++++++---
> fs/nfs/nfs4trace.h | 1 +
> include/linux/nfs_fs_sb.h | 1 +
> 3 files changed, 96 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 7656d7c103fa..224fb3b8696a 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -21,6 +21,7 @@
>
> #define NFSDBG_FACILITY NFSDBG_PROC
> static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid);
>
> static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
> {
> @@ -173,6 +174,9 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
> return err;
> }
>
> +/* Wait this long before checking progress on a COPY operation */
> +#define NFS42_COPY_TIMEOUT (7 * HZ)
> +
> static int handle_async_copy(struct nfs42_copy_res *res,
> struct nfs_server *dst_server,
> struct nfs_server *src_server,
> @@ -222,7 +226,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> spin_unlock(&src_server->nfs_client->cl_lock);
> }
>
> - status = wait_for_completion_interruptible(&copy->completion);
> +wait:
> + status = wait_for_completion_interruptible_timeout(&copy->completion,
> + NFS42_COPY_TIMEOUT);
> spin_lock(&dst_server->nfs_client->cl_lock);
> list_del_init(&copy->copies);
> spin_unlock(&dst_server->nfs_client->cl_lock);
> @@ -231,12 +237,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> list_del_init(&copy->src_copies);
> spin_unlock(&src_server->nfs_client->cl_lock);
> }
> - if (status == -ERESTARTSYS) {
> - goto out_cancel;
> - } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> - status = -EAGAIN;
> - *restart = true;
> + switch (status) {
> + case 0:
> + status = nfs42_proc_offload_status(src, src_stateid);
> + if (status && status != -EOPNOTSUPP)
> + goto wait;
> + break;
> + case -ERESTARTSYS:
> goto out_cancel;
> + default:
> + if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> + status = -EAGAIN;
> + *restart = true;
> + goto out_cancel;
> + }
> }
> out:
> res->write_res.count = copy->count;
> @@ -582,6 +596,80 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
> return status;
> }
>
> +static void nfs42_offload_status_prepare(struct rpc_task *task, void *calldata)
> +{
> + struct nfs42_offload_data *data = calldata;
> +
> + nfs4_setup_sequence(data->seq_server->nfs_client,
> + &data->args.osa_seq_args,
> + &data->res.osr_seq_res, task);
> +}
> +
> +static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
> +{
> + struct nfs42_offload_data *data = calldata;
> +
> + trace_nfs4_offload_status(&data->args, task->tk_status);
> + nfs41_sequence_done(task, &data->res.osr_seq_res);
> + if (task->tk_status &&
> + nfs4_async_handle_error(task, data->seq_server, NULL,
> + NULL) == -EAGAIN)
> + rpc_restart_call_prepare(task);
> +}
> +
> +static const struct rpc_call_ops nfs42_offload_status_ops = {
> + .rpc_call_prepare = nfs42_offload_status_prepare,
> + .rpc_call_done = nfs42_offload_status_done,
> + .rpc_release = nfs42_free_offload_data,
> +};
> +
> +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid)
> +{
> + struct nfs_open_context *ctx = nfs_file_open_context(file);
> + struct nfs_server *server = NFS_SERVER(file_inode(file));
> + struct nfs42_offload_data *data = NULL;
> + struct rpc_task *task;
> + struct rpc_message msg = {
> + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
> + .rpc_cred = ctx->cred,
> + };
> + struct rpc_task_setup task_setup_data = {
> + .rpc_client = server->client,
> + .rpc_message = &msg,
> + .callback_ops = &nfs42_offload_status_ops,
> + .workqueue = nfsiod_workqueue,
> + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
> + };
> + int status;
> +
> + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
> + return -EOPNOTSUPP;
> +
> + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
> + if (data == NULL)
> + return -ENOMEM;
> +
> + data->seq_server = server;
> + data->args.osa_src_fh = NFS_FH(file_inode(file));
> + memcpy(&data->args.osa_stateid, stateid,
> + sizeof(data->args.osa_stateid));
> + msg.rpc_argp = &data->args;
> + msg.rpc_resp = &data->res;
> + task_setup_data.callback_data = data;
> + nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
> + 1, 0);
> + task = rpc_run_task(&task_setup_data);
> + if (IS_ERR(task)) {
> + nfs42_free_offload_data(data);
> + return PTR_ERR(task);
> + }
> + status = rpc_wait_for_completion_task(task);
> + if (status == -ENOTSUPP)
> + server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
> + rpc_put_task(task);
> + return status;
> +}
> +
> static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
> struct nfs42_copy_notify_args *args,
> struct nfs42_copy_notify_res *res)
> diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
> index 8f32dbf9c91d..9bcc525c71d1 100644
> --- a/fs/nfs/nfs4trace.h
> +++ b/fs/nfs/nfs4trace.h
> @@ -2564,6 +2564,7 @@ DECLARE_EVENT_CLASS(nfs4_offload_class,
> ), \
> TP_ARGS(args, error))
> DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
> +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
>
> DECLARE_EVENT_CLASS(nfs4_xattr_event,
> TP_PROTO(
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 92de074e63b9..0937e73c4767 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -278,6 +278,7 @@ struct nfs_server {
> #define NFS_CAP_LGOPEN (1U << 5)
> #define NFS_CAP_CASE_INSENSITIVE (1U << 6)
> #define NFS_CAP_CASE_PRESERVING (1U << 7)
> +#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
> #define NFS_CAP_POSIX_LOCK (1U << 14)
> #define NFS_CAP_UIDGID_NOMAP (1U << 15)
> #define NFS_CAP_STATEID_NFSV41 (1U << 16)
> --
> 2.44.0
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
2024-04-29 15:35 ` Olga Kornievskaia
@ 2024-04-29 16:12 ` Chuck Lever
0 siblings, 0 replies; 7+ messages in thread
From: Chuck Lever @ 2024-04-29 16:12 UTC (permalink / raw)
To: Olga Kornievskaia; +Cc: cel, Olga Kornievskaia, Dai Ngo, linux-nfs
On Mon, Apr 29, 2024 at 11:35:20AM -0400, Olga Kornievskaia wrote:
> On Mon, Apr 29, 2024 at 11:22 AM <cel@kernel.org> wrote:
> >
> > From: Chuck Lever <chuck.lever@oracle.com>
> >
> > We've found that there are cases where a transport disconnection
> > results in the loss of callback RPCs. NFS servers typically do not
> > retransmit callback operations after a disconnect.
> >
> > This can be a problem for the Linux NFS client's implementation of
> > asynchronous COPY, which waits indefinitely for a CB_OFFLOAD
> > callback. If a transport disconnect occurs while an async COPY is
> > running, there's a good chance the client will never get the
> > matching CB_OFFLOAD.
> >
> > Fix this by implementing the OFFLOAD_STATUS operation so that the
> > Linux NFS client can probe the NFS server if it doesn't see a
> > CB_OFFLOAD in a reasonable amount of time.
> >
> > This patch implements a simplistic check. As future work, the client
> > might also be able to detect whether there is no forward progress on
> > the request asynchronous COPY operation, and CANCEL it.
>
> I think this patch series needs a bit more nuances
> (1) if we know that server doesn't support offload_status we might as
> well wait uninterrupted perhaps? but I can see how as you mentioned we
> might want to measure no forward progress and cancel the copy and
> fallback to read/write.
The client doesn't know whether the server supports OFFLOAD_STATUS
until the first OFFLOAD_STATUS is sent. So at least /one/ of those
waits will be of the timeout variety, no matter what.
We can mitigate the cost of waking up periodically by making
NFS42_COPY_TIMEOUT longer than 7 seconds. That's just a number I
pulled out of the air.
> (2) we can't really go back to the "wait" after failing a
> offload_status as the cb_offload callback might have already arrived
> and I think we need to walk the pending_cb_callbacks to make sure we
> haven't received it before waiting again (otherwise, we'd wait
> forever).
IIUC the wait_for_completion() family of APIs does not sleep at all
if the completion has already been marked "done". So in that case
wfc() should just return a positive number and execution drops into
the "copy complete" code.
> (3) then also there is the case where we woke up and sent the
> offload_status and got a 'copy finished' reply but we also got the
> cb_callback reply as well and the copy things need to be cleaned up
> now.
The callback reply should just wake the completion, even though
there might no longer be a waiter, and then free the call data. That
seems OK.
I think what I missed is that the call data needs to be cleaned
up when we are sure the CB_OFFLOAD was lost, otherwise it leaks.
Will need to think about that. How is that done if the client
detects a server restart?
Also nfs42_proc_offload_status() needs to distinguish between "still
running," "all done," "bad stateid," and "no answer".
> > Suggested-by: Olga Kornievskaia <kolga@netapp.com>
> > Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
> > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > ---
> > fs/nfs/nfs42proc.c | 100 +++++++++++++++++++++++++++++++++++---
> > fs/nfs/nfs4trace.h | 1 +
> > include/linux/nfs_fs_sb.h | 1 +
> > 3 files changed, 96 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > index 7656d7c103fa..224fb3b8696a 100644
> > --- a/fs/nfs/nfs42proc.c
> > +++ b/fs/nfs/nfs42proc.c
> > @@ -21,6 +21,7 @@
> >
> > #define NFSDBG_FACILITY NFSDBG_PROC
> > static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
> > +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid);
> >
> > static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
> > {
> > @@ -173,6 +174,9 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
> > return err;
> > }
> >
> > +/* Wait this long before checking progress on a COPY operation */
> > +#define NFS42_COPY_TIMEOUT (7 * HZ)
> > +
> > static int handle_async_copy(struct nfs42_copy_res *res,
> > struct nfs_server *dst_server,
> > struct nfs_server *src_server,
> > @@ -222,7 +226,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> > spin_unlock(&src_server->nfs_client->cl_lock);
> > }
> >
> > - status = wait_for_completion_interruptible(&copy->completion);
> > +wait:
> > + status = wait_for_completion_interruptible_timeout(&copy->completion,
> > + NFS42_COPY_TIMEOUT);
> > spin_lock(&dst_server->nfs_client->cl_lock);
> > list_del_init(&copy->copies);
> > spin_unlock(&dst_server->nfs_client->cl_lock);
> > @@ -231,12 +237,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
> > list_del_init(&copy->src_copies);
> > spin_unlock(&src_server->nfs_client->cl_lock);
> > }
> > - if (status == -ERESTARTSYS) {
> > - goto out_cancel;
> > - } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> > - status = -EAGAIN;
> > - *restart = true;
> > + switch (status) {
> > + case 0:
> > + status = nfs42_proc_offload_status(src, src_stateid);
> > + if (status && status != -EOPNOTSUPP)
> > + goto wait;
> > + break;
> > + case -ERESTARTSYS:
> > goto out_cancel;
> > + default:
> > + if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
> > + status = -EAGAIN;
> > + *restart = true;
> > + goto out_cancel;
> > + }
> > }
> > out:
> > res->write_res.count = copy->count;
> > @@ -582,6 +596,80 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
> > return status;
> > }
> >
> > +static void nfs42_offload_status_prepare(struct rpc_task *task, void *calldata)
> > +{
> > + struct nfs42_offload_data *data = calldata;
> > +
> > + nfs4_setup_sequence(data->seq_server->nfs_client,
> > + &data->args.osa_seq_args,
> > + &data->res.osr_seq_res, task);
> > +}
> > +
> > +static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
> > +{
> > + struct nfs42_offload_data *data = calldata;
> > +
> > + trace_nfs4_offload_status(&data->args, task->tk_status);
> > + nfs41_sequence_done(task, &data->res.osr_seq_res);
> > + if (task->tk_status &&
> > + nfs4_async_handle_error(task, data->seq_server, NULL,
> > + NULL) == -EAGAIN)
> > + rpc_restart_call_prepare(task);
> > +}
> > +
> > +static const struct rpc_call_ops nfs42_offload_status_ops = {
> > + .rpc_call_prepare = nfs42_offload_status_prepare,
> > + .rpc_call_done = nfs42_offload_status_done,
> > + .rpc_release = nfs42_free_offload_data,
> > +};
> > +
> > +static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid)
> > +{
> > + struct nfs_open_context *ctx = nfs_file_open_context(file);
> > + struct nfs_server *server = NFS_SERVER(file_inode(file));
> > + struct nfs42_offload_data *data = NULL;
> > + struct rpc_task *task;
> > + struct rpc_message msg = {
> > + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
> > + .rpc_cred = ctx->cred,
> > + };
> > + struct rpc_task_setup task_setup_data = {
> > + .rpc_client = server->client,
> > + .rpc_message = &msg,
> > + .callback_ops = &nfs42_offload_status_ops,
> > + .workqueue = nfsiod_workqueue,
> > + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
> > + };
> > + int status;
> > +
> > + if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
> > + return -EOPNOTSUPP;
> > +
> > + data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
> > + if (data == NULL)
> > + return -ENOMEM;
> > +
> > + data->seq_server = server;
> > + data->args.osa_src_fh = NFS_FH(file_inode(file));
> > + memcpy(&data->args.osa_stateid, stateid,
> > + sizeof(data->args.osa_stateid));
> > + msg.rpc_argp = &data->args;
> > + msg.rpc_resp = &data->res;
> > + task_setup_data.callback_data = data;
> > + nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
> > + 1, 0);
> > + task = rpc_run_task(&task_setup_data);
> > + if (IS_ERR(task)) {
> > + nfs42_free_offload_data(data);
> > + return PTR_ERR(task);
> > + }
> > + status = rpc_wait_for_completion_task(task);
> > + if (status == -ENOTSUPP)
> > + server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
> > + rpc_put_task(task);
> > + return status;
> > +}
> > +
> > static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
> > struct nfs42_copy_notify_args *args,
> > struct nfs42_copy_notify_res *res)
> > diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
> > index 8f32dbf9c91d..9bcc525c71d1 100644
> > --- a/fs/nfs/nfs4trace.h
> > +++ b/fs/nfs/nfs4trace.h
> > @@ -2564,6 +2564,7 @@ DECLARE_EVENT_CLASS(nfs4_offload_class,
> > ), \
> > TP_ARGS(args, error))
> > DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
> > +DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
> >
> > DECLARE_EVENT_CLASS(nfs4_xattr_event,
> > TP_PROTO(
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index 92de074e63b9..0937e73c4767 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -278,6 +278,7 @@ struct nfs_server {
> > #define NFS_CAP_LGOPEN (1U << 5)
> > #define NFS_CAP_CASE_INSENSITIVE (1U << 6)
> > #define NFS_CAP_CASE_PRESERVING (1U << 7)
> > +#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
> > #define NFS_CAP_POSIX_LOCK (1U << 14)
> > #define NFS_CAP_UIDGID_NOMAP (1U << 15)
> > #define NFS_CAP_STATEID_NFSV41 (1U << 16)
> > --
> > 2.44.0
> >
> >
--
Chuck Lever
^ permalink raw reply [flat|nested] 7+ messages in thread