From: cel@kernel.org
To: Olga Kornievskaia <kolga@netapp.com>, Dai Ngo <dai.ngo@oracle.com>
Cc: <linux-nfs@vger.kernel.org>, Chuck Lever <chuck.lever@oracle.com>
Subject: [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation
Date: Mon, 29 Apr 2024 11:16:37 -0400 [thread overview]
Message-ID: <20240429151632.212571-10-cel@kernel.org> (raw)
In-Reply-To: <20240429151632.212571-6-cel@kernel.org>
From: Chuck Lever <chuck.lever@oracle.com>
We've found that there are cases where a transport disconnection
results in the loss of callback RPCs. NFS servers typically do not
retransmit callback operations after a disconnect.
This can be a problem for the Linux NFS client's implementation of
asynchronous COPY, which waits indefinitely for a CB_OFFLOAD
callback. If a transport disconnect occurs while an async COPY is
running, there's a good chance the client will never get the
matching CB_OFFLOAD.
Fix this by implementing the OFFLOAD_STATUS operation so that the
Linux NFS client can probe the NFS server if it doesn't see a
CB_OFFLOAD in a reasonable amount of time.
This patch implements a simplistic check. As future work, the client
might also be able to detect whether there is no forward progress on
the request asynchronous COPY operation, and CANCEL it.
Suggested-by: Olga Kornievskaia <kolga@netapp.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
fs/nfs/nfs42proc.c | 100 +++++++++++++++++++++++++++++++++++---
fs/nfs/nfs4trace.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
3 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7656d7c103fa..224fb3b8696a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -21,6 +21,7 @@
#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid);
static void nfs42_set_netaddr(struct file *filep, struct nfs42_netaddr *naddr)
{
@@ -173,6 +174,9 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
+/* Wait this long before checking progress on a COPY operation */
+#define NFS42_COPY_TIMEOUT (7 * HZ)
+
static int handle_async_copy(struct nfs42_copy_res *res,
struct nfs_server *dst_server,
struct nfs_server *src_server,
@@ -222,7 +226,9 @@ static int handle_async_copy(struct nfs42_copy_res *res,
spin_unlock(&src_server->nfs_client->cl_lock);
}
- status = wait_for_completion_interruptible(©->completion);
+wait:
+ status = wait_for_completion_interruptible_timeout(©->completion,
+ NFS42_COPY_TIMEOUT);
spin_lock(&dst_server->nfs_client->cl_lock);
list_del_init(©->copies);
spin_unlock(&dst_server->nfs_client->cl_lock);
@@ -231,12 +237,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
list_del_init(©->src_copies);
spin_unlock(&src_server->nfs_client->cl_lock);
}
- if (status == -ERESTARTSYS) {
- goto out_cancel;
- } else if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
- status = -EAGAIN;
- *restart = true;
+ switch (status) {
+ case 0:
+ status = nfs42_proc_offload_status(src, src_stateid);
+ if (status && status != -EOPNOTSUPP)
+ goto wait;
+ break;
+ case -ERESTARTSYS:
goto out_cancel;
+ default:
+ if (copy->flags || copy->error == NFS4ERR_PARTNER_NO_AUTH) {
+ status = -EAGAIN;
+ *restart = true;
+ goto out_cancel;
+ }
}
out:
res->write_res.count = copy->count;
@@ -582,6 +596,80 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
return status;
}
+static void nfs42_offload_status_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offload_data *data = calldata;
+
+ nfs4_setup_sequence(data->seq_server->nfs_client,
+ &data->args.osa_seq_args,
+ &data->res.osr_seq_res, task);
+}
+
+static void nfs42_offload_status_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offload_data *data = calldata;
+
+ trace_nfs4_offload_status(&data->args, task->tk_status);
+ nfs41_sequence_done(task, &data->res.osr_seq_res);
+ if (task->tk_status &&
+ nfs4_async_handle_error(task, data->seq_server, NULL,
+ NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+}
+
+static const struct rpc_call_ops nfs42_offload_status_ops = {
+ .rpc_call_prepare = nfs42_offload_status_prepare,
+ .rpc_call_done = nfs42_offload_status_done,
+ .rpc_release = nfs42_free_offload_data,
+};
+
+static int nfs42_proc_offload_status(struct file *file, nfs4_stateid *stateid)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct nfs_server *server = NFS_SERVER(file_inode(file));
+ struct nfs42_offload_data *data = NULL;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_STATUS],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_offload_status_ops,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN,
+ };
+ int status;
+
+ if (!(server->caps & NFS_CAP_OFFLOAD_STATUS))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(struct nfs42_offload_data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->seq_server = server;
+ data->args.osa_src_fh = NFS_FH(file_inode(file));
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
+ 1, 0);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ nfs42_free_offload_data(data);
+ return PTR_ERR(task);
+ }
+ status = rpc_wait_for_completion_task(task);
+ if (status == -ENOTSUPP)
+ server->caps &= ~NFS_CAP_OFFLOAD_STATUS;
+ rpc_put_task(task);
+ return status;
+}
+
static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
struct nfs42_copy_notify_args *args,
struct nfs42_copy_notify_res *res)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 8f32dbf9c91d..9bcc525c71d1 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2564,6 +2564,7 @@ DECLARE_EVENT_CLASS(nfs4_offload_class,
), \
TP_ARGS(args, error))
DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_cancel);
+DEFINE_NFS4_OFFLOAD_EVENT(nfs4_offload_status);
DECLARE_EVENT_CLASS(nfs4_xattr_event,
TP_PROTO(
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 92de074e63b9..0937e73c4767 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
+#define NFS_CAP_OFFLOAD_STATUS (1U << 8)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
--
2.44.0
next prev parent reply other threads:[~2024-04-29 15:17 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-29 15:16 [RFC PATCH 0/4] NFSv4.2 OFFLOAD_STATUS for the Linux NFS client cel
2024-04-29 15:16 ` [RFC PATCH 1/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS XDR cel
2024-04-29 15:16 ` [RFC PATCH 2/4] NFS: Refactor trace_nfs4_offload_cancel cel
2024-04-29 15:16 ` [RFC PATCH 3/4] NFS: Rename struct nfs4_offloadcancel_data cel
2024-04-29 15:16 ` cel [this message]
2024-04-29 15:35 ` [RFC PATCH 4/4] NFS: Implement NFSv4.2's OFFLOAD_STATUS operation Olga Kornievskaia
2024-04-29 16:12 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240429151632.212571-10-cel@kernel.org \
--to=cel@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=dai.ngo@oracle.com \
--cc=kolga@netapp.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox