From: Mike Snitzer <snitzer@kernel.org>
To: linux-nfs@vger.kernel.org
Cc: Jeff Layton <jlayton@kernel.org>,
Chuck Lever <chuck.lever@oracle.com>,
Anna Schumaker <anna@kernel.org>,
Trond Myklebust <trondmy@hammerspace.com>,
NeilBrown <neilb@suse.de>,
linux-fsdevel@vger.kernel.org
Subject: [PATCH v13 15/19] pnfs/flexfiles: enable localio support
Date: Fri, 23 Aug 2024 14:14:13 -0400 [thread overview]
Message-ID: <20240823181423.20458-16-snitzer@kernel.org> (raw)
In-Reply-To: <20240823181423.20458-1-snitzer@kernel.org>
From: Trond Myklebust <trond.myklebust@hammerspace.com>
If the DS is local to this client, use localio to read, write and commit the data.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 136 +++++++++++++++++++++-
fs/nfs/flexfilelayout/flexfilelayout.h | 2 +
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +
3 files changed, 140 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 01ee52551a63..d91b640f6c05 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,72 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+/*
+ * A dummy definition to make RCU (and non-LOCALIO compilation) happy.
+ * struct nfsd_file should never be dereferenced in this file.
+ */
+struct nfsd_file {
+ int undefined__; /* placeholder member so the struct is a complete type; per the comment above it is not meant to be accessed here */
+};
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static struct nfsd_file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg,
+ u32 ds_idx,
+ struct nfs_client *clp,
+ const struct cred *cred,
+ struct nfs_fh *fh,
+ fmode_t mode)
+{ /* Look up, or open and cache, the local file for mirror @ds_idx of @lseg; yields NULL when @clp is not local or the open fails, otherwise the result of nfs_to.nfsd_file_get() */
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); /* mirror whose ro_file or rw_file slot caches the open file */
+ struct nfsd_file *nf, *new, __rcu **pnf;
+
+ if (!nfs_server_is_local(clp))
+ return NULL; /* callers then pass NULL on to nfs_initiate_pgio() or nfs_initiate_commit() */
+ if (mode & FMODE_WRITE) {
+ /*
+ * Always request read and write access since this corresponds
+ * to a rw layout.
+ */
+ mode |= FMODE_READ;
+ pnf = &mirror->rw_file;
+ } else
+ pnf = &mirror->ro_file;
+
+ new = NULL; /* stays NULL unless this call opens a file that does not end up in the slot */
+ rcu_read_lock();
+ nf = rcu_dereference(*pnf);
+ if (!nf) { /* nothing cached in the selected slot yet */
+ rcu_read_unlock(); /* RCU read section is dropped around the open; presumably because it may sleep -- TODO confirm */
+ new = nfs_local_open_fh(clp, cred, fh, mode);
+ if (IS_ERR(new))
+ return NULL; /* the error code is discarded; the caller only sees NULL */
+ rcu_read_lock();
+ /* try to swap in the pointer */
+ nf = cmpxchg(pnf, NULL, new); /* result is the previous slot value: NULL means our file was installed */
+ if (!nf) {
+ nf = new;
+ new = NULL; /* installed in the slot, so the put below must be skipped */
+ }
+ }
+ nf = nfs_to.nfsd_file_get(nf); /* presumably takes the reference handed to the caller; NOTE(review): nf is NULL here if nfs_local_open_fh() can return NULL rather than an ERR_PTR -- confirm this is tolerated */
+ rcu_read_unlock();
+ if (new)
+ nfs_to.nfsd_file_put(new); /* another file was already in the slot: release the one opened here */
+ return nf;
+}
+
+#else
+static struct nfsd_file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg, u32 ds_idx,
+ struct nfs_client *clp, const struct cred *cred,
+ struct nfs_fh *fh, fmode_t mode)
+{
+ return NULL; /* built without CONFIG_NFS_LOCALIO: never yields a local file; all arguments are ignored */
+}
+#endif /* IS_ENABLED(CONFIG_NFS_LOCALIO) */
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -237,8 +304,17 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
- const struct cred *cred;
+ struct nfsd_file * __maybe_unused nf;
+ const struct cred *cred;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ nf = rcu_access_pointer(mirror->ro_file);
+ if (nf)
+ nfs_to.nfsd_file_put(nf);
+ nf = rcu_access_pointer(mirror->rw_file);
+ if (nf)
+ nfs_to.nfsd_file_put(nf);
+#endif
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
cred = rcu_access_pointer(mirror->ro_cred);
@@ -514,6 +590,30 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
/* swap cred ptrs so free_mirror will clean up old */
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ if (lgr->range.iomode == IOMODE_READ) {
+ const struct cred __rcu *old =
+ xchg(&mirror->ro_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->ro_cred, old);
+ /* drop file if creds changed */
+ if (old != cred) {
+ struct nfsd_file *nf =
+ rcu_dereference_protected(xchg(&mirror->ro_file, NULL), 1);
+ if (nf)
+ nfs_to.nfsd_file_put(nf);
+ }
+ } else {
+ const struct cred __rcu *old =
+ xchg(&mirror->rw_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->rw_cred, old);
+ if (old != cred) {
+ struct nfsd_file *nf =
+ rcu_dereference_protected(xchg(&mirror->rw_file, NULL), 1);
+ if (nf)
+ nfs_to.nfsd_file_put(nf);
+ }
+ }
+#else
if (lgr->range.iomode == IOMODE_READ) {
cred = xchg(&mirror->ro_cred, cred);
rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
@@ -521,6 +621,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
cred = xchg(&mirror->rw_cred, cred);
rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
}
+#endif /* IS_ENABLED(CONFIG_NFS_LOCALIO) */
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;
}
@@ -1756,6 +1857,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *nf;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1802,11 +1904,19 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ nf = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ);
+ if (nf) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN, NULL);
+ 0, RPC_TASK_SOFTCONN, nf);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1826,6 +1936,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *nf;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1870,11 +1981,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ nf = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (nf) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN, NULL);
+ sync, RPC_TASK_SOFTCONN, nf);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1908,6 +2027,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct nfsd_file *nf;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1946,10 +2066,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ nf = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (nf) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN, NULL);
+ how, RPC_TASK_SOFTCONN, nf);
put_cred(ds_cred);
return ret;
out_err:
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f84b3fb0dddd..562e7e27a8b5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -82,7 +82,9 @@ struct nfs4_ff_layout_mirror {
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
const struct cred __rcu *ro_cred;
+ struct nfsd_file __rcu *ro_file;
const struct cred __rcu *rw_cred;
+ struct nfsd_file __rcu *rw_file;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..e58bedfb1dcc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * Keep ds_clp even if the DS is local, so that if local I/O cannot
+ * proceed for some reason, we can fall back to NFS at any time.
+ */
+ nfs_local_probe(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
--
2.44.0
next prev parent reply other threads:[~2024-08-23 18:14 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-23 18:13 [PATCH v13 00/19] nfs/nfsd: add support for localio Mike Snitzer
2024-08-23 18:13 ` [PATCH v13 01/19] nfs_common: factor out nfs_errtbl and nfs_stat_to_errno Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 02/19] nfs_common: factor out nfs4_errtbl and nfs4_stat_to_errno Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 03/19] nfs: factor out {encode,decode}_opaque_fixed to nfs_xdr.h Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 04/19] nfsd: factor out __fh_verify to allow NULL rqstp to be passed Mike Snitzer
2024-08-25 15:32 ` Chuck Lever
2024-08-25 23:44 ` NeilBrown
2024-08-26 14:51 ` Chuck Lever
2024-08-28 17:01 ` Chuck Lever
2024-08-29 0:30 ` Mike Snitzer
2024-08-29 0:32 ` Mike Snitzer
2024-08-27 18:58 ` Mike Snitzer
2024-08-27 19:26 ` Chuck Lever
2024-08-27 19:35 ` Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 05/19] nfsd: add nfsd_file_acquire_local() Mike Snitzer
2024-08-25 15:18 ` Chuck Lever
2024-08-23 18:14 ` [PATCH v13 06/19] SUNRPC: remove call_allocate() BUG_ONs Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 07/19] SUNRPC: add rpcauth_map_clnt_to_svc_cred_local Mike Snitzer
2024-08-25 15:17 ` Chuck Lever
2024-08-27 16:08 ` Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 08/19] SUNRPC: replace program list with program array Mike Snitzer
2024-08-25 15:14 ` Chuck Lever
2024-08-23 18:14 ` [PATCH v13 09/19] nfs_common: add NFS LOCALIO auxiliary protocol enablement Mike Snitzer
2024-08-26 0:32 ` NeilBrown
2024-08-27 17:45 ` Mike Snitzer
2024-08-27 21:25 ` NeilBrown
2024-08-23 18:14 ` [PATCH v13 10/19] nfsd: add localio support Mike Snitzer
2024-08-25 15:13 ` Chuck Lever
2024-08-26 0:53 ` NeilBrown
2024-08-26 20:03 ` Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 11/19] nfsd: implement server support for NFS_LOCALIO_PROGRAM Mike Snitzer
2024-08-25 15:09 ` Chuck Lever
2024-08-23 18:14 ` [PATCH v13 12/19] nfs: pass struct nfsd_file to nfs_init_pgio and nfs_init_commit Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 13/19] nfs: add localio support Mike Snitzer
2024-08-26 1:21 ` NeilBrown
2024-08-23 18:14 ` [PATCH v13 14/19] nfs: enable localio for non-pNFS IO Mike Snitzer
2024-08-23 18:14 ` Mike Snitzer [this message]
2024-08-26 1:39 ` [PATCH v13 15/19] pnfs/flexfiles: enable localio support NeilBrown
2024-08-26 15:38 ` Mike Snitzer
2024-08-27 21:27 ` NeilBrown
2024-08-23 18:14 ` [PATCH v13 16/19] nfs/localio: use dedicated workqueues for filesystem read and write Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 17/19] nfs: implement client support for NFS_LOCALIO_PROGRAM Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 18/19] nfs: add Documentation/filesystems/nfs/localio.rst Mike Snitzer
2024-08-23 18:14 ` [PATCH v13 19/19] nfs: add FAQ section to Documentation/filesystems/nfs/localio.rst Mike Snitzer
2024-08-26 1:56 ` NeilBrown
2024-08-26 14:16 ` Chuck Lever III
2024-08-26 14:50 ` Trond Myklebust
2024-08-27 21:49 ` NeilBrown
2024-08-27 22:24 ` Trond Myklebust
2024-08-27 23:41 ` NeilBrown
2024-08-28 0:08 ` Trond Myklebust
2024-08-28 4:26 ` Mike Snitzer
2024-08-25 15:46 ` [PATCH v13 00/19] nfs/nfsd: add support for localio Chuck Lever
2024-08-27 16:56 ` Mike Snitzer
2024-08-26 1:59 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240823181423.20458-16-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=anna@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=jlayton@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
--cc=trondmy@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).