From: Mike Snitzer <snitzer@kernel.org>
To: linux-nfs@vger.kernel.org
Cc: Jeff Layton <jlayton@kernel.org>,
Chuck Lever <chuck.lever@oracle.com>,
Anna Schumaker <anna@kernel.org>,
Trond Myklebust <trondmy@hammerspace.com>,
NeilBrown <neilb@suse.de>,
linux-fsdevel@vger.kernel.org
Subject: [PATCH v12 16/24] pnfs/flexfiles: enable localio support
Date: Mon, 19 Aug 2024 14:17:21 -0400 [thread overview]
Message-ID: <20240819181750.70570-17-snitzer@kernel.org> (raw)
In-Reply-To: <20240819181750.70570-1-snitzer@kernel.org>
From: Trond Myklebust <trond.myklebust@hammerspace.com>
If the DS is local to this client use localio to write the data.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 109 ++++++++++++++++++++--
fs/nfs/flexfilelayout/flexfilelayout.h | 2 +
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 ++
3 files changed, 110 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 01ee52551a63..206b4b524e43 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,6 +11,7 @@
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/file.h>
#include <linux/sched/mm.h>
#include <linux/sunrpc/metrics.h>
@@ -162,6 +163,52 @@ decode_name(struct xdr_stream *xdr, u32 *id)
return 0;
}
+static struct file *
+ff_local_open_fh(struct pnfs_layout_segment *lseg,
+ u32 ds_idx,
+ struct nfs_client *clp,
+ const struct cred *cred,
+ struct nfs_fh *fh,
+ fmode_t mode)
+{
+ struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
+ struct file *filp, *new, __rcu **pfile;
+
+ if (!nfs_server_is_local(clp))
+ return NULL;
+ if (mode & FMODE_WRITE) {
+ /*
+ * Always request read and write access since this corresponds
+ * to a rw layout.
+ */
+ mode |= FMODE_READ;
+ pfile = &mirror->rw_file;
+ } else
+ pfile = &mirror->ro_file;
+
+ new = NULL;
+ rcu_read_lock();
+ filp = rcu_dereference(*pfile);
+ if (!filp) {
+ rcu_read_unlock();
+ new = nfs_local_open_fh(clp, cred, fh, mode);
+ if (IS_ERR(new))
+ return NULL;
+ rcu_read_lock();
+ /* try to swap in the pointer */
+ filp = cmpxchg(pfile, NULL, new);
+ if (!filp) {
+ filp = new;
+ new = NULL;
+ }
+ }
+ filp = get_file_rcu(&filp);
+ rcu_read_unlock();
+ if (new)
+ fput(new);
+ return filp;
+}
+
static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1,
const struct nfs4_ff_layout_mirror *m2)
{
@@ -237,8 +284,15 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)
{
+ struct file *filp;
const struct cred *cred;
+ filp = rcu_access_pointer(mirror->ro_file);
+ if (filp)
+ fput(filp);
+ filp = rcu_access_pointer(mirror->rw_file);
+ if (filp)
+ fput(filp);
ff_layout_remove_mirror(mirror);
kfree(mirror->fh_versions);
cred = rcu_access_pointer(mirror->ro_cred);
@@ -414,6 +468,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_ff_layout_mirror *mirror;
struct cred *kcred;
const struct cred __rcu *cred;
+ const struct cred __rcu *old;
kuid_t uid;
kgid_t gid;
u32 ds_count, fh_count, id;
@@ -513,13 +568,26 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);
if (mirror != fls->mirror_array[i]) {
+ struct file *filp;
+
/* swap cred ptrs so free_mirror will clean up old */
if (lgr->range.iomode == IOMODE_READ) {
- cred = xchg(&mirror->ro_cred, cred);
- rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred);
+ old = xchg(&mirror->ro_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->ro_cred, old);
+ /* drop file if creds changed */
+ if (old != cred) {
+ filp = rcu_dereference_protected(xchg(&mirror->ro_file, NULL), 1);
+ if (filp)
+ fput(filp);
+ }
} else {
- cred = xchg(&mirror->rw_cred, cred);
- rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred);
+ old = xchg(&mirror->rw_cred, cred);
+ rcu_assign_pointer(fls->mirror_array[i]->rw_cred, old);
+ if (old != cred) {
+ filp = rcu_dereference_protected(xchg(&mirror->rw_file, NULL), 1);
+ if (filp)
+ fput(filp);
+ }
}
ff_layout_free_mirror(fls->mirror_array[i]);
fls->mirror_array[i] = mirror;
@@ -1756,6 +1824,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct file *filp;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1802,11 +1871,19 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
hdr->args.offset = offset;
hdr->mds_offset = offset;
+ /* Start IO accounting for local read */
+ filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ);
+ if (filp) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_read_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_read_call_ops_v3 :
&ff_layout_read_call_ops_v4,
- 0, RPC_TASK_SOFTCONN, NULL);
+ 0, RPC_TASK_SOFTCONN, filp);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1826,6 +1903,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct file *filp;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
loff_t offset = hdr->args.offset;
@@ -1870,11 +1948,19 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
*/
hdr->args.offset = offset;
+ /* Start IO accounting for local write */
+ filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (filp) {
+ hdr->task.tk_start = ktime_get();
+ ff_layout_write_record_layoutstats_start(&hdr->task, hdr);
+ }
+
/* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_write_call_ops_v3 :
&ff_layout_write_call_ops_v4,
- sync, RPC_TASK_SOFTCONN, NULL);
+ sync, RPC_TASK_SOFTCONN, filp);
put_cred(ds_cred);
return PNFS_ATTEMPTED;
@@ -1908,6 +1994,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
+ struct file *filp;
struct nfs4_ff_layout_mirror *mirror;
const struct cred *ds_cred;
u32 idx;
@@ -1946,10 +2033,18 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
if (fh)
data->args.fh = fh;
+ /* Start IO accounting for local commit */
+ filp = ff_local_open_fh(lseg, idx, ds->ds_clp, ds_cred, fh,
+ FMODE_READ|FMODE_WRITE);
+ if (filp) {
+ data->task.tk_start = ktime_get();
+ ff_layout_commit_record_layoutstats_start(&data->task, data);
+ }
+
ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
vers == 3 ? &ff_layout_commit_call_ops_v3 :
&ff_layout_commit_call_ops_v4,
- how, RPC_TASK_SOFTCONN, NULL);
+ how, RPC_TASK_SOFTCONN, filp);
put_cred(ds_cred);
return ret;
out_err:
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index f84b3fb0dddd..8e042df5a2c9 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -82,7 +82,9 @@ struct nfs4_ff_layout_mirror {
struct nfs_fh *fh_versions;
nfs4_stateid stateid;
const struct cred __rcu *ro_cred;
+ struct file __rcu *ro_file;
const struct cred __rcu *rw_cred;
+ struct file __rcu *rw_file;
refcount_t ref;
spinlock_t lock;
unsigned long flags;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e028f5a0ef5f..e58bedfb1dcc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -395,6 +395,12 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
/* connect success, check rsize/wsize limit */
if (!status) {
+ /*
+ * ds_clp is put in destroy_ds().
+ * keep ds_clp even if DS is local, so that if local IO cannot
+ * proceed somehow, we can fall back to NFS whenever we want.
+ */
+ nfs_local_probe(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
--
2.44.0
next prev parent reply other threads:[~2024-08-19 18:18 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-19 18:17 [PATCH v12 00/24] nfs/nfsd: add support for localio Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 01/24] nfs_common: factor out nfs_errtbl and nfs_stat_to_errno Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 02/24] nfs_common: factor out nfs4_errtbl and nfs4_stat_to_errno Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 03/24] nfs: factor out {encode,decode}_opaque_fixed to nfs_xdr.h Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 04/24] nfsd: factor out __fh_verify to allow NULL rqstp to be passed Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 05/24] nfsd: fix nfsfh tracepoints to properly handle NULL rqstp Mike Snitzer
2024-08-21 17:46 ` Jeff Layton
2024-08-21 21:23 ` Mike Snitzer
2024-08-22 15:07 ` Chuck Lever
2024-08-22 16:04 ` Mike Snitzer
2024-08-22 17:07 ` Jeff Layton
2024-08-22 17:20 ` Mike Snitzer
2024-08-22 18:14 ` Chuck Lever III
2024-08-19 18:17 ` [PATCH v12 06/24] nfsd: add nfsd_file_acquire_local() Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 07/24] SUNRPC: remove call_allocate() BUG_ONs Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 08/24] SUNRPC: add rpcauth_map_clnt_to_svc_cred_local Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 09/24] nfs_common: add NFS LOCALIO auxiliary protocol enablement Mike Snitzer
2024-08-21 18:04 ` Jeff Layton
2024-08-21 18:39 ` Jeff Layton
2024-08-19 18:17 ` [PATCH v12 10/24] nfsd: add localio support Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 11/24] nfsd: implement server support for NFS_LOCALIO_PROGRAM Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 12/24] SUNRPC: replace program list with program array Mike Snitzer
2024-08-21 18:31 ` Jeff Layton
2024-08-21 20:40 ` Mike Snitzer
2024-08-21 21:43 ` Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 13/24] nfs: pass struct file to nfs_init_pgio and nfs_init_commit Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 14/24] nfs: add localio support Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 15/24] nfs: enable localio for non-pNFS IO Mike Snitzer
2024-08-19 18:17 ` Mike Snitzer [this message]
2024-08-19 18:17 ` [PATCH v12 17/24] nfs/localio: use dedicated workqueues for filesystem read and write Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 18/24] nfs: implement client support for NFS_LOCALIO_PROGRAM Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 19/24] nfs: add Documentation/filesystems/nfs/localio.rst Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 20/24] nfsd: use GC for nfsd_file returned by nfsd_file_acquire_local Mike Snitzer
2024-08-21 18:34 ` Jeff Layton
2024-08-19 18:17 ` [PATCH v12 21/24] nfs_common: expose localio's required nfsd symbols to nfs client Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 22/24] nfs: push localio nfsd_file_put call out to client Mike Snitzer
2024-08-21 18:50 ` Jeff Layton
2024-08-19 18:17 ` [PATCH v12 23/24] nfs: switch client to use nfsd_file for localio Mike Snitzer
2024-08-19 18:17 ` [PATCH v12 24/24] nfs: add FAQ section to Documentation/filesystems/nfs/localio.rst Mike Snitzer
2024-08-21 19:03 ` Jeff Layton
2024-08-21 20:12 ` Mike Snitzer
2024-08-21 20:14 ` Mike Snitzer
2024-08-21 23:46 ` Jeff Layton
2024-08-19 18:29 ` [PATCH v12 00/24] nfs/nfsd: add support for localio Chuck Lever III
2024-08-19 18:43 ` Mike Snitzer
2024-08-21 19:20 ` Jeff Layton
2024-08-21 20:05 ` Mike Snitzer
2024-08-22 12:35 ` Jeff Layton
2024-08-22 2:00 ` Mike Snitzer
2024-08-22 12:50 ` Jeff Layton
2024-08-22 15:18 ` Chuck Lever III
2024-08-22 15:42 ` Mike Snitzer
2024-08-21 19:56 ` Chuck Lever
2024-08-21 20:10 ` Mike Snitzer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240819181750.70570-17-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=anna@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=jlayton@kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
--cc=trondmy@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.