From: Mike Snitzer <snitzer@kernel.org>
To: linux-nfs@vger.kernel.org
Cc: Anna Schumaker <anna@kernel.org>,
Trond Myklebust <trondmy@hammerspace.com>,
Chuck Lever <chuck.lever@oracle.com>,
Jeff Layton <jlayton@kernel.org>, NeilBrown <neilb@suse.de>
Subject: [for-6.13 PATCH v2 02/15] nfs/localio: add direct IO enablement with sync and async IO support
Date: Wed, 13 Nov 2024 22:59:39 -0500 [thread overview]
Message-ID: <20241114035952.13889-3-snitzer@kernel.org> (raw)
In-Reply-To: <20241114035952.13889-1-snitzer@kernel.org>
This commit simply adds the required O_DIRECT plumbing. It doesn't
address the fact that NFS doesn't ensure all writes are page aligned
(nor device logical block size aligned as required by O_DIRECT).
Because NFS will read-modify-write for IO that isn't aligned, LOCALIO
will not use O_DIRECT semantics by default if/when an application
requests the use of O_DIRECT. Allow the use of O_DIRECT semantics by:
1: Adding a flag to the nfs_pgio_header struct to allow the NFS
O_DIRECT layer to signal that O_DIRECT was used by the application
2: Adding a 'localio_O_DIRECT_semantics' NFS module parameter that
when enabled will cause LOCALIO to use O_DIRECT semantics (this may
cause IO to fail if applications do not properly align their IO).
This commit is derived from code developed by Weston Andros Adamson.
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
Documentation/filesystems/nfs/localio.rst | 11 +++
fs/nfs/direct.c | 1 +
fs/nfs/localio.c | 93 ++++++++++++++++++++---
include/linux/nfs_xdr.h | 1 +
4 files changed, 96 insertions(+), 10 deletions(-)
diff --git a/Documentation/filesystems/nfs/localio.rst b/Documentation/filesystems/nfs/localio.rst
index bd1967e2eab32..eb3dc764cfce8 100644
--- a/Documentation/filesystems/nfs/localio.rst
+++ b/Documentation/filesystems/nfs/localio.rst
@@ -306,6 +306,17 @@ is issuing IO to the underlying local filesystem that it is sharing with
the NFS server. See: fs/nfs/localio.c:nfs_local_doio() and
fs/nfs/localio.c:nfs_local_commit().
+With normal NFS that makes use of RPC to issue IO to the server, if an
+application uses O_DIRECT the NFS client will bypass the pagecache but
+the NFS server will not. Because the NFS server's use of buffered IO
+affords applications to be less precise with their alignment when
+issuing IO to the NFS client. LOCALIO can be configured to use O_DIRECT
+semantics by setting the 'localio_O_DIRECT_semantics' nfs module
+parameter to Y, e.g.:
+ echo Y > /sys/module/nfs/parameters/localio_O_DIRECT_semantics
+Once enabled, it will cause LOCALIO to use O_DIRECT semantics (this may
+cause IO to fail if applications do not properly align their IO).
+
Security
========
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 90079ca134dd3..4b92493d6ff0e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -303,6 +303,7 @@ static void nfs_read_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
{
get_dreq(hdr->dreq);
+ set_bit(NFS_IOHDR_ODIRECT, &hdr->flags);
}
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 4b8618cf114ca..7637786c5cb70 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -35,6 +35,7 @@ struct nfs_local_kiocb {
struct bio_vec *bvec;
struct nfs_pgio_header *hdr;
struct work_struct work;
+ void (*aio_complete_work)(struct work_struct *);
struct nfsd_file *localio;
};
@@ -48,6 +49,11 @@ struct nfs_local_fsync_ctx {
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
+static bool localio_O_DIRECT_semantics __read_mostly = false;
+module_param(localio_O_DIRECT_semantics, bool, 0644);
+MODULE_PARM_DESC(localio_O_DIRECT_semantics,
+ "LOCALIO will use O_DIRECT semantics to filesystem.");
+
static inline bool nfs_client_is_local(const struct nfs_client *clp)
{
return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
@@ -285,10 +291,19 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
kfree(iocb);
return NULL;
}
- init_sync_kiocb(&iocb->kiocb, file);
+
+ if (localio_O_DIRECT_semantics &&
+ test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
+ iocb->kiocb.ki_filp = file;
+ iocb->kiocb.ki_flags = IOCB_DIRECT;
+ } else
+ init_sync_kiocb(&iocb->kiocb, file);
+
iocb->kiocb.ki_pos = hdr->args.offset;
iocb->hdr = hdr;
iocb->kiocb.ki_flags &= ~IOCB_APPEND;
+ iocb->aio_complete_work = NULL;
+
return iocb;
}
@@ -343,6 +358,18 @@ nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
nfs_local_hdr_release(hdr, hdr->task.tk_ops);
}
+/*
+ * Complete the I/O from iocb->kiocb.ki_complete()
+ *
+ * Note that this function can be called from a bottom half context,
+ * hence we need to queue the rpc_call_done() etc to a workqueue
+ */
+static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
+{
+ INIT_WORK(&iocb->work, iocb->aio_complete_work);
+ queue_work(nfsiod_workqueue, &iocb->work);
+}
+
static void
nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
{
@@ -365,6 +392,23 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
status > 0 ? status : 0, hdr->res.eof);
}
+static void nfs_local_read_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_read_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
+}
+
static void nfs_local_call_read(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
@@ -379,10 +423,10 @@ static void nfs_local_call_read(struct work_struct *work)
nfs_local_iter_init(&iter, iocb, READ);
status = filp->f_op->read_iter(&iocb->kiocb, &iter);
- WARN_ON_ONCE(status == -EIOCBQUEUED);
-
- nfs_local_read_done(iocb, status);
- nfs_local_pgio_release(iocb);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_read_done(iocb, status);
+ nfs_local_pgio_release(iocb);
+ }
revert_creds(save_cred);
}
@@ -410,6 +454,11 @@ nfs_do_local_read(struct nfs_pgio_header *hdr,
nfs_local_pgio_init(hdr, call_ops);
hdr->res.eof = false;
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
+ iocb->aio_complete_work = nfs_local_read_aio_complete_work;
+ }
+
INIT_WORK(&iocb->work, nfs_local_call_read);
queue_work(nfslocaliod_workqueue, &iocb->work);
@@ -534,6 +583,24 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
nfs_local_pgio_done(hdr, status);
}
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(work, struct nfs_local_kiocb, work);
+
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+}
+
+static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
+{
+ struct nfs_local_kiocb *iocb =
+ container_of(kiocb, struct nfs_local_kiocb, kiocb);
+
+ nfs_local_write_done(iocb, ret);
+ nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
+}
+
static void nfs_local_call_write(struct work_struct *work)
{
struct nfs_local_kiocb *iocb =
@@ -552,11 +619,11 @@ static void nfs_local_call_write(struct work_struct *work)
file_start_write(filp);
status = filp->f_op->write_iter(&iocb->kiocb, &iter);
file_end_write(filp);
- WARN_ON_ONCE(status == -EIOCBQUEUED);
-
- nfs_local_write_done(iocb, status);
- nfs_local_vfs_getattr(iocb);
- nfs_local_pgio_release(iocb);
+ if (status != -EIOCBQUEUED) {
+ nfs_local_write_done(iocb, status);
+ nfs_local_vfs_getattr(iocb);
+ nfs_local_pgio_release(iocb);
+ }
revert_creds(save_cred);
current->flags = old_flags;
@@ -592,10 +659,16 @@ nfs_do_local_write(struct nfs_pgio_header *hdr,
case NFS_FILE_SYNC:
iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
}
+
nfs_local_pgio_init(hdr, call_ops);
nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
+ iocb->aio_complete_work = nfs_local_write_aio_complete_work;
+ }
+
INIT_WORK(&iocb->work, nfs_local_call_write);
queue_work(nfslocaliod_workqueue, &iocb->work);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e74a87bb18a44..162b7c0c35557 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1632,6 +1632,7 @@ enum {
NFS_IOHDR_RESEND_PNFS,
NFS_IOHDR_RESEND_MDS,
NFS_IOHDR_UNSTABLE_WRITES,
+ NFS_IOHDR_ODIRECT,
};
struct nfs_io_completion;
--
2.44.0
next prev parent reply other threads:[~2024-11-14 3:59 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-14 3:59 [for-6.13 PATCH v2 00/15] nfs/nfsd: fixes and improvements for LOCALIO Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 01/15] nfs_common: must not hold RCU while calling nfsd_file_put_local Mike Snitzer
2024-11-14 3:59 ` Mike Snitzer [this message]
2024-11-14 3:59 ` [for-6.13 PATCH v2 03/15] nfsd: add nfsd_file_{get,put} to 'nfs_to' nfsd_localio_operations Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 04/15] nfs_common: rename functions that invalidate LOCALIO nfs_clients Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 05/15] nfs_common: move localio_lock to new lock member of nfs_uuid_t Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 06/15] nfs: cache all open LOCALIO nfsd_file(s) in client Mike Snitzer
2024-11-15 3:12 ` NeilBrown
2024-11-15 16:49 ` Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 07/15] nfsd: update percpu_ref to manage references on nfsd_net Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 08/15] nfsd: rename nfsd_serv_ prefixed methods and variables with nfsd_net_ Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 09/15] nfsd: nfsd_file_acquire_local no longer returns GC'd nfsd_file Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 10/15] nfs_common: rename nfslocalio nfs_uuid_lock to nfs_uuids_lock Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 11/15] nfs_common: track all open nfsd_files per LOCALIO nfs_client Mike Snitzer
2024-11-14 18:53 ` Jeff Layton
2024-11-14 23:22 ` Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 12/15] nfs_common: add nfs_localio trace events Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 13/15] nfs/localio: remove redundant code and simplify LOCALIO enablement Mike Snitzer
2024-11-14 15:31 ` [for-6.13 PATCH v2-fixed " Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 14/15] nfs: probe for LOCALIO when v4 client reconnects to server Mike Snitzer
2024-11-14 3:59 ` [for-6.13 PATCH v2 15/15] nfs: probe for LOCALIO when v3 " Mike Snitzer
2024-11-15 3:55 ` NeilBrown
2024-11-14 16:14 ` (subset) [for-6.13 PATCH v2 00/15] nfs/nfsd: fixes and improvements for LOCALIO cel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241114035952.13889-3-snitzer@kernel.org \
--to=snitzer@kernel.org \
--cc=anna@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=jlayton@kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
--cc=trondmy@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox