All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Snitzer <snitzer@kernel.org>
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
	Anna Schumaker <anna@kernel.org>
Cc: linux-nfs@vger.kernel.org
Subject: [PATCH v9 2/7] nfs/localio: avoid issuing misaligned IO using O_DIRECT
Date: Mon, 15 Sep 2025 11:41:10 -0400	[thread overview]
Message-ID: <20250915154115.19579-3-snitzer@kernel.org> (raw)
In-Reply-To: <20250915154115.19579-1-snitzer@kernel.org>

Add nfsd_file_dio_alignment and use it to avoid issuing misaligned IO
using O_DIRECT. Any misaligned DIO falls back to using buffered IO.

Because misaligned DIO is now handled safely, remove the nfs modparam
'localio_O_DIRECT_semantics', which was added so that users had to opt
in to the requirement that all O_DIRECT IO be properly DIO-aligned.

Also, introduce nfs_iov_iter_aligned_bvec() which is a variant of
iov_iter_aligned_bvec() that also verifies the offset associated with
an iov_iter is DIO-aligned.  NOTE: in a parallel effort,
iov_iter_aligned_bvec() is being removed along with
iov_iter_is_aligned().

Lastly, add a pr_info_ratelimited() message for when the underlying
filesystem returns -EINVAL because it was made to try O_DIRECT for IO
that is not DIO-aligned (shouldn't happen, so it's best to be louder if
it does).

Fixes: 3feec68563d ("nfs/localio: add direct IO enablement with sync and async IO support")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs/localio.c           | 69 ++++++++++++++++++++++++++++++++------
 fs/nfsd/localio.c          | 11 ++++++
 include/linux/nfslocalio.h |  2 ++
 3 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 42ea50d42c995..380407c41822c 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -49,11 +49,6 @@ struct nfs_local_fsync_ctx {
 static bool localio_enabled __read_mostly = true;
 module_param(localio_enabled, bool, 0644);
 
-static bool localio_O_DIRECT_semantics __read_mostly = false;
-module_param(localio_O_DIRECT_semantics, bool, 0644);
-MODULE_PARM_DESC(localio_O_DIRECT_semantics,
-		 "LOCALIO will use O_DIRECT semantics to filesystem.");
-
 static inline bool nfs_client_is_local(const struct nfs_client *clp)
 {
 	return !!rcu_access_pointer(clp->cl_uuid.net);
@@ -322,12 +317,9 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
 		return NULL;
 	}
 
-	if (localio_O_DIRECT_semantics &&
-	    test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
-		iocb->kiocb.ki_filp = file;
+	init_sync_kiocb(&iocb->kiocb, file);
+	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags))
 		iocb->kiocb.ki_flags = IOCB_DIRECT;
-	} else
-		init_sync_kiocb(&iocb->kiocb, file);
 
 	iocb->kiocb.ki_pos = hdr->args.offset;
 	iocb->hdr = hdr;
@@ -337,6 +329,30 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
 	return iocb;
 }
 
+static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
+		loff_t offset, unsigned int addr_mask, unsigned int len_mask)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t skip = i->iov_offset;
+	size_t size = i->count;
+
+	if ((offset | size) & len_mask)
+		return false;
+	do {
+		size_t len = bvec->bv_len;
+
+		if (len > size)
+			len = size;
+		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
+			return false;
+		bvec++;
+		size -= len;
+		skip = 0;
+	} while (size);
+
+	return true;
+}
+
 static void
 nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
 {
@@ -346,6 +362,25 @@ nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
 		      hdr->args.count + hdr->args.pgbase);
 	if (hdr->args.pgbase != 0)
 		iov_iter_advance(i, hdr->args.pgbase);
+
+	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+		u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
+		/* Verify the IO is DIO-aligned as required */
+		nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
+						&nf_dio_offset_align,
+						&nf_dio_read_offset_align);
+		if (dir == READ)
+			nf_dio_offset_align = nf_dio_read_offset_align;
+
+		if (nf_dio_mem_align && nf_dio_offset_align &&
+		    nfs_iov_iter_aligned_bvec(i, hdr->args.offset,
+					      nf_dio_mem_align - 1,
+					      nf_dio_offset_align - 1))
+			return; /* is DIO-aligned */
+
+		/* Fallback to using buffered for this misaligned IO */
+		iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
+	}
 }
 
 static void
@@ -406,6 +441,13 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
 	struct nfs_pgio_header *hdr = iocb->hdr;
 	struct file *filp = iocb->kiocb.ki_filp;
 
+	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+		if (status == -EINVAL) {
+			/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+			pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
+		}
+	}
+
 	nfs_local_pgio_done(hdr, status);
 
 	/*
@@ -598,6 +640,13 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
 
 	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
 
+	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+		if (status == -EINVAL) {
+			/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+			pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
+		}
+	}
+
 	/* Handle short writes as if they are ENOSPC */
 	if (status > 0 && status < hdr->args.count) {
 		hdr->mds_offset += status;
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 269fa9391dc46..be710d809a3ba 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -117,12 +117,23 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
 	return localio;
 }
 
+static void nfsd_file_dio_alignment(struct nfsd_file *nf,
+				    u32 *nf_dio_mem_align,
+				    u32 *nf_dio_offset_align,
+				    u32 *nf_dio_read_offset_align)
+{
+	*nf_dio_mem_align = nf->nf_dio_mem_align;
+	*nf_dio_offset_align = nf->nf_dio_offset_align;
+	*nf_dio_read_offset_align = nf->nf_dio_read_offset_align;
+}
+
 static const struct nfsd_localio_operations nfsd_localio_ops = {
 	.nfsd_net_try_get  = nfsd_net_try_get,
 	.nfsd_net_put  = nfsd_net_put,
 	.nfsd_open_local_fh = nfsd_open_local_fh,
 	.nfsd_file_put_local = nfsd_file_put_local,
 	.nfsd_file_file = nfsd_file_file,
+	.nfsd_file_dio_alignment = nfsd_file_dio_alignment,
 };
 
 void nfsd_localio_ops_init(void)
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index 59ea90bd136b6..3d91043254e64 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -64,6 +64,8 @@ struct nfsd_localio_operations {
 						const fmode_t);
 	struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **);
 	struct file *(*nfsd_file_file)(struct nfsd_file *);
+	void (*nfsd_file_dio_alignment)(struct nfsd_file *,
+					u32 *, u32 *, u32 *);
 } ____cacheline_aligned;
 
 extern void nfsd_localio_ops_init(void);
-- 
2.44.0


  parent reply	other threads:[~2025-09-15 15:41 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-15 15:41 [PATCH v9 0/7] NFS DIRECT: align misaligned DIO for LOCALIO Mike Snitzer
2025-09-15 15:41 ` [PATCH v9 1/7] nfs/localio: make trace_nfs_local_open_fh more useful Mike Snitzer
2025-09-15 15:41 ` Mike Snitzer [this message]
2025-09-15 15:41 ` [PATCH v9 3/7] nfs/localio: refactor iocb and iov_iter_bvec initialization Mike Snitzer
2025-09-15 15:41 ` [PATCH v9 4/7] nfs/localio: refactor iocb initialization further Mike Snitzer
2025-09-15 15:41 ` SYNCFrom cb702c86aa12e5477e1e4aca0c1384a821f2afef Mon Sep 17 00:00:00 2001 Mike Snitzer
2025-09-15 15:47   ` [PATCH v9 5/7] nfs/localio: add proper O_DIRECT support for READ and WRITE Mike Snitzer
2025-09-15 15:41 ` [PATCH v9 6/7] nfs/localio: add tracepoints for misaligned DIO READ and WRITE support Mike Snitzer
2025-09-15 15:41 ` [PATCH v9 7/7] NFS: add basic STATX_DIOALIGN and STATX_DIO_READ_ALIGN support Mike Snitzer
     [not found] ` <aMiMpYAcHV8bYU4W@kernel.org>
     [not found]   ` <aNLfroQ8Ti1Vh5wh@kernel.org>
     [not found]     ` <aNQqUprZ3DuJhMe4@kernel.org>
     [not found]       ` <aNgSOM9EzMS_Q6bR@kernel.org>
2025-09-30 16:26         ` [GIT PULL] NFS LOCALIO O_DIRECT changes for Linux 6.18 Mike Snitzer
2025-09-30 17:15           ` Chuck Lever
2025-09-30 17:35             ` Mike Snitzer
2025-09-30 17:59               ` Chuck Lever
2025-09-30 19:32           ` [GIT PULL v2] " Mike Snitzer
2025-09-30 20:53             ` Anna Schumaker
2025-09-30 21:30               ` Mike Snitzer
2025-10-01 16:04                 ` Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250915154115.19579-3-snitzer@kernel.org \
    --to=snitzer@kernel.org \
    --cc=anna@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.