Linux NFS development
 help / color / mirror / Atom feed
From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neil@brown.name>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, Christoph Hellwig <hch@lst.de>,
	Chuck Lever <chuck.lever@oracle.com>
Subject: [PATCH v8 09/12] NFSD: Handle both offset and memory alignment for direct I/O
Date: Mon, 27 Oct 2025 11:46:27 -0400	[thread overview]
Message-ID: <20251027154630.1774-10-cel@kernel.org> (raw)
In-Reply-To: <20251027154630.1774-1-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

Currently, nfsd_is_write_dio_possible() only considers file offset
alignment (nf_dio_offset_align) when splitting an NFS WRITE request
into segments. This leaves accounting for memory buffer alignment
(nf_dio_mem_align) until nfsd_setup_write_dio_iters(). If this
second check fails, the code falls back to cached I/O entirely,
wasting the opportunity to use direct I/O for the bulk of the
request.

Enhance the logic to find a beginning segment size that satisfies
both alignment constraints simultaneously. The search algorithm
starts with the file offset alignment requirement and steps through
multiples of offset_align, checking memory alignment at each step.
The search is bounded by lcm(offset_align, mem_align) to ensure that
it always terminates.

Signed-off-by: Chuck Lever <cel@kernel.org>
---
 fs/nfsd/filecache.c |   5 ++
 fs/nfsd/filecache.h |   1 +
 fs/nfsd/vfs.c       | 119 +++++++++++++++++++++++++++++---------------
 3 files changed, 86 insertions(+), 39 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index a238b6725008..89adc4ab5b24 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -40,6 +40,7 @@
 #include <linux/seq_file.h>
 #include <linux/rhashtable.h>
 #include <linux/nfslocalio.h>
+#include <linux/lcm.h>
 
 #include "vfs.h"
 #include "nfsd.h"
@@ -234,6 +235,7 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
 	nf->nf_dio_mem_align = 0;
 	nf->nf_dio_offset_align = 0;
 	nf->nf_dio_read_offset_align = 0;
+	nf->nf_dio_align_lcm = 0;
 	return nf;
 }
 
@@ -1071,6 +1073,9 @@ nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf)
 	if (stat.result_mask & STATX_DIOALIGN) {
 		nf->nf_dio_mem_align = stat.dio_mem_align;
 		nf->nf_dio_offset_align = stat.dio_offset_align;
+		if (stat.dio_mem_align && stat.dio_offset_align)
+			nf->nf_dio_align_lcm = lcm(stat.dio_mem_align,
+						   stat.dio_offset_align);
 	}
 	if (stat.result_mask & STATX_DIO_READ_ALIGN)
 		nf->nf_dio_read_offset_align = stat.dio_read_offset_align;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e3d6ca2b6030..2648aaab5a1b 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -58,6 +58,7 @@ struct nfsd_file {
 	u32			nf_dio_mem_align;
 	u32			nf_dio_offset_align;
 	u32			nf_dio_read_offset_align;
+	unsigned long		nf_dio_align_lcm;
 };
 
 int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 37353fb48d58..a872be300c9f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1261,49 +1261,73 @@ struct nfsd_write_dio_seg {
 
 struct nfsd_write_dio_args {
 	struct nfsd_file		*nf;
-	size_t				first, middle, last;
 	unsigned int			nsegs;
 	struct nfsd_write_dio_seg	segment[3];
 };
 
+/*
+ * Find the minimum offset within the write request that aligns both
+ * the file offset and memory buffer for direct I/O.
+ *
+ * Returns the size of the unaligned prefix, or SIZE_MAX if no alignment
+ * is possible within reasonable bounds.
+ */
+static size_t
+nfsd_find_dio_aligned_offset(struct nfsd_file *nf, loff_t file_offset,
+			     unsigned long mem_offset, size_t total_len)
+{
+	u32 offset_align = nf->nf_dio_offset_align;
+	u32 mem_align = nf->nf_dio_mem_align;
+	unsigned long search_limit;
+	size_t first;
+
+	/* Start with the file offset alignment requirement */
+	first = round_up(file_offset, offset_align) - file_offset;
+
+	/* Quick check: does this also satisfy memory alignment? */
+	if (((mem_offset + first) & (mem_align - 1)) == 0)
+		return first;
+
+	/*
+	 * Search for a value that satisfies both constraints by stepping
+	 * through multiples of offset_align. Limit search to one period
+	 * of the LCM. We need to check up through the search_limit to
+	 * cover all possible alignments within the LCM period.
+	 */
+	search_limit = min_t(unsigned long, nf->nf_dio_align_lcm, total_len);
+
+	for (; first <= search_limit && first < total_len; first += offset_align) {
+		if (((mem_offset + first) & (mem_align - 1)) == 0)
+			return first;
+	}
+
+	return SIZE_MAX;  /* No alignment found */
+}
+
+/*
+ * Check if the file supports direct I/O and the write is large enough for it.
+ */
 static bool
 nfsd_is_write_dio_possible(loff_t offset, unsigned long len,
 			   struct nfsd_write_dio_args *args)
 {
-	u32 dio_blocksize = args->nf->nf_dio_offset_align;
-	loff_t first_end, orig_end, middle_end;
+	u32 offset_align = args->nf->nf_dio_offset_align;
+	u32 mem_align = args->nf->nf_dio_mem_align;
 
-	if (unlikely(!args->nf->nf_dio_mem_align || !dio_blocksize))
-		return false;
-	if (unlikely(len < dio_blocksize))
+	if (unlikely(!mem_align || !offset_align))
 		return false;
 
-	first_end = round_up(offset, dio_blocksize);
-	orig_end = offset + len;
-	middle_end = round_down(orig_end, dio_blocksize);
+	/*
+	 * Need at least one full alignment unit of data. This is only a
+	 * quick lower bound: the unaligned prefix can still consume up
+	 * to lcm(offset_align, mem_align) bytes, leaving nothing for DIO.
+	 */
+	if (unlikely(len < max(offset_align, mem_align)))
+		return false;
 
-	args->first = first_end - offset;
-	args->middle = middle_end - first_end;
-	args->last = orig_end - middle_end;
 	return true;
 }
 
-/*
- * Check if the bvec iterator is aligned for direct I/O.
- *
- * bvecs generated from RPC receive buffers are contiguous: After the first
- * bvec, all subsequent bvecs start at bv_offset zero (page-aligned).
- * Therefore, only the first bvec is checked.
- */
-static bool
-nfsd_iov_iter_aligned_bvec(const struct nfsd_file *nf, const struct iov_iter *i)
-{
-	unsigned int addr_mask = nf->nf_dio_mem_align - 1;
-	const struct bio_vec *bvec = i->bvec;
-
-	return !((unsigned long)(bvec->bv_offset + i->iov_offset) & addr_mask);
-}
-
 static void
 nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
 			struct bio_vec *bvec, unsigned int nvecs,
@@ -1318,29 +1342,45 @@ nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
 
 static bool
 nfsd_setup_write_dio_iters(struct bio_vec *bvec, unsigned int nvecs,
-			   unsigned long total,
+			   loff_t offset, unsigned long total,
 			   struct nfsd_write_dio_args *args)
 {
+	u32 offset_align = args->nf->nf_dio_offset_align;
+	unsigned long mem_offset = bvec->bv_offset;
+	loff_t prefix_end, orig_end, middle_end;
+	size_t prefix, middle, suffix;
+
 	args->nsegs = 0;
 
-	if (args->first) {
+	prefix = nfsd_find_dio_aligned_offset(args->nf, offset, mem_offset,
+					     total);
+	if (prefix == SIZE_MAX)
+		return false;	/* No alignment possible */
+
+	prefix_end = offset + prefix;
+	orig_end = offset + total;
+	middle_end = round_down(orig_end, offset_align);
+
+	middle = middle_end - prefix_end;
+	suffix = orig_end - middle_end;
+
+	if (prefix) {
 		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
-					nvecs, total, 0, args->first);
+					nvecs, total, 0, prefix);
 		++args->nsegs;
 	}
 
+	if (!middle)
+		return false;	/* No aligned region for DIO */
+
 	nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec, nvecs,
-				total, args->first, args->middle);
-	if (!nfsd_iov_iter_aligned_bvec(args->nf,
-					&args->segment[args->nsegs].iter))
-		return false;	/* no DIO-aligned IO possible */
+				total, prefix, middle);
 	args->segment[args->nsegs].use_dio = true;
 	++args->nsegs;
 
-	if (args->last) {
+	if (suffix) {
 		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
-					nvecs, total, args->first +
-					args->middle, args->last);
+					nvecs, total, prefix + middle, suffix);
 		++args->nsegs;
 	}
 
@@ -1373,7 +1413,8 @@ nfsd_issue_write_dio(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *stable_how
 	ssize_t host_err;
 	unsigned int i;
 
-	if (!nfsd_setup_write_dio_iters(rqstp->rq_bvec, nvecs, *cnt, args))
+	if (!nfsd_setup_write_dio_iters(rqstp->rq_bvec, nvecs, kiocb->ki_pos,
+					*cnt, args))
 		return nfsd_buffered_write(rqstp, file, nvecs, cnt, kiocb);
 
 	/*
-- 
2.51.0


  parent reply	other threads:[~2025-10-27 15:46 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-27 15:46 [PATCH v8 00/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 01/12] NFSD: Make FILE_SYNC WRITEs comply with spec Chuck Lever
2025-10-27 15:46 ` [PATCH v8 02/12] NFSD: Enable return of an updated stable_how to NFS clients Chuck Lever
2025-10-27 15:46 ` [PATCH v8 03/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 04/12] NFSD: Remove specific error handling Chuck Lever
2025-10-27 15:46 ` [PATCH v8 05/12] NFSD: Remove alignment size checking Chuck Lever
2025-10-27 15:46 ` [PATCH v8 06/12] NFSD: Clean up struct nfsd_write_dio Chuck Lever
2025-10-27 15:46 ` [PATCH v8 07/12] NFSD: Introduce struct nfsd_write_dio_seg Chuck Lever
2025-10-27 15:46 ` [PATCH v8 08/12] NFSD: Simplify nfsd_iov_iter_aligned_bvec() Chuck Lever
2025-10-30 15:00   ` Jeff Layton
2025-10-31 13:16   ` Christoph Hellwig
2025-10-27 15:46 ` Chuck Lever [this message]
2025-10-30 19:52   ` [PATCH v8 09/12] NFSD: Handle both offset and memory alignment for direct I/O Jeff Layton
2025-10-30 19:55     ` Chuck Lever
2025-10-31  9:13     ` Christoph Hellwig
2025-10-31 13:19   ` Christoph Hellwig
2025-10-31 13:21     ` Chuck Lever
2025-10-31 13:23       ` Christoph Hellwig
2025-10-31 16:07       ` Mike Snitzer
2025-10-27 15:46 ` [PATCH v8 10/12] NFSD: Combine direct I/O feasibility check with iterator setup Chuck Lever
2025-10-30 19:59   ` Jeff Layton
2025-10-31 13:20   ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 11/12] NFSD: Handle kiocb->ki_flags correctly Chuck Lever
2025-10-30 20:01   ` Jeff Layton
2025-10-31 13:21   ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 12/12] NFSD: Refactor nfsd_vfs_write Chuck Lever
2025-10-30 20:02   ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027154630.1774-10-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=hch@lst.de \
    --cc=jlayton@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neil@brown.name \
    --cc=okorniev@redhat.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox