From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neil@brown.name>, Jeff Layton <jlayton@kernel.org>,
Olga Kornievskaia <okorniev@redhat.com>,
Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, Christoph Hellwig <hch@lst.de>,
Chuck Lever <chuck.lever@oracle.com>
Subject: [PATCH v8 09/12] NFSD: Handle both offset and memory alignment for direct I/O
Date: Mon, 27 Oct 2025 11:46:27 -0400 [thread overview]
Message-ID: <20251027154630.1774-10-cel@kernel.org> (raw)
In-Reply-To: <20251027154630.1774-1-cel@kernel.org>
From: Chuck Lever <chuck.lever@oracle.com>
Currently, nfsd_is_write_dio_possible() only considers file offset
alignment (nf_dio_offset_align) when splitting an NFS WRITE request
into segments. This leaves accounting for memory buffer alignment
(nf_dio_mem_align) until nfsd_setup_write_dio_iters(). If this
second check fails, the code falls back to cached I/O entirely,
wasting the opportunity to use direct I/O for the bulk of the
request.
Enhance the logic to find a beginning segment size that satisfies
both alignment constraints simultaneously. The search algorithm
starts with the file offset alignment requirement and steps through
multiples of offset_align, checking memory alignment at each step.
The search is bounded by lcm(offset_align, mem_align) to ensure that
it always terminates.
Signed-off-by: Chuck Lever <cel@kernel.org>
---
fs/nfsd/filecache.c | 5 ++
fs/nfsd/filecache.h | 1 +
fs/nfsd/vfs.c | 119 +++++++++++++++++++++++++++++---------------
3 files changed, 86 insertions(+), 39 deletions(-)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index a238b6725008..89adc4ab5b24 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -40,6 +40,7 @@
#include <linux/seq_file.h>
#include <linux/rhashtable.h>
#include <linux/nfslocalio.h>
+#include <linux/lcm.h>
#include "vfs.h"
#include "nfsd.h"
@@ -234,6 +235,7 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
nf->nf_dio_mem_align = 0;
nf->nf_dio_offset_align = 0;
nf->nf_dio_read_offset_align = 0;
+ nf->nf_dio_align_lcm = 0;
return nf;
}
@@ -1071,6 +1073,9 @@ nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf)
if (stat.result_mask & STATX_DIOALIGN) {
nf->nf_dio_mem_align = stat.dio_mem_align;
nf->nf_dio_offset_align = stat.dio_offset_align;
+ if (stat.dio_mem_align && stat.dio_offset_align)
+ nf->nf_dio_align_lcm = lcm(stat.dio_mem_align,
+ stat.dio_offset_align);
}
if (stat.result_mask & STATX_DIO_READ_ALIGN)
nf->nf_dio_read_offset_align = stat.dio_read_offset_align;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e3d6ca2b6030..2648aaab5a1b 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -58,6 +58,7 @@ struct nfsd_file {
u32 nf_dio_mem_align;
u32 nf_dio_offset_align;
u32 nf_dio_read_offset_align;
+ unsigned long nf_dio_align_lcm;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 37353fb48d58..a872be300c9f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1261,49 +1261,73 @@ struct nfsd_write_dio_seg {
struct nfsd_write_dio_args {
struct nfsd_file *nf;
- size_t first, middle, last;
unsigned int nsegs;
struct nfsd_write_dio_seg segment[3];
};
+/*
+ * Find the minimum offset within the write request that aligns both
+ * the file offset and memory buffer for direct I/O.
+ *
+ * Returns the size of the unaligned prefix, or SIZE_MAX if no alignment
+ * is found within the request length or one LCM period of the alignments.
+ */
+static size_t
+nfsd_find_dio_aligned_offset(struct nfsd_file *nf, loff_t file_offset,
+ unsigned long mem_offset, size_t total_len)
+{
+ u32 offset_align = nf->nf_dio_offset_align;
+ u32 mem_align = nf->nf_dio_mem_align;
+ unsigned long search_limit;
+ size_t first;
+
+ /* Start with the file offset alignment requirement */
+ first = round_up(file_offset, offset_align) - file_offset;
+
+ /* Quick check: does this also satisfy memory alignment? */
+ if (((mem_offset + first) & (mem_align - 1)) == 0)
+ return first;
+
+ /*
+ * Search for a value that satisfies both constraints by stepping
+ * through multiples of offset_align. Limit search to one period
+ * of the LCM. We need to check up through the search_limit to
+ * cover all possible alignments within the LCM period.
+ */
+ search_limit = min_t(unsigned long, nf->nf_dio_align_lcm, total_len);
+
+ for (; first <= search_limit && first < total_len; first += offset_align) {
+ if (((mem_offset + first) & (mem_align - 1)) == 0)
+ return first;
+ }
+
+ return SIZE_MAX; /* No alignment found */
+}
+
+/*
+ * Check if the underlying file system implements direct I/O.
+ */
static bool
nfsd_is_write_dio_possible(loff_t offset, unsigned long len,
struct nfsd_write_dio_args *args)
{
- u32 dio_blocksize = args->nf->nf_dio_offset_align;
- loff_t first_end, orig_end, middle_end;
+ u32 offset_align = args->nf->nf_dio_offset_align;
+ u32 mem_align = args->nf->nf_dio_mem_align;
- if (unlikely(!args->nf->nf_dio_mem_align || !dio_blocksize))
- return false;
- if (unlikely(len < dio_blocksize))
+ if (unlikely(!mem_align || !offset_align))
return false;
- first_end = round_up(offset, dio_blocksize);
- orig_end = offset + len;
- middle_end = round_down(orig_end, dio_blocksize);
+ /*
+ * Need enough data to potentially find an aligned segment.
+ * This is only a coarse lower bound: the prefix search done at setup
+ * may need up to lcm(offset_align, mem_align) bytes to find alignment.
+ */
+ if (unlikely(len < max(offset_align, mem_align)))
+ return false;
- args->first = first_end - offset;
- args->middle = middle_end - first_end;
- args->last = orig_end - middle_end;
return true;
}
-/*
- * Check if the bvec iterator is aligned for direct I/O.
- *
- * bvecs generated from RPC receive buffers are contiguous: After the first
- * bvec, all subsequent bvecs start at bv_offset zero (page-aligned).
- * Therefore, only the first bvec is checked.
- */
-static bool
-nfsd_iov_iter_aligned_bvec(const struct nfsd_file *nf, const struct iov_iter *i)
-{
- unsigned int addr_mask = nf->nf_dio_mem_align - 1;
- const struct bio_vec *bvec = i->bvec;
-
- return !((unsigned long)(bvec->bv_offset + i->iov_offset) & addr_mask);
-}
-
static void
nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
struct bio_vec *bvec, unsigned int nvecs,
@@ -1318,29 +1342,45 @@ nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
static bool
nfsd_setup_write_dio_iters(struct bio_vec *bvec, unsigned int nvecs,
- unsigned long total,
+ loff_t offset, unsigned long total,
struct nfsd_write_dio_args *args)
{
+ u32 offset_align = args->nf->nf_dio_offset_align;
+ unsigned long mem_offset = bvec->bv_offset;
+ loff_t prefix_end, orig_end, middle_end;
+ size_t prefix, middle, suffix;
+
args->nsegs = 0;
- if (args->first) {
+ prefix = nfsd_find_dio_aligned_offset(args->nf, offset, mem_offset,
+ total);
+ if (prefix == SIZE_MAX)
+ return false; /* No alignment possible */
+
+ prefix_end = offset + prefix;
+ orig_end = offset + total;
+ middle_end = round_down(orig_end, offset_align);
+
+ middle = middle_end - prefix_end;
+ suffix = orig_end - middle_end;
+
+ if (prefix) {
nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
- nvecs, total, 0, args->first);
+ nvecs, total, 0, prefix);
++args->nsegs;
}
+ if (!middle)
+ return false; /* No aligned region for DIO */
+
nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec, nvecs,
- total, args->first, args->middle);
- if (!nfsd_iov_iter_aligned_bvec(args->nf,
- &args->segment[args->nsegs].iter))
- return false; /* no DIO-aligned IO possible */
+ total, prefix, middle);
args->segment[args->nsegs].use_dio = true;
++args->nsegs;
- if (args->last) {
+ if (suffix) {
nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
- nvecs, total, args->first +
- args->middle, args->last);
+ nvecs, total, prefix + middle, suffix);
++args->nsegs;
}
@@ -1373,7 +1413,8 @@ nfsd_issue_write_dio(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *stable_how
ssize_t host_err;
unsigned int i;
- if (!nfsd_setup_write_dio_iters(rqstp->rq_bvec, nvecs, *cnt, args))
+ if (!nfsd_setup_write_dio_iters(rqstp->rq_bvec, nvecs, kiocb->ki_pos,
+ *cnt, args))
return nfsd_buffered_write(rqstp, file, nvecs, cnt, kiocb);
/*
--
2.51.0
next prev parent reply other threads:[~2025-10-27 15:46 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 15:46 [PATCH v8 00/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 01/12] NFSD: Make FILE_SYNC WRITEs comply with spec Chuck Lever
2025-10-27 15:46 ` [PATCH v8 02/12] NFSD: Enable return of an updated stable_how to NFS clients Chuck Lever
2025-10-27 15:46 ` [PATCH v8 03/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 04/12] NFSD: Remove specific error handling Chuck Lever
2025-10-27 15:46 ` [PATCH v8 05/12] NFSD: Remove alignment size checking Chuck Lever
2025-10-27 15:46 ` [PATCH v8 06/12] NFSD: Clean up struct nfsd_write_dio Chuck Lever
2025-10-27 15:46 ` [PATCH v8 07/12] NFSD: Introduce struct nfsd_write_dio_seg Chuck Lever
2025-10-27 15:46 ` [PATCH v8 08/12] NFSD: Simplify nfsd_iov_iter_aligned_bvec() Chuck Lever
2025-10-30 15:00 ` Jeff Layton
2025-10-31 13:16 ` Christoph Hellwig
2025-10-27 15:46 ` Chuck Lever [this message]
2025-10-30 19:52 ` [PATCH v8 09/12] NFSD: Handle both offset and memory alignment for direct I/O Jeff Layton
2025-10-30 19:55 ` Chuck Lever
2025-10-31 9:13 ` Christoph Hellwig
2025-10-31 13:19 ` Christoph Hellwig
2025-10-31 13:21 ` Chuck Lever
2025-10-31 13:23 ` Christoph Hellwig
2025-10-31 16:07 ` Mike Snitzer
2025-10-27 15:46 ` [PATCH v8 10/12] NFSD: Combine direct I/O feasibility check with iterator setup Chuck Lever
2025-10-30 19:59 ` Jeff Layton
2025-10-31 13:20 ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 11/12] NFSD: Handle kiocb->ki_flags correctly Chuck Lever
2025-10-30 20:01 ` Jeff Layton
2025-10-31 13:21 ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 12/12] NFSD: Refactor nfsd_vfs_write Chuck Lever
2025-10-30 20:02 ` Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251027154630.1774-10-cel@kernel.org \
--to=cel@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=dai.ngo@oracle.com \
--cc=hch@lst.de \
--cc=jlayton@kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neil@brown.name \
--cc=okorniev@redhat.com \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox