Linux NFS development
 help / color / mirror / Atom feed
From: Chuck Lever <cel@kernel.org>
To: NeilBrown <neil@brown.name>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Cc: <linux-nfs@vger.kernel.org>, Christoph Hellwig <hch@lst.de>,
	Chuck Lever <chuck.lever@oracle.com>,
	Mike Snitzer <snitzer@kernel.org>
Subject: [PATCH v8 07/12] NFSD: Introduce struct nfsd_write_dio_seg
Date: Mon, 27 Oct 2025 11:46:25 -0400	[thread overview]
Message-ID: <20251027154630.1774-8-cel@kernel.org> (raw)
In-Reply-To: <20251027154630.1774-1-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

Passing iter arrays by reference is a little risky. Instead, pass a
struct with a fixed-size array so bounds checking can be done.

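Name each item in the array a "segment", as the term "extent"
generally refers to a set of blocks on storage, not to a buffer.
Each segment is processed via a single vfs_iocb_iter_write() call,
and is either IOCB_DIRECT or buffered. The nfsd_write_direct
tracepoint now fires once for each IOCB_DIRECT segment, reporting
that segment's file position and byte count, rather than once per
request.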

Introduce a segment constructor function so each segment is
initialized identically.

Consensus is that letting the code build a segment array with fewer
than 3 entries is better than having the I/O loop unconditionally
visit all 3 segments and skip the zero-length ones.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/vfs.c | 120 ++++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 62 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 326c60eada65..5d6efcceb8c9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1254,12 +1254,16 @@ static int wait_for_concurrent_writes(struct file *file)
 	return err;
 }
 
+struct nfsd_write_dio_seg {
+	struct iov_iter			iter;
+	bool				use_dio;
+};
+
 struct nfsd_write_dio_args {
 	struct nfsd_file		*nf;
-
-	ssize_t	start_len;	/* Length for misaligned first extent */
-	ssize_t	middle_len;	/* Length for DIO-aligned middle extent */
-	ssize_t	end_len;	/* Length for misaligned last extent */
+	size_t				first, middle, last;
+	unsigned int			nsegs;
+	struct nfsd_write_dio_seg	segment[3];
 };
 
 static bool
@@ -1267,21 +1271,20 @@ nfsd_is_write_dio_possible(loff_t offset, unsigned long len,
 			   struct nfsd_write_dio_args *args)
 {
 	u32 dio_blocksize = args->nf->nf_dio_offset_align;
-	loff_t start_end, orig_end, middle_end;
+	loff_t first_end, orig_end, middle_end;
 
 	if (unlikely(!args->nf->nf_dio_mem_align || !dio_blocksize))
 		return false;
 	if (unlikely(len < dio_blocksize))
 		return false;
 
-	start_end = round_up(offset, dio_blocksize);
+	first_end = round_up(offset, dio_blocksize);
 	orig_end = offset + len;
 	middle_end = round_down(orig_end, dio_blocksize);
 
-	args->start_len = start_end - offset;
-	args->middle_len = middle_end - start_end;
-	args->end_len = orig_end - middle_end;
-
+	args->first = first_end - offset;
+	args->middle = middle_end - first_end;
+	args->last = orig_end - middle_end;
 	return true;
 }
 
@@ -1311,47 +1314,47 @@ nfsd_iov_iter_aligned_bvec(const struct nfsd_file *nf, const struct iov_iter *i)
 	return true;
 }
 
-/*
- * Setup as many as 3 iov_iter based on extents described by @write_dio.
- * Returns the number of iov_iter that were setup.
- */
-static int
-nfsd_setup_write_dio_iters(struct iov_iter **iterp, bool *iter_is_dio_aligned,
-			   struct bio_vec *rq_bvec, unsigned int nvecs,
-			   unsigned long cnt, struct nfsd_write_dio_args *args)
+static void
+nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
+			struct bio_vec *bvec, unsigned int nvecs,
+			unsigned long total, size_t start, size_t len)
 {
-	int n_iters = 0;
-	struct iov_iter *iters = *iterp;
+	iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total);
+	if (start)
+		iov_iter_advance(&segment->iter, start);
+	iov_iter_truncate(&segment->iter, len);
+	segment->use_dio = false;
+}
 
-	/* Setup misaligned start? */
-	if (args->start_len) {
-		iov_iter_bvec(&iters[n_iters], ITER_SOURCE, rq_bvec, nvecs, cnt);
-		iters[n_iters].count = args->start_len;
-		iter_is_dio_aligned[n_iters] = false;
-		++n_iters;
+static bool
+nfsd_setup_write_dio_iters(struct bio_vec *bvec, unsigned int nvecs,
+			   unsigned long total,
+			   struct nfsd_write_dio_args *args)
+{
+	args->nsegs = 0;
+
+	if (args->first) {
+		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
+					nvecs, total, 0, args->first);
+		++args->nsegs;
 	}
 
-	/* Setup DIO-aligned middle */
-	iov_iter_bvec(&iters[n_iters], ITER_SOURCE, rq_bvec, nvecs, cnt);
-	if (args->start_len)
-		iov_iter_advance(&iters[n_iters], args->start_len);
-	iters[n_iters].count -= args->end_len;
-	iter_is_dio_aligned[n_iters] =
-		nfsd_iov_iter_aligned_bvec(args->nf, &iters[n_iters]);
-	if (unlikely(!iter_is_dio_aligned[n_iters]))
-		return 0; /* no DIO-aligned IO possible */
-	++n_iters;
+	nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec, nvecs,
+				total, args->first, args->middle);
+	if (!nfsd_iov_iter_aligned_bvec(args->nf,
+					&args->segment[args->nsegs].iter))
+		return false;	/* no DIO-aligned IO possible */
+	args->segment[args->nsegs].use_dio = true;
+	++args->nsegs;
 
-	/* Setup misaligned end? */
-	if (args->end_len) {
-		iov_iter_bvec(&iters[n_iters], ITER_SOURCE, rq_bvec, nvecs, cnt);
-		iov_iter_advance(&iters[n_iters],
-				 args->start_len + args->middle_len);
-		iter_is_dio_aligned[n_iters] = false;
-		++n_iters;
+	if (args->last) {
+		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
+					nvecs, total, args->first +
+					args->middle, args->last);
+		++args->nsegs;
 	}
 
-	return n_iters;
+	return true;
 }
 
 static int
@@ -1377,22 +1380,12 @@ nfsd_issue_write_dio(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *stable_how
 		     struct nfsd_write_dio_args *args)
 {
 	struct file *file = args->nf->nf_file;
-	bool iter_is_dio_aligned[3];
-	struct iov_iter iter_stack[3];
-	struct iov_iter *iter = iter_stack;
-	unsigned int n_iters = 0;
-	unsigned long in_count = *cnt;
-	loff_t in_offset = kiocb->ki_pos;
 	ssize_t host_err;
+	unsigned int i;
 
-	n_iters = nfsd_setup_write_dio_iters(&iter, iter_is_dio_aligned,
-					     rqstp->rq_bvec, nvecs, *cnt,
-					     args);
-	if (unlikely(!n_iters))
+	if (!nfsd_setup_write_dio_iters(rqstp->rq_bvec, nvecs, *cnt, args))
 		return nfsd_buffered_write(rqstp, file, nvecs, cnt, kiocb);
 
-	trace_nfsd_write_direct(rqstp, fhp, in_offset, in_count);
-
 	/*
 	 * Any buffered IO issued here will be misaligned, use
 	 * sync IO to ensure it has completed before returning.
@@ -1402,18 +1395,21 @@ nfsd_issue_write_dio(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *stable_how
 	*stable_how = NFS_FILE_SYNC;
 
 	*cnt = 0;
-	for (int i = 0; i < n_iters; i++) {
-		if (iter_is_dio_aligned[i])
+	for (i = 0; i < args->nsegs; i++) {
+		if (args->segment[i].use_dio) {
 			kiocb->ki_flags |= IOCB_DIRECT;
-		else
+			trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos,
+						args->segment[i].iter.count);
+		} else
 			kiocb->ki_flags &= ~IOCB_DIRECT;
 
-		host_err = vfs_iocb_iter_write(file, kiocb, &iter[i]);
+		host_err = vfs_iocb_iter_write(file, kiocb,
+					       &args->segment[i].iter);
 		if (host_err < 0)
 			return host_err;
 		*cnt += host_err;
-		if (host_err < iter[i].count) /* partial write? */
-			break;
+		if (host_err < args->segment[i].iter.count)
+			break;	/* partial write */
 	}
 
 	return 0;
-- 
2.51.0


  parent reply	other threads:[~2025-10-27 15:46 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-27 15:46 [PATCH v8 00/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 01/12] NFSD: Make FILE_SYNC WRITEs comply with spec Chuck Lever
2025-10-27 15:46 ` [PATCH v8 02/12] NFSD: Enable return of an updated stable_how to NFS clients Chuck Lever
2025-10-27 15:46 ` [PATCH v8 03/12] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-10-27 15:46 ` [PATCH v8 04/12] NFSD: Remove specific error handling Chuck Lever
2025-10-27 15:46 ` [PATCH v8 05/12] NFSD: Remove alignment size checking Chuck Lever
2025-10-27 15:46 ` [PATCH v8 06/12] NFSD: Clean up struct nfsd_write_dio Chuck Lever
2025-10-27 15:46 ` Chuck Lever [this message]
2025-10-27 15:46 ` [PATCH v8 08/12] NFSD: Simplify nfsd_iov_iter_aligned_bvec() Chuck Lever
2025-10-30 15:00   ` Jeff Layton
2025-10-31 13:16   ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 09/12] NFSD: Handle both offset and memory alignment for direct I/O Chuck Lever
2025-10-30 19:52   ` Jeff Layton
2025-10-30 19:55     ` Chuck Lever
2025-10-31  9:13     ` Christoph Hellwig
2025-10-31 13:19   ` Christoph Hellwig
2025-10-31 13:21     ` Chuck Lever
2025-10-31 13:23       ` Christoph Hellwig
2025-10-31 16:07       ` Mike Snitzer
2025-10-27 15:46 ` [PATCH v8 10/12] NFSD: Combine direct I/O feasibility check with iterator setup Chuck Lever
2025-10-30 19:59   ` Jeff Layton
2025-10-31 13:20   ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 11/12] NFSD: Handle kiocb->ki_flags correctly Chuck Lever
2025-10-30 20:01   ` Jeff Layton
2025-10-31 13:21   ` Christoph Hellwig
2025-10-27 15:46 ` [PATCH v8 12/12] NFSD: Refactor nfsd_vfs_write Chuck Lever
2025-10-30 20:02   ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027154630.1774-8-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=hch@lst.de \
    --cc=jlayton@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neil@brown.name \
    --cc=okorniev@redhat.com \
    --cc=snitzer@kernel.org \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox