From: Christoph Hellwig <hch@infradead.org>
To: NeilBrown <neil@brown.name>
Cc: Chuck Lever <cel@kernel.org>, Jeff Layton <jlayton@kernel.org>,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>,
	linux-nfs@vger.kernel.org, Mike Snitzer <snitzer@kernel.org>,
	Chuck Lever <chuck.lever@oracle.com>
Subject: Re: [PATCH v10 4/5] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE
Date: Thu, 6 Nov 2025 05:51:55 -0800
Message-ID: <aQyn-_GSL_z3a9to@infradead.org>
In-Reply-To: <aQyfgfWu8kPfe1uA@infradead.org>

On Thu, Nov 06, 2025 at 05:15:45AM -0800, Christoph Hellwig wrote:
> On Thu, Nov 06, 2025 at 09:11:51PM +1100, NeilBrown wrote:
> > > +struct nfsd_write_dio_seg {
> > > +	struct iov_iter			iter;
> > > +	bool				use_dio;
> > 
> > This is only used to choose which flags to use.
> > I think it would be neater to have 'flags' here explicitly.
> 
> Actually, looking at the grand unified patch now (thanks, this is so
> much easier to review!), we can just do away with the struct entirely.
> Just have nfsd_write_dio_iters_init return whether direct I/O is
> possible or not, and do a single vfs_iocb_iter_write on the original
> kiocb/iter if not.

That didn't work out too well, and indeed having flags here seems
saner.

Chuck, below is an untested incremental patch I did while reviewing
it.  Besides this flags change, it adds the actual NFSD_IO_DIRECT
definition that was missing; otherwise it mostly just folds things
so that we don't need the extra args structure, which simplifies
things quite a bit.

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index ea87b42894dd..bdb60ee1f1a4 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -157,6 +157,7 @@ enum {
 	/* Any new NFSD_IO enum value must be added at the end */
 	NFSD_IO_BUFFERED,
 	NFSD_IO_DONTCACHE,
+	NFSD_IO_DIRECT,
 };
 
 extern u64 nfsd_io_cache_read __read_mostly;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bb94da333d50..8038d8d038f3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1170,56 +1170,38 @@ static int wait_for_concurrent_writes(struct file *file)
 
 struct nfsd_write_dio_seg {
 	struct iov_iter			iter;
-	bool				use_dio;
+	int				flags;
 };
 
-struct nfsd_write_dio_args {
-	struct nfsd_file		*nf;
-	int				flags_buffered;
-	int				flags_direct;
-	unsigned int			nsegs;
-	struct nfsd_write_dio_seg	segment[3];
-};
-
-/*
- * Check if the bvec iterator is aligned for direct I/O.
- *
- * bvecs generated from RPC receive buffers are contiguous: After the first
- * bvec, all subsequent bvecs start at bv_offset zero (page-aligned).
- * Therefore, only the first bvec is checked.
- */
-static bool
-nfsd_iov_iter_aligned_bvec(const struct nfsd_file *nf, const struct iov_iter *i)
+static unsigned long iov_iter_bvec_offset(const struct iov_iter *iter)
 {
-	unsigned int addr_mask = nf->nf_dio_mem_align - 1;
-	const struct bio_vec *bvec = i->bvec;
-
-	return ((unsigned long)(bvec->bv_offset + i->iov_offset) & addr_mask) == 0;
+	return (unsigned long)(iter->bvec->bv_offset + iter->iov_offset);
 }
 
 static void
 nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
 			struct bio_vec *bvec, unsigned int nvecs,
-			unsigned long total, size_t start, size_t len)
+			unsigned long total, size_t start, size_t len,
+			struct kiocb *iocb)
 {
 	iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total);
 	if (start)
 		iov_iter_advance(&segment->iter, start);
 	iov_iter_truncate(&segment->iter, len);
-	segment->use_dio = false;
+	segment->flags = iocb->ki_flags;
 }
 
-static void
-nfsd_write_dio_iters_init(struct bio_vec *bvec, unsigned int nvecs,
-			  loff_t offset, unsigned long total,
-			  struct nfsd_write_dio_args *args)
+static unsigned int
+nfsd_write_dio_iters_init(struct nfsd_file *nf, struct bio_vec *bvec,
+		unsigned int nvecs, struct kiocb *iocb, unsigned long total,
+		struct nfsd_write_dio_seg segments[3])
 {
-	u32 offset_align = args->nf->nf_dio_offset_align;
-	u32 mem_align = args->nf->nf_dio_mem_align;
+	u32 offset_align = nf->nf_dio_offset_align;
+	u32 mem_align = nf->nf_dio_mem_align;
+	loff_t offset = iocb->ki_pos;
 	loff_t prefix_end, orig_end, middle_end;
 	size_t prefix, middle, suffix;
-
-	args->nsegs = 0;
+	unsigned int nsegs = 0;
 
 	/*
 	 * Check if direct I/O is feasible for this write request.
@@ -1242,87 +1224,80 @@ nfsd_write_dio_iters_init(struct bio_vec *bvec, unsigned int nvecs,
 	if (!middle)
 		goto no_dio;
 
-	if (prefix) {
-		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
-					nvecs, total, 0, prefix);
-		++args->nsegs;
-	}
+	if (prefix)
+		nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+					nvecs, total, 0, prefix, iocb);
 
-	nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec, nvecs,
-				total, prefix, middle);
-	if (!nfsd_iov_iter_aligned_bvec(args->nf,
-					&args->segment[args->nsegs].iter))
+	nfsd_write_dio_seg_init(&segments[nsegs], bvec, nvecs,
+				total, prefix, middle, iocb);
+
+	/*
+	 * Check if the bvec iterator is aligned for direct I/O.
+	 *
+	 * bvecs generated from RPC receive buffers are contiguous: After the
+	 * first bvec, all subsequent bvecs start at bv_offset zero
+	 * (page-aligned).
+	 * Therefore, only the first bvec is checked.
+	 */
+	if (iov_iter_bvec_offset(&segments[nsegs].iter) & (mem_align - 1))
 		goto no_dio;
-	args->segment[args->nsegs].use_dio = true;
-	++args->nsegs;
+	segments[nsegs].flags |= IOCB_DIRECT;
+	nsegs++;
 
-	if (suffix) {
-		nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
-					nvecs, total, prefix + middle, suffix);
-		++args->nsegs;
-	}
+	if (suffix)
+		nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+					nvecs, total, prefix + middle, suffix,
+					iocb);
 
-	return;
+	return nsegs;
 
 no_dio:
 	/*
 	 * No DIO alignment possible - pack into single non-DIO segment.
-	 * IOCB_DONTCACHE preserves the intent of NFSD_IO_DIRECT.
 	 */
-	if (args->nf->nf_file->f_op->fop_flags & FOP_DONTCACHE)
-		args->flags_buffered |= IOCB_DONTCACHE;
-	nfsd_write_dio_seg_init(&args->segment[0], bvec, nvecs, total,
-				0, total);
-	args->nsegs = 1;
+	nfsd_write_dio_seg_init(&segments[0], bvec, nvecs, total,
+				0, total, iocb);
+	return 1;
 }
 
-static int
-nfsd_issue_dio_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		     struct kiocb *kiocb, unsigned int nvecs,
-		     unsigned long *cnt, struct nfsd_write_dio_args *args)
+static noinline_for_stack int
+nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		  struct nfsd_file *nf, unsigned int nvecs,
+		  unsigned long *cnt, struct kiocb *kiocb)
 {
-	struct file *file = args->nf->nf_file;
+	struct file *file = nf->nf_file;
+	struct nfsd_write_dio_seg segments[3];
+	unsigned int nsegs = 0, i;
 	ssize_t host_err;
-	unsigned int i;
 
-	nfsd_write_dio_iters_init(rqstp->rq_bvec, nvecs, kiocb->ki_pos,
-				  *cnt, args);
+	nsegs = nfsd_write_dio_iters_init(nf, rqstp->rq_bvec, nvecs,
+			kiocb, *cnt, segments);
 
 	*cnt = 0;
-	for (i = 0; i < args->nsegs; i++) {
-		if (args->segment[i].use_dio) {
-			kiocb->ki_flags = args->flags_direct;
+	for (i = 0; i < nsegs; i++) {
+		kiocb->ki_flags = segments[i].flags;
+		if (kiocb->ki_flags & IOCB_DIRECT) {
 			trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos,
-						args->segment[i].iter.count);
-		} else
-			kiocb->ki_flags = args->flags_buffered;
+						segments[i].iter.count);
+		} else if (nf->nf_file->f_op->fop_flags & FOP_DONTCACHE) {
+			/*
+			 * IOCB_DONTCACHE preserves the intent of
+			 * NFSD_IO_DIRECT.
+			 */
+			kiocb->ki_flags |= IOCB_DONTCACHE;
+		}
 
-		host_err = vfs_iocb_iter_write(file, kiocb,
-					       &args->segment[i].iter);
+		host_err = vfs_iocb_iter_write(file, kiocb, &segments[i].iter);
 		if (host_err < 0)
 			return host_err;
 		*cnt += host_err;
-		if (host_err < args->segment[i].iter.count)
+		if (host_err < segments[i].iter.count)
 			break;	/* partial write */
 	}
 
 	return 0;
 }
 
-static noinline_for_stack int
-nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  struct nfsd_file *nf, unsigned int nvecs,
-		  unsigned long *cnt, struct kiocb *kiocb)
-{
-	struct nfsd_write_dio_args args;
-
-	args.flags_direct = kiocb->ki_flags | IOCB_DIRECT;
-	args.flags_buffered = kiocb->ki_flags;
-	args.nf = nf;
-
-	return nfsd_issue_dio_write(rqstp, fhp, kiocb, nvecs, cnt, &args);
-}
-
 /**
  * nfsd_vfs_write - write data to an already-open file
  * @rqstp: RPC execution context

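As an aside for anyone reviewing the alignment logic: the actual
prefix/middle/suffix computation sits just above the second hunk and
isn't visible here.  Below is a minimal, untested sketch of what that
split has to look like (split_for_dio is an illustrative name, not
code from the patch; it assumes offset_align is a power of two, as
round_up() and round_down() require):

	/* Illustrative only -- not part of the patch above. */
	static void split_for_dio(loff_t offset, unsigned long total,
				  u32 offset_align, size_t *prefix,
				  size_t *middle, size_t *suffix)
	{
		/* first DIO-aligned file offset at or after @offset */
		loff_t prefix_end = round_up(offset, offset_align);
		loff_t orig_end = offset + total;
		/* last DIO-aligned file offset at or before the end */
		loff_t middle_end = round_down(orig_end, offset_align);

		*prefix = min_t(loff_t, prefix_end - offset, total);
		*middle = middle_end > prefix_end ?
				middle_end - prefix_end : 0;
		*suffix = total - *prefix - *middle;
	}

E.g. offset = 1000, total = 5000, offset_align = 512 yields a 24-byte
prefix and a 368-byte suffix (both buffered) around a 4608-byte middle,
which only gets IOCB_DIRECT if the iov_iter_bvec_offset() check against
mem_align - 1 also passes; a zero-length middle takes the no_dio path.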