From: Christoph Hellwig <hch@infradead.org>
To: NeilBrown <neil@brown.name>
Cc: Chuck Lever <cel@kernel.org>, Jeff Layton <jlayton@kernel.org>,
Olga Kornievskaia <okorniev@redhat.com>,
Dai Ngo <dai.ngo@oracle.com>, Tom Talpey <tom@talpey.com>,
linux-nfs@vger.kernel.org, Mike Snitzer <snitzer@kernel.org>,
Chuck Lever <chuck.lever@oracle.com>
Subject: Re: [PATCH v10 4/5] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE
Date: Thu, 6 Nov 2025 05:51:55 -0800 [thread overview]
Message-ID: <aQyn-_GSL_z3a9to@infradead.org> (raw)
In-Reply-To: <aQyfgfWu8kPfe1uA@infradead.org>
On Thu, Nov 06, 2025 at 05:15:45AM -0800, Christoph Hellwig wrote:
> On Thu, Nov 06, 2025 at 09:11:51PM +1100, NeilBrown wrote:
> > > +struct nfsd_write_dio_seg {
> > > + struct iov_iter iter;
> > > + bool use_dio;
> >
> > This is only used to choose which flags to use.
> > I think it would be neater to have 'flags' here explicitly.
>
> Actually, looking at the grand unified patch now (thanks, this is so
> much easier to review!), we can just do away with the struct entirely.
> Just have nfsd_write_dio_iters_init return if direct I/O is possible
> or not, and do a single vfs_iocb_iter_write on the origin kiocb/iter
> if not.
That didn't work out too well, and indeed having flags here seems
saner.
Chuck, below is an untested incremental patch I did while reviewing
it. Besides this flags thing, it adds the actual NFSD_IO_DIRECT
definition that was missing, but otherwise mostly just folds things
so that we don't need the extra args structure and thus simplifies
things quite a bit.
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index ea87b42894dd..bdb60ee1f1a4 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -157,6 +157,7 @@ enum {
/* Any new NFSD_IO enum value must be added at the end */
NFSD_IO_BUFFERED,
NFSD_IO_DONTCACHE,
+ NFSD_IO_DIRECT,
};
extern u64 nfsd_io_cache_read __read_mostly;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bb94da333d50..8038d8d038f3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1170,56 +1170,38 @@ static int wait_for_concurrent_writes(struct file *file)
struct nfsd_write_dio_seg {
struct iov_iter iter;
- bool use_dio;
+ int flags;
};
-struct nfsd_write_dio_args {
- struct nfsd_file *nf;
- int flags_buffered;
- int flags_direct;
- unsigned int nsegs;
- struct nfsd_write_dio_seg segment[3];
-};
-
-/*
- * Check if the bvec iterator is aligned for direct I/O.
- *
- * bvecs generated from RPC receive buffers are contiguous: After the first
- * bvec, all subsequent bvecs start at bv_offset zero (page-aligned).
- * Therefore, only the first bvec is checked.
- */
-static bool
-nfsd_iov_iter_aligned_bvec(const struct nfsd_file *nf, const struct iov_iter *i)
+static unsigned long iov_iter_bvec_offset(const struct iov_iter *iter)
{
- unsigned int addr_mask = nf->nf_dio_mem_align - 1;
- const struct bio_vec *bvec = i->bvec;
-
- return ((unsigned long)(bvec->bv_offset + i->iov_offset) & addr_mask) == 0;
+ return (unsigned long)(iter->bvec->bv_offset + iter->iov_offset);
}
static void
nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
struct bio_vec *bvec, unsigned int nvecs,
- unsigned long total, size_t start, size_t len)
+ unsigned long total, size_t start, size_t len,
+ struct kiocb *iocb)
{
iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total);
if (start)
iov_iter_advance(&segment->iter, start);
iov_iter_truncate(&segment->iter, len);
- segment->use_dio = false;
+ segment->flags = iocb->ki_flags;
}
-static void
-nfsd_write_dio_iters_init(struct bio_vec *bvec, unsigned int nvecs,
- loff_t offset, unsigned long total,
- struct nfsd_write_dio_args *args)
+static unsigned int
+nfsd_write_dio_iters_init(struct nfsd_file *nf, struct bio_vec *bvec,
+ unsigned int nvecs, struct kiocb *iocb, unsigned long total,
+ struct nfsd_write_dio_seg segments[3])
{
- u32 offset_align = args->nf->nf_dio_offset_align;
- u32 mem_align = args->nf->nf_dio_mem_align;
+ u32 offset_align = nf->nf_dio_offset_align;
+ u32 mem_align = nf->nf_dio_mem_align;
+ loff_t offset = iocb->ki_pos;
loff_t prefix_end, orig_end, middle_end;
size_t prefix, middle, suffix;
-
- args->nsegs = 0;
+ unsigned int nsegs = 0;
/*
* Check if direct I/O is feasible for this write request.
@@ -1242,87 +1224,80 @@ nfsd_write_dio_iters_init(struct bio_vec *bvec, unsigned int nvecs,
if (!middle)
goto no_dio;
- if (prefix) {
- nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
- nvecs, total, 0, prefix);
- ++args->nsegs;
- }
+ if (prefix)
+ nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+ nvecs, total, 0, prefix, iocb);
- nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec, nvecs,
- total, prefix, middle);
- if (!nfsd_iov_iter_aligned_bvec(args->nf,
- &args->segment[args->nsegs].iter))
+ nfsd_write_dio_seg_init(&segments[nsegs], bvec, nvecs,
+ total, prefix, middle, iocb);
+
+ /*
+ * Check if the bvec iterator is aligned for direct I/O.
+ *
+ * bvecs generated from RPC receive buffers are contiguous: After the
+ * first bvec, all subsequent bvecs start at bv_offset zero
+ * (page-aligned).
+ * Therefore, only the first bvec is checked.
+ */
+ if (iov_iter_bvec_offset(&segments[nsegs].iter) & (mem_align - 1))
goto no_dio;
- args->segment[args->nsegs].use_dio = true;
- ++args->nsegs;
+ segments[nsegs].flags |= IOCB_DIRECT;
+ nsegs++;
- if (suffix) {
- nfsd_write_dio_seg_init(&args->segment[args->nsegs], bvec,
- nvecs, total, prefix + middle, suffix);
- ++args->nsegs;
- }
+ if (suffix)
+ nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+ nvecs, total, prefix + middle, suffix,
+ iocb);
- return;
+ return nsegs;
no_dio:
/*
* No DIO alignment possible - pack into single non-DIO segment.
- * IOCB_DONTCACHE preserves the intent of NFSD_IO_DIRECT.
*/
- if (args->nf->nf_file->f_op->fop_flags & FOP_DONTCACHE)
- args->flags_buffered |= IOCB_DONTCACHE;
- nfsd_write_dio_seg_init(&args->segment[0], bvec, nvecs, total,
- 0, total);
- args->nsegs = 1;
+ nfsd_write_dio_seg_init(&segments[0], bvec, nvecs, total,
+ 0, total, iocb);
+ return 1;
}
-static int
-nfsd_issue_dio_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct kiocb *kiocb, unsigned int nvecs,
- unsigned long *cnt, struct nfsd_write_dio_args *args)
+static noinline_for_stack int
+nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, unsigned int nvecs,
+ unsigned long *cnt, struct kiocb *kiocb)
{
- struct file *file = args->nf->nf_file;
+ struct file *file = nf->nf_file;
+ struct nfsd_write_dio_seg segments[3];
+ unsigned int nsegs = 0, i;
ssize_t host_err;
- unsigned int i;
- nfsd_write_dio_iters_init(rqstp->rq_bvec, nvecs, kiocb->ki_pos,
- *cnt, args);
+ nsegs = nfsd_write_dio_iters_init(nf, rqstp->rq_bvec, nvecs,
+ kiocb, *cnt, segments);
*cnt = 0;
- for (i = 0; i < args->nsegs; i++) {
- if (args->segment[i].use_dio) {
- kiocb->ki_flags = args->flags_direct;
+ for (i = 0; i < nsegs; i++) {
+ kiocb->ki_flags = segments[i].flags;
+ if (kiocb->ki_flags & IOCB_DIRECT) {
trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos,
- args->segment[i].iter.count);
- } else
- kiocb->ki_flags = args->flags_buffered;
+ segments[i].iter.count);
+ } else if (nf->nf_file->f_op->fop_flags & FOP_DONTCACHE) {
+ /*
+ * IOCB_DONTCACHE preserves the intent of
+ * NFSD_IO_DIRECT.
+ */
+ kiocb->ki_flags |= IOCB_DONTCACHE;
+ }
- host_err = vfs_iocb_iter_write(file, kiocb,
- &args->segment[i].iter);
+ host_err = vfs_iocb_iter_write(file, kiocb, &segments[i].iter);
if (host_err < 0)
return host_err;
*cnt += host_err;
- if (host_err < args->segment[i].iter.count)
+ if (host_err < segments[i].iter.count)
break; /* partial write */
}
return 0;
}
-static noinline_for_stack int
-nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfsd_file *nf, unsigned int nvecs,
- unsigned long *cnt, struct kiocb *kiocb)
-{
- struct nfsd_write_dio_args args;
-
- args.flags_direct = kiocb->ki_flags | IOCB_DIRECT;
- args.flags_buffered = kiocb->ki_flags;
- args.nf = nf;
-
- return nfsd_issue_dio_write(rqstp, fhp, kiocb, nvecs, cnt, &args);
-}
-
/**
* nfsd_vfs_write - write data to an already-open file
* @rqstp: RPC execution context
next prev parent reply other threads:[~2025-11-06 13:51 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-05 19:28 [PATCH v10 0/5] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-11-05 19:28 ` [PATCH v10 1/5] NFSD: don't start nfsd if sv_permsocks is empty Chuck Lever
2025-11-05 19:31 ` Chuck Lever
2025-11-05 19:28 ` [PATCH v10 2/5] NFSD: Make FILE_SYNC WRITEs comply with spec Chuck Lever
2025-11-06 0:55 ` NeilBrown
2025-11-06 13:05 ` Christoph Hellwig
2025-11-05 19:28 ` [PATCH v10 3/5] NFSD: Enable return of an updated stable_how to NFS clients Chuck Lever
2025-11-06 13:07 ` Christoph Hellwig
2025-11-06 16:30 ` Chuck Lever
2025-11-05 19:28 ` [PATCH v10 4/5] NFSD: Implement NFSD_IO_DIRECT for NFS WRITE Chuck Lever
2025-11-06 10:11 ` NeilBrown
2025-11-06 13:15 ` Christoph Hellwig
2025-11-06 13:51 ` Christoph Hellwig [this message]
2025-11-06 14:45 ` Chuck Lever
2025-11-06 14:49 ` Christoph Hellwig
2025-11-06 16:48 ` Mike Snitzer
2025-11-06 18:10 ` Chuck Lever
2025-11-06 19:02 ` Mike Snitzer
2025-11-07 13:24 ` Christoph Hellwig
2025-11-07 14:38 ` Chuck Lever
2025-11-07 15:24 ` Christoph Hellwig
2025-11-07 15:26 ` Chuck Lever
2025-11-05 19:28 ` [PATCH v10 5/5] NFSD: add Documentation/filesystems/nfs/nfsd-io-modes.rst Chuck Lever
2025-11-06 10:24 ` NeilBrown
2025-11-06 15:46 ` Mike Snitzer
2025-11-06 15:52 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aQyn-_GSL_z3a9to@infradead.org \
--to=hch@infradead.org \
--cc=cel@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=dai.ngo@oracle.com \
--cc=jlayton@kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=neil@brown.name \
--cc=okorniev@redhat.com \
--cc=snitzer@kernel.org \
--cc=tom@talpey.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).