linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@kernel.org>
To: Mike Snitzer <snitzer@kernel.org>, Anna Schumaker <anna@kernel.org>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>,
	 linux-nfs@vger.kernel.org, Chuck Lever <chuck.lever@oracle.com>
Subject: Re: [PATCH v11 2/7] nfs/localio: avoid issuing misaligned IO using O_DIRECT
Date: Fri, 26 Sep 2025 10:15:14 -0400	[thread overview]
Message-ID: <2b1e79503919069897b6049462653b0edfa2bef0.camel@kernel.org> (raw)
In-Reply-To: <20250919143631.44851-3-snitzer@kernel.org>

On Fri, 2025-09-19 at 10:36 -0400, Mike Snitzer wrote:
> Add nfsd_file_dio_alignment and use it to avoid issuing misaligned IO
> using O_DIRECT. Any misaligned DIO falls back to using buffered IO.
> 
> Because misaligned DIO is now handled safely, remove the nfs modparam
> 'localio_O_DIRECT_semantics' that was added to require users opt-in to
> the requirement that all O_DIRECT be properly DIO-aligned.
> 
> Also, introduce nfs_iov_iter_aligned_bvec() which is a variant of
> iov_iter_aligned_bvec() that also verifies the offset associated with
> an iov_iter is DIO-aligned.  NOTE: in a parallel effort,
> iov_iter_aligned_bvec() is being removed along with
> iov_iter_is_aligned().
> 
> Lastly, add pr_info_ratelimited if underlying filesystem returns
> -EINVAL because it was made to try O_DIRECT for IO that is not
> DIO-aligned (shouldn't happen, so its best to be louder if it does).
> 
> Fixes: 3feec68563d ("nfs/localio: add direct IO enablement with sync and async IO support")
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfs/localio.c           | 65 ++++++++++++++++++++++++++++++++------
>  fs/nfsd/localio.c          | 11 +++++++
>  include/linux/nfslocalio.h |  2 ++
>  3 files changed, 68 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> index 42ea50d42c995..b165922e5cb65 100644
> --- a/fs/nfs/localio.c
> +++ b/fs/nfs/localio.c
> @@ -49,11 +49,6 @@ struct nfs_local_fsync_ctx {
>  static bool localio_enabled __read_mostly = true;
>  module_param(localio_enabled, bool, 0644);
>  
> -static bool localio_O_DIRECT_semantics __read_mostly = false;
> -module_param(localio_O_DIRECT_semantics, bool, 0644);
> -MODULE_PARM_DESC(localio_O_DIRECT_semantics,
> -		 "LOCALIO will use O_DIRECT semantics to filesystem.");
> -
>  static inline bool nfs_client_is_local(const struct nfs_client *clp)
>  {
>  	return !!rcu_access_pointer(clp->cl_uuid.net);
> @@ -322,12 +317,9 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
>  		return NULL;
>  	}
>  
> -	if (localio_O_DIRECT_semantics &&
> -	    test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
> -		iocb->kiocb.ki_filp = file;
> +	init_sync_kiocb(&iocb->kiocb, file);
> +	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags))
>  		iocb->kiocb.ki_flags = IOCB_DIRECT;
> -	} else
> -		init_sync_kiocb(&iocb->kiocb, file);
>  
>  	iocb->kiocb.ki_pos = hdr->args.offset;
>  	iocb->hdr = hdr;
> @@ -337,6 +329,30 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
>  	return iocb;
>  }
>  
> +static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
> +		loff_t offset, unsigned int addr_mask, unsigned int len_mask)
> +{
> +	const struct bio_vec *bvec = i->bvec;
> +	size_t skip = i->iov_offset;
> +	size_t size = i->count;
> +
> +	if ((offset | size) & len_mask)
> +		return false;
> +	do {
> +		size_t len = bvec->bv_len;
> +
> +		if (len > size)
> +			len = size;
> +		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
> +			return false;
> +		bvec++;
> +		size -= len;
> +		skip = 0;
> +	} while (size);
> +
> +	return true;
> +}
> +
>  static void
>  nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
>  {
> @@ -346,6 +362,25 @@ nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
>  		      hdr->args.count + hdr->args.pgbase);
>  	if (hdr->args.pgbase != 0)
>  		iov_iter_advance(i, hdr->args.pgbase);
> +
> +	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
> +		u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
> +		/* Verify the IO is DIO-aligned as required */
> +		nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
> +						&nf_dio_offset_align,
> +						&nf_dio_read_offset_align);
> +		if (dir == READ)
> +			nf_dio_offset_align = nf_dio_read_offset_align;
> +
> +		if (nf_dio_mem_align && nf_dio_offset_align &&
> +		    nfs_iov_iter_aligned_bvec(i, hdr->args.offset,
> +					      nf_dio_mem_align - 1,
> +					      nf_dio_offset_align - 1))
> +			return; /* is DIO-aligned */
> +
> +		/* Fallback to using buffered for this misaligned IO */
> +		iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
> +	}
>  }
>  
>  static void
> @@ -406,6 +441,11 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
>  	struct nfs_pgio_header *hdr = iocb->hdr;
>  	struct file *filp = iocb->kiocb.ki_filp;
>  
> +	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
> +		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
> +		pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
> +	}
> +
>  	nfs_local_pgio_done(hdr, status);
>  
>  	/*
> @@ -598,6 +638,11 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
>  
>  	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
>  
> +	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
> +		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
> +		pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
> +	}
> +
>  	/* Handle short writes as if they are ENOSPC */
>  	if (status > 0 && status < hdr->args.count) {
>  		hdr->mds_offset += status;
> diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
> index 269fa9391dc46..be710d809a3ba 100644
> --- a/fs/nfsd/localio.c
> +++ b/fs/nfsd/localio.c
> @@ -117,12 +117,23 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
>  	return localio;
>  }
>  
> +static void nfsd_file_dio_alignment(struct nfsd_file *nf,
> +				    u32 *nf_dio_mem_align,
> +				    u32 *nf_dio_offset_align,
> +				    u32 *nf_dio_read_offset_align)
> +{
> +	*nf_dio_mem_align = nf->nf_dio_mem_align;
> +	*nf_dio_offset_align = nf->nf_dio_offset_align;
> +	*nf_dio_read_offset_align = nf->nf_dio_read_offset_align;
> +}
> +
>  static const struct nfsd_localio_operations nfsd_localio_ops = {
>  	.nfsd_net_try_get  = nfsd_net_try_get,
>  	.nfsd_net_put  = nfsd_net_put,
>  	.nfsd_open_local_fh = nfsd_open_local_fh,
>  	.nfsd_file_put_local = nfsd_file_put_local,
>  	.nfsd_file_file = nfsd_file_file,
> +	.nfsd_file_dio_alignment = nfsd_file_dio_alignment,
>  };
>  
>  void nfsd_localio_ops_init(void)
> diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> index 59ea90bd136b6..3d91043254e64 100644
> --- a/include/linux/nfslocalio.h
> +++ b/include/linux/nfslocalio.h
> @@ -64,6 +64,8 @@ struct nfsd_localio_operations {
>  						const fmode_t);
>  	struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **);
>  	struct file *(*nfsd_file_file)(struct nfsd_file *);
> +	void (*nfsd_file_dio_alignment)(struct nfsd_file *,
> +					u32 *, u32 *, u32 *);
>  } ____cacheline_aligned;
>  
>  extern void nfsd_localio_ops_init(void);

This looks reasonable to me. The nfsd bits in particular are
straightforward.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

  parent reply	other threads:[~2025-09-26 14:15 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-19 14:36 [PATCH v11 0/7] NFS Direct: align misaligned DIO for LOCALIO Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 1/7] nfs/localio: make trace_nfs_local_open_fh more useful Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 2/7] nfs/localio: avoid issuing misaligned IO using O_DIRECT Mike Snitzer
2025-09-19 17:34   ` Jeff Layton
2025-09-20  0:07     ` Mike Snitzer
2025-09-20  1:18     ` [PATCH v11b " Mike Snitzer
2025-09-26 14:15   ` Jeff Layton [this message]
2025-09-19 14:36 ` [PATCH v11 3/7] nfs/localio: refactor iocb and iov_iter_bvec initialization Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 4/7] nfs/localio: refactor iocb initialization Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 5/7] nfs/localio: add proper O_DIRECT support for READ and WRITE Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 6/7] nfs/localio: add tracepoints for misaligned DIO READ and WRITE support Mike Snitzer
2025-09-19 14:36 ` [PATCH v11 7/7] NFS: add basic STATX_DIOALIGN and STATX_DIO_READ_ALIGN support Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2b1e79503919069897b6049462653b0edfa2bef0.camel@kernel.org \
    --to=jlayton@kernel.org \
    --cc=anna@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=snitzer@kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).