All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Snitzer <snitzer@kernel.org>
To: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>,
	linux-nfs@vger.kernel.org
Subject: [v6.18-rcX PATCH 2/3] nfs/localio: add refcounting for each iocb IO associated with NFS pgio header
Date: Mon, 27 Oct 2025 09:08:32 -0400	[thread overview]
Message-ID: <20251027130833.96571-3-snitzer@kernel.org> (raw)
In-Reply-To: <aPZ-dIObXH8Z06la@kernel.org>

Improve completion handling of as many as 3 IOs associated with each
misaligned DIO by using a atomic_t to track completion of each IO.

Update nfs_local_pgio_done() to use precise atomic_t accounting for
remaining iov_iter (up to 3) associated with each iocb, so that each
NFS LOCALIO pgio header is only released after all IOs have completed.
But also allow early return if/when a short read or write occurs.

Fixes reported BUG: KASAN: slab-use-after-free in nfs_local_call_read:
https://lore.kernel.org/linux-nfs/aPSvi5Yr2lGOh5Jh@dell-per750-06-vm-07.rhts.eng.pek2.redhat.com/

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs/localio.c | 114 ++++++++++++++++++++++++++++-------------------
 1 file changed, 69 insertions(+), 45 deletions(-)

diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 7c97055bddb1..a5f1eeeef30e 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -42,7 +42,7 @@ struct nfs_local_kiocb {
 	/* Begin mostly DIO-specific members */
 	size_t                  end_len;
 	short int		end_iter_index;
-	short int		n_iters;
+	atomic_t		n_iters;
 	bool			iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
 	loff_t                  offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned;
 	struct iov_iter		iters[NFSLOCAL_MAX_IOS];
@@ -407,6 +407,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
 		iters[n_iters].count = local_dio->start_len;
 		iocb->offset[n_iters] = iocb->hdr->args.offset;
 		iocb->iter_is_dio_aligned[n_iters] = false;
+		atomic_inc(&iocb->n_iters);
 		++n_iters;
 	}
 
@@ -425,6 +426,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
 		/* Save index and length of end */
 		iocb->end_iter_index = n_iters;
 		iocb->end_len = local_dio->end_len;
+		atomic_inc(&iocb->n_iters);
 		++n_iters;
 	}
 
@@ -448,7 +450,6 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
 	}
 	++n_iters;
 
-	iocb->n_iters = n_iters;
 	return n_iters;
 }
 
@@ -474,6 +475,12 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
 	}
 	len = hdr->args.count - total;
 
+	/*
+	 * For each iocb, iocb->n_iter is always at least 1 and we always
+	 * end io after first nfs_local_pgio_done call unless misaligned DIO.
+	 */
+	atomic_set(&iocb->n_iters, 1);
+
 	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
 		struct nfs_local_dio local_dio;
 
@@ -486,7 +493,6 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
 	iocb->offset[0] = hdr->args.offset;
 	iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
 	iocb->iter_is_dio_aligned[0] = false;
-	iocb->n_iters = 1;
 }
 
 static void
@@ -506,9 +512,11 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr,
 		hdr->task.tk_start = ktime_get();
 }
 
-static void
-nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
+static bool
+nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force)
 {
+	struct nfs_pgio_header *hdr = iocb->hdr;
+
 	/* Must handle partial completions */
 	if (status >= 0) {
 		hdr->res.count += status;
@@ -519,6 +527,12 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
 		hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
 		hdr->task.tk_status = status;
 	}
+
+	if (force)
+		return true;
+
+	BUG_ON(atomic_read(&iocb->n_iters) <= 0);
+	return atomic_dec_and_test(&iocb->n_iters);
 }
 
 static void
@@ -549,11 +563,11 @@ static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
 	queue_work(nfsiod_workqueue, &iocb->work);
 }
 
-static void
-nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
+static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
 {
 	struct nfs_pgio_header *hdr = iocb->hdr;
 	struct file *filp = iocb->kiocb.ki_filp;
+	long status = hdr->task.tk_status;
 
 	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
 		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
@@ -574,12 +588,18 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
 			status > 0 ? status : 0, hdr->res.eof);
 }
 
+static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
+{
+	nfs_local_read_done(iocb);
+	nfs_local_pgio_release(iocb);
+}
+
 static void nfs_local_read_aio_complete_work(struct work_struct *work)
 {
 	struct nfs_local_kiocb *iocb =
 		container_of(work, struct nfs_local_kiocb, work);
 
-	nfs_local_pgio_release(iocb);
+	nfs_local_read_iocb_done(iocb);
 }
 
 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
@@ -587,8 +607,10 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
 	struct nfs_local_kiocb *iocb =
 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
 
-	nfs_local_pgio_done(iocb->hdr, ret);
-	nfs_local_read_done(iocb, ret);
+	/* AIO completion of DIO read should always be last to complete */
+	if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+		return;
+
 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
 }
 
@@ -599,10 +621,13 @@ static void nfs_local_call_read(struct work_struct *work)
 	struct file *filp = iocb->kiocb.ki_filp;
 	const struct cred *save_cred;
 	ssize_t status;
+	int n_iters;
 
 	save_cred = override_creds(filp->f_cred);
 
-	for (int i = 0; i < iocb->n_iters ; i++) {
+	n_iters = atomic_read(&iocb->n_iters);
+	for (int i = 0; i < n_iters ; i++) {
+		/* DIO-aligned middle is always issued last with AIO completion */
 		if (iocb->iter_is_dio_aligned[i]) {
 			iocb->kiocb.ki_flags |= IOCB_DIRECT;
 			iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
@@ -612,18 +637,14 @@ static void nfs_local_call_read(struct work_struct *work)
 		iocb->kiocb.ki_pos = iocb->offset[i];
 		status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
 		if (status != -EIOCBQUEUED) {
-			nfs_local_pgio_done(iocb->hdr, status);
-			if (iocb->hdr->task.tk_status)
+			if (nfs_local_pgio_done(iocb, status, false)) {
+				nfs_local_read_iocb_done(iocb);
 				break;
+			}
 		}
 	}
 
 	revert_creds(save_cred);
-
-	if (status != -EIOCBQUEUED) {
-		nfs_local_read_done(iocb, status);
-		nfs_local_pgio_release(iocb);
-	}
 }
 
 static int
@@ -738,11 +759,10 @@ static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
 	fattr->du.nfs3.used = stat.blocks << 9;
 }
 
-static void
-nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
+static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
 {
 	struct nfs_pgio_header *hdr = iocb->hdr;
-	struct inode *inode = hdr->inode;
+	long status = hdr->task.tk_status;
 
 	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
 
@@ -761,28 +781,36 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
 		nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
 		status = -ENOSPC;
 		/* record -ENOSPC in terms of nfs_local_pgio_done */
-		nfs_local_pgio_done(hdr, status);
+		(void) nfs_local_pgio_done(iocb, status, true);
 	}
 	if (hdr->task.tk_status < 0)
-		nfs_reset_boot_verifier(inode);
+		nfs_reset_boot_verifier(hdr->inode);
 }
 
-static void nfs_local_write_aio_complete_work(struct work_struct *work)
+static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
 {
-	struct nfs_local_kiocb *iocb =
-		container_of(work, struct nfs_local_kiocb, work);
-
+	nfs_local_write_done(iocb);
 	nfs_local_vfs_getattr(iocb);
 	nfs_local_pgio_release(iocb);
 }
 
+static void nfs_local_write_aio_complete_work(struct work_struct *work)
+{
+	struct nfs_local_kiocb *iocb =
+		container_of(work, struct nfs_local_kiocb, work);
+
+	nfs_local_write_iocb_done(iocb);
+}
+
 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
 {
 	struct nfs_local_kiocb *iocb =
 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
 
-	nfs_local_pgio_done(iocb->hdr, ret);
-	nfs_local_write_done(iocb, ret);
+	/* AIO completion of DIO write should always be last to complete */
+	if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
+		return;
+
 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
 }
 
@@ -793,13 +821,17 @@ static void nfs_local_call_write(struct work_struct *work)
 	struct file *filp = iocb->kiocb.ki_filp;
 	unsigned long old_flags = current->flags;
 	const struct cred *save_cred;
+	bool force_done = false;
 	ssize_t status;
+	int n_iters;
 
 	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
 	save_cred = override_creds(filp->f_cred);
 
 	file_start_write(filp);
-	for (int i = 0; i < iocb->n_iters ; i++) {
+	n_iters = atomic_read(&iocb->n_iters);
+	for (int i = 0; i < n_iters ; i++) {
+		/* DIO-aligned middle is always issued last with AIO completion */
 		if (iocb->iter_is_dio_aligned[i]) {
 			iocb->kiocb.ki_flags |= IOCB_DIRECT;
 			iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
@@ -812,35 +844,27 @@ static void nfs_local_call_write(struct work_struct *work)
 			if (unlikely(status >= 0 && status < iocb->iters[i].count)) {
 				/* partial write */
 				if (i == iocb->end_iter_index) {
-					/* Must not account partial end, otherwise, due
-					 * to end being issued before middle: the partial
+					/* Must not account DIO partial end, otherwise (due
+					 * to end being issued before middle): the partial
 					 * write accounting in nfs_local_write_done()
 					 * would incorrectly advance hdr->args.offset
 					 */
 					status = 0;
 				} else {
-					/* Partial write at start or buffered middle,
-					 * exit early.
-					 */
-					nfs_local_pgio_done(iocb->hdr, status);
-					break;
+					/* Partial write at start or middle, force done */
+					force_done = true;
 				}
 			}
-			nfs_local_pgio_done(iocb->hdr, status);
-			if (iocb->hdr->task.tk_status)
+			if (nfs_local_pgio_done(iocb, status, force_done)) {
+				nfs_local_write_iocb_done(iocb);
 				break;
+			}
 		}
 	}
 	file_end_write(filp);
 
 	revert_creds(save_cred);
 	current->flags = old_flags;
-
-	if (status != -EIOCBQUEUED) {
-		nfs_local_write_done(iocb, status);
-		nfs_local_vfs_getattr(iocb);
-		nfs_local_pgio_release(iocb);
-	}
 }
 
 static int
-- 
2.44.0


  parent reply	other threads:[~2025-10-27 13:08 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-19  9:29 [Bug report] xfstests generic/323 over NFS hit BUG: KASAN: slab-use-after-free in nfs_local_call_read on 6.18.0-rc1 Yongcheng Yang
2025-10-19 15:18 ` Trond Myklebust
2025-10-19 16:26   ` Mike Snitzer
2025-10-20 18:24     ` Mike Snitzer
2025-10-27 13:08       ` [v6.18-rcX PATCH 0/3] nfs/localio: fixes for recent misaligned DIO changes Mike Snitzer
2025-10-27 13:08       ` [v6.18-rcX PATCH 1/3] nfs/localio: remove unecessary ENOTBLK handling in DIO WRITE support Mike Snitzer
2025-10-27 13:08       ` Mike Snitzer [this message]
2025-10-27 13:19         ` [v6.18-rcX PATCH 2/3] nfs/localio: add refcounting for each iocb IO associated with NFS pgio header Christoph Hellwig
2025-10-27 13:55           ` Mike Snitzer
2025-10-27 14:45             ` Christoph Hellwig
2025-10-27 13:08       ` [v6.18-rcX PATCH 3/3] nfs/localio: backfill missing partial read support for misaligned DIO Mike Snitzer
2025-10-27 17:52       ` [v6.18-rcX PATCH 4/3] nfs/localio: Ensure DIO WRITE's IO on stable storage upon completion Mike Snitzer
2025-10-29 23:19         ` [v6.18-rcX PATCH 5/3] nfs/localio: do not issue misaligned DIO out-of-order Mike Snitzer
2025-10-31  1:50           ` Mike Snitzer
2025-10-31 13:33             ` Anna Schumaker
2025-11-04 18:02               ` [v6.18-rcX PATCH v2] " Mike Snitzer
2025-11-06  2:50                 ` Mike Snitzer
2025-11-06  3:03                   ` [v6.18-rcX PATCH v3 5/3] " Mike Snitzer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027130833.96571-3-snitzer@kernel.org \
    --to=snitzer@kernel.org \
    --cc=anna.schumaker@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.