public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Pranjal Shrivastava <praan@google.com>
To: trond.myklebust@hammerspace.com, anna@kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, edumazet@google.com,
	 pabeni@redhat.com, chuck.lever@oracle.com, jlayton@kernel.org,
	tom@talpey.com,  okorniev@redhat.com, neil@brown.name,
	dai.ngo@oracle.com,  linux-nfs@vger.kernel.org,
	netdev@vger.kernel.org,  Pranjal Shrivastava <praan@google.com>
Subject: [RFC PATCH 4/4] nfs: allow P2PDMA in direct I/O path
Date: Wed,  1 Apr 2026 19:45:00 +0000	[thread overview]
Message-ID: <20260401194501.2269200-5-praan@google.com> (raw)
In-Reply-To: <20260401194501.2269200-1-praan@google.com>

Migrate the NFS Direct I/O path from the legacy iov_iter_get_pages_alloc2()
API to the modern iov_iter_extract_pages() API. This migration enables
support for PCI Peer-to-Peer DMA (P2PDMA) by allowing the
ITER_ALLOW_P2PDMA flag to be set.

Pass ITER_ALLOW_P2PDMA to iov_iter_extract_pages() only if the local
mount indicates support via the NFS_CAP_P2PDMA capability bit (detected
at mount time for RDMA transports).

Fix the memory safety bug in the Direct I/O loop where pages were being
unpinned immediately after request creation. Instead, we now leverage
pin-aware nfs_page structures to hold the pins until the I/O is complete.
The manual release in the loop is updated to only clean up pages that
failed to be handed over to an nfs_page request.

Signed-off-by: Pranjal Shrivastava <praan@google.com>
---
 fs/nfs/direct.c | 51 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c8429b430181..6916541af9db 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -165,11 +165,17 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
 	return 0;
 }
 
-static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
+static void nfs_direct_release_pages(struct page **pages, unsigned int npages,
+				     bool pinned)
 {
 	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
+
+	if (pinned) {
+		unpin_user_pages(pages, npages);
+	} else {
+		for (i = 0; i < npages; i++)
+			put_page(pages[i]);
+	}
 }
 
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -354,23 +360,30 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+		/* Tell extract pages to allocate the page array */
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
+		bool pinned = iov_iter_extract_will_pin(iter);
+		iov_iter_extraction_t extraction_flags = 0;
+
+		if (NFS_SERVER(inode)->caps & NFS_CAP_P2PDMA)
+			extraction_flags |= ITER_ALLOW_P2PDMA;
 
-		result = iov_iter_get_pages_alloc2(iter, &pagevec,
-						  rsize, &pgbase);
+		result = iov_iter_extract_pages(iter, &pagevec,
+						rsize, ~0U,
+						extraction_flags, &pgbase);
 		if (result < 0)
 			break;
-	
+
 		bytes = result;
 		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_page_create_from_page(dreq->ctx, pagevec[i], false,
+			req = nfs_page_create_from_page(dreq->ctx, pagevec[i], pinned,
 							pgbase, pos, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -386,7 +399,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			requested_bytes += req_len;
 			pos += req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		if (i < npages)
+			nfs_direct_release_pages(pagevec + i, npages - i, pinned);
 		kvfree(pagevec);
 		if (result < 0)
 			break;
@@ -882,13 +896,21 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+
+		/* Tell extract pages to allocate the page array */
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
+		bool pinned = iov_iter_extract_will_pin(iter);
+		iov_iter_extraction_t extraction_flags = 0;
+
+		if (NFS_SERVER(inode)->caps & NFS_CAP_P2PDMA)
+			extraction_flags |= ITER_ALLOW_P2PDMA;
 
-		result = iov_iter_get_pages_alloc2(iter, &pagevec,
-						  wsize, &pgbase);
+		result = iov_iter_extract_pages(iter, &pagevec,
+						wsize, ~0U,
+						extraction_flags, &pgbase);
 		if (result < 0)
 			break;
 
@@ -898,7 +920,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_page_create_from_page(dreq->ctx, pagevec[i], false,
+			req = nfs_page_create_from_page(dreq->ctx, pagevec[i], pinned,
 							pgbase, pos, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -942,7 +964,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			desc.pg_error = 0;
 			defer = true;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		if (i < npages)
+			nfs_direct_release_pages(pagevec + i, npages - i, pinned);
 		kvfree(pagevec);
 		if (result < 0)
 			break;
-- 
2.53.0.1185.g05d4b7b318-goog


  parent reply	other threads:[~2026-04-01 19:45 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-01 19:44 [RFC PATCH 0/4] nfs: Enable PCI Peer-to-Peer DMA (P2PDMA) support Pranjal Shrivastava
2026-04-01 19:44 ` [RFC PATCH 1/4] sunrpc: add supports_p2pdma to rpc_xprt_ops Pranjal Shrivastava
2026-04-01 19:44 ` [RFC PATCH 2/4] nfs: add NFS_CAP_P2PDMA and detect transport support Pranjal Shrivastava
2026-04-02 13:11   ` Chuck Lever
2026-04-01 19:44 ` [RFC PATCH 3/4] nfs: make nfs_page pin-aware Pranjal Shrivastava
2026-04-02  5:04   ` Christoph Hellwig
2026-04-01 19:45 ` Pranjal Shrivastava [this message]
2026-04-02  5:05   ` [RFC PATCH 4/4] nfs: allow P2PDMA in direct I/O path Christoph Hellwig
2026-04-02  5:07 ` [RFC PATCH 0/4] nfs: Enable PCI Peer-to-Peer DMA (P2PDMA) support Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260401194501.2269200-5-praan@google.com \
    --to=praan@google.com \
    --cc=anna@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=dai.ngo@oracle.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=jlayton@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neil@brown.name \
    --cc=netdev@vger.kernel.org \
    --cc=okorniev@redhat.com \
    --cc=pabeni@redhat.com \
    --cc=tom@talpey.com \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox