public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Sergey Bashirov <sergeybashirov@gmail.com>
To: "J . Bruce Fields" <bfields@fieldses.org>,
	Chuck Lever <chuck.lever@oracle.com>
Cc: linux-nfs@vger.kernel.org,
	Sergey Bashirov <sergeybashirov@gmail.com>,
	Konstantin Evtushenko <koevtushenko@yandex.com>
Subject: [PATCH] nfsd: Implement large extent array support in pNFS
Date: Wed,  4 Jun 2025 16:07:08 +0300	[thread overview]
Message-ID: <20250604130809.52931-1-sergeybashirov@gmail.com> (raw)

When a pNFS client in block layout mode sends a LAYOUTCOMMIT RPC to the
MDS, a variable-length array of modified extents is supplied within the
request. This patch allows the NFS server to accept such extent arrays
even when they do not fit within a single memory page.

Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
---
 fs/nfsd/blocklayout.c    | 12 ++++---
 fs/nfsd/blocklayoutxdr.c | 78 ++++++++++++++++++++++++++++++++--------
 fs/nfsd/blocklayoutxdr.h |  8 ++---
 fs/nfsd/nfs4xdr.c        |  7 ++--
 fs/nfsd/xdr4.h           |  2 +-
 5 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e5c0982a381d..d40a0860fcf6 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -179,8 +179,10 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
 	struct iomap *iomaps;
 	int nr_iomaps;
 
-	nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-			lcp->lc_up_len, &iomaps, i_blocksize(inode));
+	nr_iomaps = nfsd4_block_decode_layoutupdate(&lcp->lc_up_layout,
+						    lcp->lc_up_len,
+						    &iomaps,
+						    i_blocksize(inode));
 	if (nr_iomaps < 0)
 		return nfserrno(nr_iomaps);
 
@@ -317,8 +319,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
 	struct iomap *iomaps;
 	int nr_iomaps;
 
-	nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
-			lcp->lc_up_len, &iomaps, i_blocksize(inode));
+	nr_iomaps = nfsd4_scsi_decode_layoutupdate(&lcp->lc_up_layout,
+						   lcp->lc_up_len,
+						   &iomaps,
+						   i_blocksize(inode));
 	if (nr_iomaps < 0)
 		return nfserrno(nr_iomaps);
 
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 442543304930..e3e3d79c8b4f 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -103,11 +103,13 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
 }
 
 int
-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
-		u32 block_size)
+nfsd4_block_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+				struct iomap **iomapp, u32 block_size)
 {
+	struct xdr_stream xdr;
 	struct iomap *iomaps;
 	u32 nr_iomaps, i;
+	char scratch[sizeof(struct pnfs_block_extent)];
 
 	if (len < sizeof(u32)) {
 		dprintk("%s: extent array too small: %u\n", __func__, len);
@@ -119,7 +121,15 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 		return -EINVAL;
 	}
 
-	nr_iomaps = be32_to_cpup(p++);
+	xdr_init_decode(&xdr, buf, buf->head[0].iov_base, NULL);
+	xdr_set_scratch_buffer(&xdr, scratch, sizeof(scratch));
+
+	if (xdr_stream_decode_u32(&xdr, &nr_iomaps)) {
+		dprintk("%s: failed to decode extent array length\n",
+			__func__);
+		return -EINVAL;
+	}
+
 	if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
 		dprintk("%s: extent array size mismatch: %u/%u\n",
 			__func__, len, nr_iomaps);
@@ -135,28 +145,51 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 	for (i = 0; i < nr_iomaps; i++) {
 		struct pnfs_block_extent bex;
 
-		memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
-		p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
+		if (xdr_stream_decode_opaque_fixed(&xdr, &bex.vol_id, sizeof(bex.vol_id)) <
+		    sizeof(bex.vol_id)) {
+			dprintk("%s: failed to decode device id for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 
-		p = xdr_decode_hyper(p, &bex.foff);
+		if (xdr_stream_decode_u64(&xdr, &bex.foff)) {
+			dprintk("%s: failed to decode offset for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (bex.foff & (block_size - 1)) {
 			dprintk("%s: unaligned offset 0x%llx\n",
 				__func__, bex.foff);
 			goto fail;
 		}
-		p = xdr_decode_hyper(p, &bex.len);
+
+		if (xdr_stream_decode_u64(&xdr, &bex.len)) {
+			dprintk("%s: failed to decode length for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (bex.len & (block_size - 1)) {
 			dprintk("%s: unaligned length 0x%llx\n",
 				__func__, bex.foff);
 			goto fail;
 		}
-		p = xdr_decode_hyper(p, &bex.soff);
+
+		if (xdr_stream_decode_u64(&xdr, &bex.soff)) {
+			dprintk("%s: failed to decode soffset for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (bex.soff & (block_size - 1)) {
 			dprintk("%s: unaligned disk offset 0x%llx\n",
 				__func__, bex.soff);
 			goto fail;
 		}
-		bex.es = be32_to_cpup(p++);
+
+		if (xdr_stream_decode_u32(&xdr, &bex.es)) {
+			dprintk("%s: failed to decode estate for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
 			dprintk("%s: incorrect extent state %d\n",
 				__func__, bex.es);
@@ -175,18 +208,27 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 }
 
 int
-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
-		u32 block_size)
+nfsd4_scsi_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+			       struct iomap **iomapp, u32 block_size)
 {
+	struct xdr_stream xdr;
 	struct iomap *iomaps;
 	u32 nr_iomaps, expected, i;
+	char scratch[sizeof(u64)];
 
 	if (len < sizeof(u32)) {
 		dprintk("%s: extent array too small: %u\n", __func__, len);
 		return -EINVAL;
 	}
 
-	nr_iomaps = be32_to_cpup(p++);
+	xdr_init_decode(&xdr, buf, buf->head[0].iov_base, NULL);
+	xdr_set_scratch_buffer(&xdr, scratch, sizeof(scratch));
+
+	if (xdr_stream_decode_u32(&xdr, &nr_iomaps)) {
+		dprintk("%s: failed to decode extent array length\n", __func__);
+		return -EINVAL;
+	}
+
 	expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
 	if (len != expected) {
 		dprintk("%s: extent array size mismatch: %u/%u\n",
@@ -203,14 +245,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 	for (i = 0; i < nr_iomaps; i++) {
 		u64 val;
 
-		p = xdr_decode_hyper(p, &val);
+		if (xdr_stream_decode_u64(&xdr, &val)) {
+			dprintk("%s: failed to decode offset for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (val & (block_size - 1)) {
 			dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
 			goto fail;
 		}
 		iomaps[i].offset = val;
 
-		p = xdr_decode_hyper(p, &val);
+		if (xdr_stream_decode_u64(&xdr, &val)) {
+			dprintk("%s: failed to decode length for entry %u\n",
+				__func__, i);
+			goto fail;
+		}
 		if (val & (block_size - 1)) {
 			dprintk("%s: unaligned length 0x%llx\n", __func__, val);
 			goto fail;
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index bc5166bfe46b..c4c8139b8e96 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
 		struct nfsd4_getdeviceinfo *gdp);
 __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
 		struct nfsd4_layoutget *lgp);
-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
-		u32 block_size);
-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
-		u32 block_size);
+int nfsd4_block_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+		struct iomap **iomapp, u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+		struct iomap **iomapp, u32 block_size);
 
 #endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a93a5db4fb0..81f42dc75b95 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -592,11 +592,8 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
 
 	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
 		return nfserr_bad_xdr;
-	if (lcp->lc_up_len > 0) {
-		lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
-		if (!lcp->lc_up_layout)
-			return nfserr_bad_xdr;
-	}
+	if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, lcp->lc_up_len))
+		return nfserr_bad_xdr;
 
 	return nfs_ok;
 }
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 846ab6df9d48..8516a1a6b46d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -492,7 +492,7 @@ struct nfsd4_layoutcommit {
 	struct timespec64	lc_mtime;	/* request */
 	u32			lc_layout_type;	/* request */
 	u32			lc_up_len;	/* layout length */
-	void			*lc_up_layout;	/* decoded by callback */
+	struct xdr_buf		lc_up_layout;	/* request, decoded by callback */
 	u32			lc_size_chg;	/* boolean for response */
 	u64			lc_newsize;	/* response */
 };
-- 
2.43.0


             reply	other threads:[~2025-06-04 13:08 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-04 13:07 Sergey Bashirov [this message]
2025-06-04 14:10 ` [PATCH] nfsd: Implement large extent array support in pNFS Chuck Lever
2025-06-04 14:54 ` Christoph Hellwig
2025-06-10  0:36   ` Sergey Bashirov
2025-06-10  5:39     ` Christoph Hellwig
2025-06-10 15:24       ` Sergey Bashirov
2025-06-11  6:55         ` Christoph Hellwig
2025-06-11 12:19           ` Sergey Bashirov
2025-06-12  6:33             ` Christoph Hellwig
2025-06-12  8:13               ` Sergey Bashirov
2025-06-11 13:53           ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250604130809.52931-1-sergeybashirov@gmail.com \
    --to=sergeybashirov@gmail.com \
    --cc=bfields@fieldses.org \
    --cc=chuck.lever@oracle.com \
    --cc=koevtushenko@yandex.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox