From: Sergey Bashirov <sergeybashirov@gmail.com>
To: "J . Bruce Fields" <bfields@fieldses.org>,
Chuck Lever <chuck.lever@oracle.com>
Cc: linux-nfs@vger.kernel.org,
Sergey Bashirov <sergeybashirov@gmail.com>,
Konstantin Evtushenko <koevtushenko@yandex.com>
Subject: [PATCH] nfsd: Implement large extent array support in pNFS
Date: Wed, 4 Jun 2025 16:07:08 +0300 [thread overview]
Message-ID: <20250604130809.52931-1-sergeybashirov@gmail.com> (raw)
When a pNFS client in block layout mode sends a LAYOUTCOMMIT RPC to the MDS,
a variable-length array of modified extents is supplied within the request.
This patch allows the NFS server to accept such extent arrays even when they
do not fit within a single memory page.
Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
---
fs/nfsd/blocklayout.c | 12 ++++---
fs/nfsd/blocklayoutxdr.c | 78 ++++++++++++++++++++++++++++++++--------
fs/nfsd/blocklayoutxdr.h | 8 ++---
fs/nfsd/nfs4xdr.c | 7 ++--
fs/nfsd/xdr4.h | 2 +-
5 files changed, 79 insertions(+), 28 deletions(-)
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e5c0982a381d..d40a0860fcf6 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -179,8 +179,10 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
struct iomap *iomaps;
int nr_iomaps;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
+ nr_iomaps = nfsd4_block_decode_layoutupdate(&lcp->lc_up_layout,
+ lcp->lc_up_len,
+ &iomaps,
+ i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
@@ -317,8 +319,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
struct iomap *iomaps;
int nr_iomaps;
- nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
+ nr_iomaps = nfsd4_scsi_decode_layoutupdate(&lcp->lc_up_layout,
+ lcp->lc_up_len,
+ &iomaps,
+ i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 442543304930..e3e3d79c8b4f 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -103,11 +103,13 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
}
int
-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+nfsd4_block_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+ struct iomap **iomapp, u32 block_size)
{
+ struct xdr_stream xdr;
struct iomap *iomaps;
u32 nr_iomaps, i;
+ char scratch[sizeof(struct pnfs_block_extent)];
if (len < sizeof(u32)) {
dprintk("%s: extent array too small: %u\n", __func__, len);
@@ -119,7 +121,15 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
return -EINVAL;
}
- nr_iomaps = be32_to_cpup(p++);
+ xdr_init_decode(&xdr, buf, buf->head[0].iov_base, NULL);
+ xdr_set_scratch_buffer(&xdr, scratch, sizeof(scratch));
+
+ if (xdr_stream_decode_u32(&xdr, &nr_iomaps)) {
+ dprintk("%s: failed to decode extent array length\n",
+ __func__);
+ return -EINVAL;
+ }
+
if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
dprintk("%s: extent array size mismatch: %u/%u\n",
__func__, len, nr_iomaps);
@@ -135,28 +145,51 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
for (i = 0; i < nr_iomaps; i++) {
struct pnfs_block_extent bex;
- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
+ if (xdr_stream_decode_opaque_fixed(&xdr, &bex.vol_id, sizeof(bex.vol_id)) <
+ sizeof(bex.vol_id)) {
+ dprintk("%s: failed to decode device id for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
- p = xdr_decode_hyper(p, &bex.foff);
+ if (xdr_stream_decode_u64(&xdr, &bex.foff)) {
+ dprintk("%s: failed to decode offset for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (bex.foff & (block_size - 1)) {
dprintk("%s: unaligned offset 0x%llx\n",
__func__, bex.foff);
goto fail;
}
- p = xdr_decode_hyper(p, &bex.len);
+
+ if (xdr_stream_decode_u64(&xdr, &bex.len)) {
+ dprintk("%s: failed to decode length for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (bex.len & (block_size - 1)) {
dprintk("%s: unaligned length 0x%llx\n",
__func__, bex.foff);
goto fail;
}
- p = xdr_decode_hyper(p, &bex.soff);
+
+ if (xdr_stream_decode_u64(&xdr, &bex.soff)) {
+ dprintk("%s: failed to decode soffset for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (bex.soff & (block_size - 1)) {
dprintk("%s: unaligned disk offset 0x%llx\n",
__func__, bex.soff);
goto fail;
}
- bex.es = be32_to_cpup(p++);
+
+ if (xdr_stream_decode_u32(&xdr, &bex.es)) {
+ dprintk("%s: failed to decode estate for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
dprintk("%s: incorrect extent state %d\n",
__func__, bex.es);
@@ -175,18 +208,27 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
}
int
-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+nfsd4_scsi_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+ struct iomap **iomapp, u32 block_size)
{
+ struct xdr_stream xdr;
struct iomap *iomaps;
u32 nr_iomaps, expected, i;
+ char scratch[sizeof(u64)];
if (len < sizeof(u32)) {
dprintk("%s: extent array too small: %u\n", __func__, len);
return -EINVAL;
}
- nr_iomaps = be32_to_cpup(p++);
+ xdr_init_decode(&xdr, buf, buf->head[0].iov_base, NULL);
+ xdr_set_scratch_buffer(&xdr, scratch, sizeof(scratch));
+
+ if (xdr_stream_decode_u32(&xdr, &nr_iomaps)) {
+ dprintk("%s: failed to decode extent array length\n", __func__);
+ return -EINVAL;
+ }
+
expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
if (len != expected) {
dprintk("%s: extent array size mismatch: %u/%u\n",
@@ -203,14 +245,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
for (i = 0; i < nr_iomaps; i++) {
u64 val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(&xdr, &val)) {
+ dprintk("%s: failed to decode offset for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (val & (block_size - 1)) {
dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
goto fail;
}
iomaps[i].offset = val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(&xdr, &val)) {
+ dprintk("%s: failed to decode length for entry %u\n",
+ __func__, i);
+ goto fail;
+ }
if (val & (block_size - 1)) {
dprintk("%s: unaligned length 0x%llx\n", __func__, val);
goto fail;
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index bc5166bfe46b..c4c8139b8e96 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
struct nfsd4_layoutget *lgp);
-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
+int nfsd4_block_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+ struct iomap **iomapp, u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(struct xdr_buf *buf, u32 len,
+ struct iomap **iomapp, u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a93a5db4fb0..81f42dc75b95 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -592,11 +592,8 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
return nfserr_bad_xdr;
- if (lcp->lc_up_len > 0) {
- lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
- if (!lcp->lc_up_layout)
- return nfserr_bad_xdr;
- }
+ if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, lcp->lc_up_len))
+ return nfserr_bad_xdr;
return nfs_ok;
}
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 846ab6df9d48..8516a1a6b46d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -492,7 +492,7 @@ struct nfsd4_layoutcommit {
struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
u32 lc_up_len; /* layout length */
- void *lc_up_layout; /* decoded by callback */
+ struct xdr_buf lc_up_layout; /* request, decoded by callback */
u32 lc_size_chg; /* boolean for response */
u64 lc_newsize; /* response */
};
--
2.43.0
next reply other threads:[~2025-06-04 13:08 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-04 13:07 Sergey Bashirov [this message]
2025-06-04 14:10 ` [PATCH] nfsd: Implement large extent array support in pNFS Chuck Lever
2025-06-04 14:54 ` Christoph Hellwig
2025-06-10 0:36 ` Sergey Bashirov
2025-06-10 5:39 ` Christoph Hellwig
2025-06-10 15:24 ` Sergey Bashirov
2025-06-11 6:55 ` Christoph Hellwig
2025-06-11 12:19 ` Sergey Bashirov
2025-06-12 6:33 ` Christoph Hellwig
2025-06-12 8:13 ` Sergey Bashirov
2025-06-11 13:53 ` Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250604130809.52931-1-sergeybashirov@gmail.com \
--to=sergeybashirov@gmail.com \
--cc=bfields@fieldses.org \
--cc=chuck.lever@oracle.com \
--cc=koevtushenko@yandex.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.