From: Anna Schumaker <anna@kernel.org>
To: linux-nfs@vger.kernel.org, chuck.lever@oracle.com
Cc: anna@kernel.org
Subject: [PATCH v3 6/6] NFSD: Repeal and replace the READ_PLUS implementation
Date: Fri, 15 Jul 2022 14:44:33 -0400 [thread overview]
Message-ID: <20220715184433.838521-7-anna@kernel.org> (raw)
In-Reply-To: <20220715184433.838521-1-anna@kernel.org>
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Rather than relying on the underlying filesystem to tell us where hole
and data segments are through vfs_llseek(), let's instead do the hole
compression ourselves. This has a few advantages over the old
implementation:
1) A single call to the underlying filesystem through nfsd_readv() means
the file can't change from underneath us in the middle of encoding.
2) A single call to the underlying filesystem also means that the
underlying filesystem only needs to synchronize cached and on-disk
data one time instead of potentially many times, speeding up the reply.
3) Hole support for filesystems that don't support SEEK_HOLE and SEEK_DATA
I also included an optimization where we can cut down on the amount of
memory being shifted around by doing the compression as (hole, data)
pairs.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfsd/nfs4xdr.c | 219 +++++++++++++++++++++++++---------------------
1 file changed, 119 insertions(+), 100 deletions(-)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 61b2aae81abb..df8289fce4ef 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4731,81 +4731,138 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr;
}
+struct read_plus_segment { /* one HOLE or DATA run of a READ_PLUS reply */
+ enum data_content4 rp_type; /* NFS4_CONTENT_HOLE or NFS4_CONTENT_DATA */
+ u64 rp_offset; /* file offset at which the segment starts */
+ u64 rp_length; /* segment length in bytes */
+ unsigned int rp_page_pos; /* pagenum * PAGE_SIZE of the segment's first page */
+};
+
static __be32
-nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof,
- loff_t *pos)
+nfsd4_read_plus_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
+ unsigned long *maxcount, u32 *eof)
{
struct xdr_stream *xdr = resp->xdr;
- struct file *file = read->rd_nf->nf_file;
- int starting_len = xdr->buf->len;
- loff_t hole_pos;
- __be32 nfserr;
- __be32 *p, tmp;
- __be64 tmp64;
-
- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- if (hole_pos > read->rd_offset)
- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
-
- /* Content type, offset, byte count */
- p = xdr_reserve_space(xdr, 4 + 8 + 4);
- if (!p)
- return nfserr_resource;
+ unsigned int starting_len = xdr->buf->len; /* stream length before any file data is reserved */
+ __be32 nfserr, zero = xdr_zero;
+ unsigned int pad;
+ /*
+ * Reserve the maximum amount of space needed to craft a READ_PLUS
+ * reply. The call to xdr_reserve_space_vec() switches us to the
+ * xdr->pages, which we then read file data into before analyzing
+ * the individual segments.
+ *
+ * Afterwards the stream is truncated to *maxcount (as left by
+ * nfsd_readv()) rounded up to the XDR alignment, and the pad bytes
+ * are zeroed. Returns nfserr_resource if the reservation fails, the
+ * nfsd_readv() status if the read fails, and nfs_ok otherwise.
+ */
read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
if (read->rd_vlen < 0)
return nfserr_resource;
- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
+ nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, read->rd_nf->nf_file,
+ read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ maxcount, eof);
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
+ xdr_truncate_encode(xdr, starting_len + xdr_align_size(*maxcount)); /* no per-segment header is written here any more */
- tmp = htonl(NFS4_CONTENT_DATA);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp64 = cpu_to_be64(read->rd_offset);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
- tmp = htonl(*maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
-
- tmp = xdr_zero;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
- xdr_pad_size(*maxcount));
+ pad = xdr_pad_size(*maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + *maxcount, &zero, pad); /* zero the alignment padding after the data */
return nfs_ok;
}
+/**
+ * nfsd4_encode_read_plus_segment - Encode a single READ_PLUS segment
+ * @xdr: pointer to an xdr_stream
+ * @segment: pointer to a single segment
+ * @bufpos: xdr_stream offset to place the segment; advanced past the
+ * encoded segment on return
+ * @segments: pointer to the total number of segments seen; incremented
+ * by one on every call
+ *
+ * Performs surgery on the xdr_stream to compress out HOLE segments and
+ * to place DATA segments in the proper place.
+ *
+ * Every segment starts with its content type (one word) followed by its
+ * offset (encoded with xdr_encode_double()). A HOLE segment then carries
+ * only its length, again via xdr_encode_double(). A DATA segment carries
+ * a one-word length followed by the payload, which is moved from
+ * head[0].iov_len + rp_page_pos to directly after that length word.
+ */
+static void
+nfsd4_encode_read_plus_segment(struct xdr_stream *xdr,
+ struct read_plus_segment *segment,
+ unsigned int *bufpos, unsigned int *segments)
+{
+ struct xdr_buf *buf = xdr->buf;
+
+ xdr_encode_word(buf, *bufpos, segment->rp_type);
+ xdr_encode_double(buf, *bufpos + XDR_UNIT, segment->rp_offset);
+ *bufpos += 3 * XDR_UNIT; /* skip the type word and the offset */
+
+ if (segment->rp_type == NFS4_CONTENT_HOLE) {
+ xdr_encode_double(buf, *bufpos, segment->rp_length);
+ *bufpos += 2 * XDR_UNIT; /* a hole has no payload, only its length */
+ } else {
+ size_t align = xdr_align_size(segment->rp_length); /* payload size including XDR padding */
+ xdr_encode_word(buf, *bufpos, segment->rp_length);
+ if (*segments == 0) /* first segment of the reply only */
+ xdr_buf_trim_head(buf, XDR_UNIT); /* NOTE(review): presumably drops the spare word of the 5-word HOLE-sized reservation made by the caller - confirm */
+
+ xdr_stream_move_subsegment(xdr,
+ buf->head[0].iov_len + segment->rp_page_pos,
+ *bufpos + XDR_UNIT, align);
+ *bufpos += XDR_UNIT + align; /* length word plus padded payload */
+ }
+
+ *segments += 1;
+}
+
+/*
+ * Read the requested range once through nfsd4_read_plus_readv() and then
+ * scan the pages that were filled in. Consecutive pages of the same kind
+ * (all zeroes => HOLE, anything else => DATA) are merged into a single
+ * segment, and each finished segment is handed to
+ * nfsd4_encode_read_plus_segment().
+ *
+ * @segments is incremented once per encoded segment; @eof is passed
+ * through to the read. Returns nfserr_resource if the initial
+ * reservation fails, the read status if the read fails, and nfs_ok
+ * otherwise. No segment is emitted when the read produced no data.
+ */
static __be32
-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof)
+nfsd4_encode_read_plus_segments(struct nfsd4_compoundres *resp,
+ struct nfsd4_read *read,
+ unsigned int *segments, u32 *eof)
{
- struct file *file = read->rd_nf->nf_file;
- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
- loff_t f_size = i_size_read(file_inode(file));
- unsigned long count;
- __be32 *p;
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned int bufpos = xdr->buf->len;
+ u64 offset = read->rd_offset;
+ struct read_plus_segment segment;
+ enum data_content4 pagetype;
+ unsigned long maxcount;
+ unsigned int pagenum = 0;
+ unsigned int pagelen;
+ char *vpage, *p;
+ __be32 nfserr;
- if (data_pos == -ENXIO)
- data_pos = f_size;
- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
- count = data_pos - read->rd_offset;
-
- /* Content type, offset, byte count */
- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
- if (!p)
+ /* enough space for a HOLE segment before we switch to the pages */
+ if (!xdr_reserve_space(xdr, 5 * XDR_UNIT))
return nfserr_resource;
+ xdr_commit_encode(xdr);
- *p++ = htonl(NFS4_CONTENT_HOLE);
- p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
- *eof = (read->rd_offset + count) >= f_size;
- *maxcount = min_t(unsigned long, count, *maxcount);
+ nfserr = nfsd4_read_plus_readv(resp, read, &maxcount, eof);
+ if (nfserr)
+ return nfserr;
+
+ while (maxcount > 0) {
+ vpage = xdr_buf_nth_page_address(xdr->buf, pagenum, &pagelen);
+ /* Check vpage first so pagelen is only used for a page we got. */
+ if (!vpage)
+ break;
+ pagelen = min_t(unsigned int, pagelen, maxcount);
+ if (pagelen == 0)
+ break;
+ p = memchr_inv(vpage, 0, pagelen);
+ pagetype = (p == NULL) ? NFS4_CONTENT_HOLE : NFS4_CONTENT_DATA;
+
+ /*
+ * pagenum must be tested first: segment is not initialized
+ * until the first page has been seen, so rp_type must not
+ * be read on the first pass.
+ */
+ if (pagenum == 0 || pagetype != segment.rp_type) {
+ if (likely(pagenum > 0)) {
+ /* The previous run is complete; flush it. */
+ nfsd4_encode_read_plus_segment(xdr, &segment,
+ &bufpos, segments);
+ offset += segment.rp_length;
+ }
+ segment.rp_type = pagetype;
+ segment.rp_offset = offset;
+ segment.rp_length = pagelen;
+ segment.rp_page_pos = pagenum * PAGE_SIZE;
+ } else
+ segment.rp_length += pagelen;
+
+ maxcount -= pagelen;
+ pagenum++;
+ }
+
+ /*
+ * Flush the final run. If no page was scanned (e.g. a zero-length
+ * read) segment was never filled in, so emit nothing rather than
+ * encoding indeterminate stack contents into the reply.
+ */
+ if (pagenum > 0)
+ nfsd4_encode_read_plus_segment(xdr, &segment, &bufpos, segments);
+ xdr_truncate_encode(xdr, bufpos);
return nfs_ok;
}
@@ -4813,69 +4870,31 @@ static __be32
nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_read *read)
{
- unsigned long maxcount, count;
struct xdr_stream *xdr = resp->xdr;
- struct file *file;
int starting_len = xdr->buf->len;
- int last_segment = xdr->buf->len;
- int segments = 0;
- __be32 *p, tmp;
- bool is_data;
- loff_t pos;
+ unsigned int segments = 0; /* segments encoded so far; written into the reply at "out" */
u32 eof;
if (nfserr)
return nfserr;
- file = read->rd_nf->nf_file;
/* eof flag, segment count */
- p = xdr_reserve_space(xdr, 4 + 4);
- if (!p)
+ if (!xdr_reserve_space(xdr, 2 * XDR_UNIT)) /* two placeholder words, back-filled at "out" */
return nfserr_resource;
xdr_commit_encode(xdr);
- maxcount = min_t(unsigned long, read->rd_length,
- (xdr->buf->buflen - xdr->buf->len));
- count = maxcount;
-
- eof = read->rd_offset >= i_size_read(file_inode(file));
+ eof = read->rd_offset >= i_size_read(file_inode(read->rd_nf->nf_file)); /* request starts at or past i_size */
if (eof)
goto out;
- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- is_data = pos > read->rd_offset;
-
- while (count > 0 && !eof) {
- maxcount = count;
- if (is_data)
- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
- segments == 0 ? &pos : NULL);
- else
- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
- if (nfserr)
- goto out;
- count -= maxcount;
- read->rd_offset += maxcount;
- is_data = !is_data;
- last_segment = xdr->buf->len;
- segments++;
- }
-
+ nfserr = nfsd4_encode_read_plus_segments(resp, read, &segments, &eof); /* single read, then hole/data split */
out:
- if (nfserr && segments == 0)
+ if (nfserr) /* any error now discards everything encoded for this operation */
xdr_truncate_encode(xdr, starting_len);
else {
- if (nfserr) {
- xdr_truncate_encode(xdr, last_segment);
- nfserr = nfs_ok;
- eof = 0;
- }
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp = htonl(segments);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+ xdr_encode_word(xdr->buf, starting_len, eof); /* first reserved word */
+ xdr_encode_word(xdr->buf, starting_len + XDR_UNIT, segments); /* second reserved word */
}
-
return nfserr;
}
--
2.37.1
next prev parent reply other threads:[~2022-07-15 18:44 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-15 18:44 [PATCH v3 0/6] NFSD: Improvements for the NFSv4.2 READ_PLUS operation Anna Schumaker
2022-07-15 18:44 ` [PATCH v3 1/6] SUNRPC: Introduce xdr_stream_move_subsegment() Anna Schumaker
2022-07-15 18:44 ` [PATCH v3 2/6] SUNRPC: Introduce xdr_encode_double() Anna Schumaker
2022-07-15 18:44 ` [PATCH v3 3/6] SUNRPC: Introduce xdr_buf_trim_head() Anna Schumaker
2022-07-15 18:44 ` [PATCH v3 4/6] SUNRPC: Introduce xdr_buf_nth_page_address() Anna Schumaker
2022-07-15 18:44 ` [PATCH v3 5/6] SUNRPC: Export xdr_buf_pagecount() Anna Schumaker
2022-07-15 18:44 ` Anna Schumaker [this message]
2022-07-15 19:08 ` [PATCH v3 6/6] NFSD: Repeal and replace the READ_PLUS implementation Chuck Lever III
2022-07-18 1:15 ` Dave Chinner
2022-07-19 17:21 ` Chuck Lever III
2022-07-19 20:24 ` Anna Schumaker
2022-07-19 20:47 ` Chuck Lever III
2022-07-19 21:10 ` Matthew Wilcox
2022-07-19 23:18 ` Dave Chinner
2022-07-19 20:46 ` Anna Schumaker
2022-07-19 22:44 ` Dave Chinner
2022-07-20 1:26 ` Chuck Lever III
2022-07-20 2:36 ` Dave Chinner
2022-07-20 4:18 ` Chuck Lever III
2022-07-22 0:44 ` Dave Chinner
2022-07-22 15:09 ` Chuck Lever III
2022-08-18 18:31 ` Anna Schumaker
2022-08-19 15:18 ` Chuck Lever III
2022-07-20 12:55 ` Jeff Layton
2022-07-21 23:12 ` Dave Chinner
2022-07-21 20:47 ` Josef Bacik
2022-07-22 12:45 ` Anna Schumaker
2022-07-22 13:32 ` Josef Bacik
2022-07-22 13:43 ` Anna Schumaker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220715184433.838521-7-anna@kernel.org \
--to=anna@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox