* [PATCH 1/2] NFSD: nfsd4_encode_read{v}() should encode eof and maxcount
2019-02-22 21:58 [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation Anna Schumaker
@ 2019-02-22 21:58 ` Anna Schumaker
2019-02-22 21:58 ` [PATCH 2/2] NFSD: Add basic READ_PLUS support Anna Schumaker
2019-03-01 17:03 ` [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation J. Bruce Fields
2 siblings, 0 replies; 4+ messages in thread
From: Anna Schumaker @ 2019-02-22 21:58 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs
I intend to reuse nfsd4_encode_readv() for READ_PLUS, so I need an
alternate way to encode these values. I think it makes sense for
nfsd4_encode_read() to handle this in a single place for both splice and
readv paths.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfsd/nfs4xdr.c | 67 ++++++++++++++++++-----------------------------
1 file changed, 26 insertions(+), 41 deletions(-)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3de42a729093..bb487e5c022c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3453,24 +3453,19 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
- struct file *file, unsigned long maxcount)
+ struct file *file, unsigned long *maxcount)
{
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
- u32 eof;
- long len;
int space_left;
__be32 nfserr;
- __be32 *p = xdr->p - 2;
/* Make sure there will be room for padding if needed */
if (xdr->end - xdr->p < 1)
return nfserr_resource;
- len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
- file, read->rd_offset, &maxcount);
- read->rd_length = maxcount;
+ file, read->rd_offset, maxcount);
if (nfserr) {
/*
* nfsd_splice_actor may have already messed with the
@@ -3481,27 +3476,21 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
- *(p++) = htonl(eof);
- *(p++) = htonl(maxcount);
-
- buf->page_len = maxcount;
- buf->len += maxcount;
- xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
+ buf->page_len = *maxcount;
+ buf->len += *maxcount;
+ xdr->page_ptr += (buf->page_base + *maxcount + PAGE_SIZE - 1)
/ PAGE_SIZE;
/* Use rest of head for padding and remaining ops: */
buf->tail[0].iov_base = xdr->p;
buf->tail[0].iov_len = 0;
xdr->iov = buf->tail;
- if (maxcount&3) {
- int pad = 4 - (maxcount&3);
+ if (*maxcount&3) {
+ int pad = 4 - (*maxcount&3);
*(xdr->p++) = 0;
- buf->tail[0].iov_base += maxcount&3;
+ buf->tail[0].iov_base += *maxcount&3;
buf->tail[0].iov_len = pad;
buf->len += pad;
}
@@ -3516,21 +3505,19 @@ static __be32 nfsd4_encode_splice_read(
static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
- struct file *file, unsigned long maxcount)
+ struct file *file, unsigned long *maxcount)
{
struct xdr_stream *xdr = &resp->xdr;
- u32 eof;
int v;
- int starting_len = xdr->buf->len - 8;
+ int starting_len = xdr->buf->len;
long len;
int thislen;
__be32 nfserr;
- __be32 tmp;
__be32 *p;
u32 zzz = 0;
int pad;
- len = maxcount;
+ len = *maxcount;
v = 0;
thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
@@ -3552,25 +3539,14 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;
- len = maxcount;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
- read->rd_length = maxcount;
+ resp->rqstp->rq_vec, read->rd_vlen, maxcount);
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
-
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ xdr_truncate_encode(xdr, starting_len + ((*maxcount+3)&~3));
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
- tmp = htonl(maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
-
- pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
- &zzz, pad);
+ pad = (*maxcount&3) ? 4 - (*maxcount&3) : 0;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + *maxcount, &zzz, pad);
return 0;
}
@@ -3585,6 +3561,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
int starting_len = xdr->buf->len;
struct raparms *ra = NULL;
__be32 *p;
+ long len;
+ u32 eof;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
@@ -3602,15 +3580,22 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
maxcount = min_t(unsigned long, maxcount,
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
+ len = maxcount;
if (read->rd_tmp_file)
ra = nfsd_init_raparms(file);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
- nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ nfserr = nfsd4_encode_splice_read(resp, read, file, &maxcount);
else
- nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+ nfserr = nfsd4_encode_readv(resp, read, file, &maxcount);
+
+ read->rd_length = maxcount;
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
+ *p++ = cpu_to_be32(eof);
+ *p++ = cpu_to_be32(maxcount);
if (ra)
nfsd_put_raparams(file, ra);
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 2/2] NFSD: Add basic READ_PLUS support
2019-02-22 21:58 [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation Anna Schumaker
2019-02-22 21:58 ` [PATCH 1/2] NFSD: nfsd4_encode_read{v}() should encode eof and maxcount Anna Schumaker
@ 2019-02-22 21:58 ` Anna Schumaker
2019-03-01 17:03 ` [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation J. Bruce Fields
2 siblings, 0 replies; 4+ messages in thread
From: Anna Schumaker @ 2019-02-22 21:58 UTC (permalink / raw)
To: bfields; +Cc: linux-nfs
This patch adds READ_PLUS support for both NFS4_CONTENT_DATA and
NFS4_CONTENT_HOLE segments. I keep things simple for now by only
returning a hole segment if it is the first segment found at the given
offset. Everything else, including other hole segments, will be encoded
as data.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
fs/nfsd/nfs4proc.c | 16 +++++++
fs/nfsd/nfs4xdr.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 127 insertions(+), 2 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0cfd257ffdaf..1c5f2c3da55f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2180,6 +2180,16 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
+static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = svc_max_payload(rqstp);
+ u32 rlen = min(op->u.read.rd_length, maxcount);
+ /* enough extra xdr space for encoding either a hole or data segment. */
+ u32 xdr = 5;
+
+ return (op_encode_hdr_size + 2 + xdr + XDR_QUADLEN(rlen)) * sizeof(__be32);
+}
+
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = 0, rlen = 0;
@@ -2701,6 +2711,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
+ [OP_READ_PLUS] = {
+ .op_func = nfsd4_read,
+ .op_name = "OP_READ_PLUS",
+ .op_rsize_bop = nfsd4_read_plus_rsize,
+ .op_get_currentstateid = nfsd4_get_readstateid,
+ },
[OP_SEEK] = {
.op_func = nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bb487e5c022c..ec953efd24c2 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1882,7 +1882,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
[OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
@@ -4273,7 +4273,116 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
p = xdr_encode_hyper(p, os->count);
*p++ = cpu_to_be32(0);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
+ struct file *file)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ unsigned long maxcount;
+ __be32 *p, nfserr;
+
+ p = xdr_reserve_space(xdr, 4 + 8 + 4);
+ if (!p)
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+ maxcount = svc_max_payload(resp->rqstp);
+ maxcount = min_t(unsigned long, maxcount,
+ (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount, read->rd_length);
+
+ if (file->f_op->splice_read &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+ nfserr = nfsd4_encode_splice_read(resp, read, file, &maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, file, &maxcount);
+
+ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ *p++ = cpu_to_be32(maxcount);
+
+ read->rd_offset += maxcount;
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
+ unsigned long length)
+{
+ __be32 *p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
+ if (!p)
+ return nfserr_resource;
+
+ length = min_t(unsigned long, read->rd_length, length);
+
+ *p++ = cpu_to_be32(NFS4_CONTENT_HOLE);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ p = xdr_encode_hyper(p, length);
+
+ read->rd_offset += length;
+ read->rd_length -= length;
+ return nfs_ok;
+}
+static __be32
+nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct file *file = read->rd_filp;
+ int starting_len = xdr->buf->len;
+ struct raparms *ra = NULL;
+ loff_t data_pos;
+ __be32 *p;
+ u32 eof, segments = 0;
+
+ if (nfserr)
+ goto out;
+
+ /* eof flag, segment count */
+ p = xdr_reserve_space(xdr, 4 + 4 );
+ if (!p) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
+ xdr_commit_encode(xdr);
+
+ if (read->rd_tmp_file)
+ ra = nfsd_init_raparms(file);
+
+ data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
+ if (data_pos == -ENXIO)
+ data_pos = i_size_read(file_inode(file));
+
+ if (data_pos > read->rd_offset) {
+ nfserr = nfsd4_encode_read_plus_hole(resp, read,
+ data_pos - read->rd_offset);
+ if (nfserr)
+ goto out;
+ segments++;
+ }
+
+ if (read->rd_length > 0) {
+ nfserr = nfsd4_encode_read_plus_data(resp, read, file);
+ segments++;
+ }
+
+ eof = (read->rd_offset >= i_size_read(file_inode(file)));
+ *p++ = cpu_to_be32(eof);
+ *p++ = cpu_to_be32(segments);
+
+ if (ra)
+ nfsd_put_raparams(file, ra);
+
+ if (nfserr)
+ xdr_truncate_encode(xdr, starting_len);
+
+out:
+ if (file)
+ fput(file);
return nfserr;
}
@@ -4381,7 +4490,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
[OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation
2019-02-22 21:58 [PATCH 0/2] NFSD: Add support for the v4.2 READ_PLUS operation Anna Schumaker
2019-02-22 21:58 ` [PATCH 1/2] NFSD: nfsd4_encode_read{v}() should encode eof and maxcount Anna Schumaker
2019-02-22 21:58 ` [PATCH 2/2] NFSD: Add basic READ_PLUS support Anna Schumaker
@ 2019-03-01 17:03 ` J. Bruce Fields
2 siblings, 0 replies; 4+ messages in thread
From: J. Bruce Fields @ 2019-03-01 17:03 UTC (permalink / raw)
To: Anna Schumaker; +Cc: linux-nfs
On Fri, Feb 22, 2019 at 04:58:48PM -0500, Anna Schumaker wrote:
> These patches add server support for the READ_PLUS operation. This
> operation is meant to improve file reading performance when working with
> sparse files, but there are some issues around the use of vfs_llseek() to
> identify hole and data segments when encoding the reply. I've done a
> bunch of testing on virtual machines,
Maybe the VM<->VM case is important too, but it'd be a little easier to
understand a case with real hardware, I think. What kind of disk are
you using, our of curiosity?
Also, what are the file sizes, and the rsize? (Apologies if I
overlooked that.)
> and I found that READ_PLUS performs best if:
It's interesting to think about why these are:
> 1) The file being read is not yet in the server's page cache.
I don't understand this one.
> 2) The read request begins with a hole segment. And
If I understand your current implementation, it's basically:
- seek with SEEK_DATA.
- if that finds a hole, read the rest of the requested range and
return two segments (a hole plus ordinary read results)
- otherwise just return ordinary read data for the whole range.
So, right, in the case the read range starts with data, the seek was
just a waste of time, makes sense.
> 3) The server only performs one llseek() call during encoding
OK, so for that we'd need to compare to a different implementation,
which is what you did elsewhere:
> I also have performance numbers for if we encode every hole and data
> segment but I figured this email was long enough already. I'm happy to
> share it if requested!
Got it. (And, sure, that might be interesting.)
--b.
^ permalink raw reply [flat|nested] 4+ messages in thread