linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] nfsd: use short read rather than i_size to set eof
@ 2016-03-21 14:42 Benjamin Coddington
  2016-03-21 21:36 ` J. Bruce Fields
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Coddington @ 2016-03-21 14:42 UTC (permalink / raw)
  To: J. Bruce Fields, Jeff Layton; +Cc: linux-nfs

Use the result of a local read to determine when to set the eof flag.  This
allows us to return the location of the end of the file atomically at the
time of the read.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfsd/nfs3proc.c |   10 ++++------
 fs/nfsd/nfs4xdr.c  |    9 +++++----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7..6be6092 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 {
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
+	unsigned long cnt = min(argp->count, max_blocksize);
 
 	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
 				SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
 	 * + 1 (xdr opaque byte count) = 26
 	 */
-	resp->count = min(argp->count, max_blocksize);
+	resp->count = cnt;
 	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
 
 	fh_copy(&resp->fh, &argp->fh);
@@ -165,11 +166,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 				  argp->offset,
 			   	  rqstp->rq_vec, argp->vlen,
 				  &resp->count);
-	if (nfserr == 0) {
-		struct inode	*inode = d_inode(resp->fh.fh_dentry);
-
-		resp->eof = (argp->offset + resp->count) >= inode->i_size;
-	}
+	if (nfserr == 0)
+		resp->eof = cnt > resp->count;
 
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..2e14839 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
 	struct xdr_stream *xdr = &resp->xdr;
 	struct xdr_buf *buf = xdr->buf;
 	u32 eof;
+	long len;
 	int space_left;
 	__be32 nfserr;
 	__be32 *p = xdr->p - 2;
@@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
+	len = maxcount;
 	nfserr = nfsd_splice_read(read->rd_rqstp, file,
 				  read->rd_offset, &maxcount);
 	if (nfserr) {
@@ -3382,8 +3384,7 @@ static __be32 nfsd4_encode_splice_read(
 		return nfserr;
 	}
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = len > maxcount;
 
 	*(p++) = htonl(eof);
 	*(p++) = htonl(maxcount);
@@ -3453,14 +3454,14 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	}
 	read->rd_vlen = v;
 
+	len = maxcount;
 	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
 			read->rd_vlen, &maxcount);
 	if (nfserr)
 		return nfserr;
 	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = len > maxcount;
 
 	tmp = htonl(eof);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] nfsd: use short read rather than i_size to set eof
  2016-03-21 14:42 [PATCH] nfsd: use short read rather than i_size to set eof Benjamin Coddington
@ 2016-03-21 21:36 ` J. Bruce Fields
  2016-03-22 14:28   ` [PATCH v2] " Benjamin Coddington
  0 siblings, 1 reply; 7+ messages in thread
From: J. Bruce Fields @ 2016-03-21 21:36 UTC (permalink / raw)
  To: Benjamin Coddington; +Cc: Jeff Layton, linux-nfs

On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> Use the result of a local read to determine when to set the eof flag.  This
> allows us to return the location of the end of the file atomically at the
> time of the read.

My only question is whether we should instead do something like:

	eof = (res > cnt) || (offset + cnt >= i_size)

That'd fix the reported bug without changing existing behavior
otherwise.

But maybe it's unlikely any client depends on existing behavior.

The only test failure I'm seeing is on pynfs WRT13, which literally just
checks that a 6-byte read of a 6-byte file returns with eof set.  The
test is correct (the spec does clearly require eof to be set in that
case), but maybe it's not important.

--b.

> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfsd/nfs3proc.c |   10 ++++------
>  fs/nfsd/nfs4xdr.c  |    9 +++++----
>  2 files changed, 9 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 7b755b7..6be6092 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  {
>  	__be32	nfserr;
>  	u32	max_blocksize = svc_max_payload(rqstp);
> +	unsigned long cnt = min(argp->count, max_blocksize);
>  
>  	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
>  				SVCFH_fmt(&argp->fh),
> @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
>  	 * + 1 (xdr opaque byte count) = 26
>  	 */
> -	resp->count = min(argp->count, max_blocksize);
> +	resp->count = cnt;
>  	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
>  
>  	fh_copy(&resp->fh, &argp->fh);
> @@ -165,11 +166,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  				  argp->offset,
>  			   	  rqstp->rq_vec, argp->vlen,
>  				  &resp->count);
> -	if (nfserr == 0) {
> -		struct inode	*inode = d_inode(resp->fh.fh_dentry);
> -
> -		resp->eof = (argp->offset + resp->count) >= inode->i_size;
> -	}
> +	if (nfserr == 0)
> +		resp->eof = cnt > resp->count;
>  
>  	RETURN_STATUS(nfserr);
>  }
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index d6ef095..2e14839 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
>  	struct xdr_stream *xdr = &resp->xdr;
>  	struct xdr_buf *buf = xdr->buf;
>  	u32 eof;
> +	long len;
>  	int space_left;
>  	__be32 nfserr;
>  	__be32 *p = xdr->p - 2;
> @@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
>  	if (xdr->end - xdr->p < 1)
>  		return nfserr_resource;
>  
> +	len = maxcount;
>  	nfserr = nfsd_splice_read(read->rd_rqstp, file,
>  				  read->rd_offset, &maxcount);
>  	if (nfserr) {
> @@ -3382,8 +3384,7 @@ static __be32 nfsd4_encode_splice_read(
>  		return nfserr;
>  	}
>  
> -	eof = (read->rd_offset + maxcount >=
> -	       d_inode(read->rd_fhp->fh_dentry)->i_size);
> +	eof = len > maxcount;
>  
>  	*(p++) = htonl(eof);
>  	*(p++) = htonl(maxcount);
> @@ -3453,14 +3454,14 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
>  	}
>  	read->rd_vlen = v;
>  
> +	len = maxcount;
>  	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
>  			read->rd_vlen, &maxcount);
>  	if (nfserr)
>  		return nfserr;
>  	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>  
> -	eof = (read->rd_offset + maxcount >=
> -	       d_inode(read->rd_fhp->fh_dentry)->i_size);
> +	eof = len > maxcount;
>  
>  	tmp = htonl(eof);
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
> -- 
> 1.7.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] nfsd: use short read rather than i_size to set eof
  2016-03-21 21:36 ` J. Bruce Fields
@ 2016-03-22 14:28   ` Benjamin Coddington
  2016-03-22 16:46     ` J. Bruce Fields
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Coddington @ 2016-03-22 14:28 UTC (permalink / raw)
  To: J. Bruce Fields, Jeff Layton; +Cc: linux-nfs

On Mon, 21 Mar 2016, J. Bruce Fields wrote:
> On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> > Use the result of a local read to determine when to set the eof flag.
> > This
> > allows us to return the location of the end of the file atomically at
> > the
> > time of the read.
>
> My only question is whether we should instead do something like:
>
>   eof = (res > cnt) || (offset + cnt >= i_size)

Yes, let's do that.

> That'd fix the reported bug without changing existing behavior
> otherwise.
>
> But maybe it's unlikely any client depends on existing behavior.
>
> The only test failure I'm seeing is on pynfs WRT13, which literally just
> checks that a 6-byte read of a 6-byte file returns with eof set.  The
> test is correct (the spec does clearly require eof to be set in that
> case), but maybe it's not important.

The above change will fix this up and be correct in the absence of races
which saves the client from having to perform another full RPC just to
retrieve eof.  This passes WRT13:

8<------------------------------------------------------------------------

Use the result of a local read to determine when to set the eof flag.  This
allows us to return the location of the end of the file atomically at the
time of the read.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfsd/nfs3proc.c |    7 ++++---
 fs/nfsd/nfs4xdr.c  |   11 +++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7..83c9abb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 {
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
+	unsigned long cnt = min(argp->count, max_blocksize);
 
 	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
 				SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
 	 * + 1 (xdr opaque byte count) = 26
 	 */
-	resp->count = min(argp->count, max_blocksize);
+	resp->count = cnt;
 	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
 
 	fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 				  &resp->count);
 	if (nfserr == 0) {
 		struct inode	*inode = d_inode(resp->fh.fh_dentry);
-
-		resp->eof = (argp->offset + resp->count) >= inode->i_size;
+		resp->eof = (cnt > resp->count) ||
+			((argp->offset + resp->count) >= inode->i_size);
 	}
 
 	RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..1d26b2b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
 	struct xdr_stream *xdr = &resp->xdr;
 	struct xdr_buf *buf = xdr->buf;
 	u32 eof;
+	long len;
 	int space_left;
 	__be32 nfserr;
 	__be32 *p = xdr->p - 2;
@@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
+	len = maxcount;
 	nfserr = nfsd_splice_read(read->rd_rqstp, file,
 				  read->rd_offset, &maxcount);
 	if (nfserr) {
@@ -3382,8 +3384,8 @@ static __be32 nfsd4_encode_splice_read(
 		return nfserr;
 	}
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = (len > maxcount) ||
+		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
 
 	*(p++) = htonl(eof);
 	*(p++) = htonl(maxcount);
@@ -3453,14 +3455,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 	}
 	read->rd_vlen = v;
 
+	len = maxcount;
 	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
 			read->rd_vlen, &maxcount);
 	if (nfserr)
 		return nfserr;
 	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = (len > maxcount) ||
+		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
 
 	tmp = htonl(eof);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] nfsd: use short read rather than i_size to set eof
  2016-03-22 14:28   ` [PATCH v2] " Benjamin Coddington
@ 2016-03-22 16:46     ` J. Bruce Fields
  2016-03-22 18:53       ` J. Bruce Fields
  0 siblings, 1 reply; 7+ messages in thread
From: J. Bruce Fields @ 2016-03-22 16:46 UTC (permalink / raw)
  To: Benjamin Coddington; +Cc: Jeff Layton, linux-nfs

On Tue, Mar 22, 2016 at 10:28:36AM -0400, Benjamin Coddington wrote:
> On Mon, 21 Mar 2016, J. Bruce Fields wrote:
> > On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> > > Use the result of a local read to determine when to set the eof flag.
> > > This
> > > allows us to return the location of the end of the file atomically at
> > > the
> > > time of the read.
> >
> > My only question is whether we should instead do something like:
> >
> >   eof = (res > cnt) || (offset + cnt >= i_size)
> 
> Yes, let's do that.
> 
> > That'd fix the reported bug without changing existing behavior
> > otherwise.
> >
> > But maybe it's unlikely any client depends on existing behavior.
> >
> > The only test failure I'm seeing is on pynfs WRT13, which literally just
> > checks that a 6-byte read of a 6-byte file returns with eof set.  The
> > test is correct (the spec does clearly require eof to be set in that
> > case), but maybe it's not important.
> 
> The above change will fix this up and be correct in the absence of races
> which saves the client from having to perform another full RPC just to
> retrieve eof.

Seems likely to be rare (and possibly papered over by caching--does the
client even send a read if it would extend past the size returned from a
recent getattr?).

But, this does seem like the most conservative approach for now.
Applying.

Thanks!

--b.

> This passes WRT13:
> 
> 8<------------------------------------------------------------------------
> 
> Use the result of a local read to determine when to set the eof flag.  This
> allows us to return the location of the end of the file atomically at the
> time of the read.
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfsd/nfs3proc.c |    7 ++++---
>  fs/nfsd/nfs4xdr.c  |   11 +++++++----
>  2 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 7b755b7..83c9abb 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  {
>  	__be32	nfserr;
>  	u32	max_blocksize = svc_max_payload(rqstp);
> +	unsigned long cnt = min(argp->count, max_blocksize);
>  
>  	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
>  				SVCFH_fmt(&argp->fh),
> @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
>  	 * + 1 (xdr opaque byte count) = 26
>  	 */
> -	resp->count = min(argp->count, max_blocksize);
> +	resp->count = cnt;
>  	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
>  
>  	fh_copy(&resp->fh, &argp->fh);
> @@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  				  &resp->count);
>  	if (nfserr == 0) {
>  		struct inode	*inode = d_inode(resp->fh.fh_dentry);
> -
> -		resp->eof = (argp->offset + resp->count) >= inode->i_size;
> +		resp->eof = (cnt > resp->count) ||
> +			((argp->offset + resp->count) >= inode->i_size);
>  	}
>  
>  	RETURN_STATUS(nfserr);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index d6ef095..1d26b2b 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
>  	struct xdr_stream *xdr = &resp->xdr;
>  	struct xdr_buf *buf = xdr->buf;
>  	u32 eof;
> +	long len;
>  	int space_left;
>  	__be32 nfserr;
>  	__be32 *p = xdr->p - 2;
> @@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
>  	if (xdr->end - xdr->p < 1)
>  		return nfserr_resource;
>  
> +	len = maxcount;
>  	nfserr = nfsd_splice_read(read->rd_rqstp, file,
>  				  read->rd_offset, &maxcount);
>  	if (nfserr) {
> @@ -3382,8 +3384,8 @@ static __be32 nfsd4_encode_splice_read(
>  		return nfserr;
>  	}
>  
> -	eof = (read->rd_offset + maxcount >=
> -	       d_inode(read->rd_fhp->fh_dentry)->i_size);
> +	eof = (len > maxcount) ||
> +		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
>  
>  	*(p++) = htonl(eof);
>  	*(p++) = htonl(maxcount);
> @@ -3453,14 +3455,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
>  	}
>  	read->rd_vlen = v;
>  
> +	len = maxcount;
>  	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
>  			read->rd_vlen, &maxcount);
>  	if (nfserr)
>  		return nfserr;
>  	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>  
> -	eof = (read->rd_offset + maxcount >=
> -	       d_inode(read->rd_fhp->fh_dentry)->i_size);
> +	eof = (len > maxcount) ||
> +		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
>  
>  	tmp = htonl(eof);
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
> -- 
> 1.7.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] nfsd: use short read rather than i_size to set eof
  2016-03-22 16:46     ` J. Bruce Fields
@ 2016-03-22 18:53       ` J. Bruce Fields
  2016-03-22 20:51         ` Benjamin Coddington
  0 siblings, 1 reply; 7+ messages in thread
From: J. Bruce Fields @ 2016-03-22 18:53 UTC (permalink / raw)
  To: Benjamin Coddington; +Cc: Jeff Layton, linux-nfs

Possibly overkill, but I think I'll fold in something like this if you
don't see an objection.

--b.

commit 58e18a2a14a0
Author: J. Bruce Fields <bfields@redhat.com>
Date:   Tue Mar 22 14:08:11 2016 -0400

    nfsd: document read eof logic
    
    The choice of checks here is a little subtle, let's document this for
    posterity.
    
    Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 83c9abb33e8b..df0f0a86f21d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 				  &resp->count);
 	if (nfserr == 0) {
 		struct inode	*inode = d_inode(resp->fh.fh_dentry);
-		resp->eof = (cnt > resp->count) ||
-			((argp->offset + resp->count) >= inode->i_size);
+		resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
 	}
 
 	RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 90232bd7e498..9df898ba648f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
 		return nfserr;
 	}
 
-	eof = (len > maxcount) ||
-		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
+	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+				d_inode(read->rd_fhp->fh_dentry)->i_size);
 
 	*(p++) = htonl(eof);
 	*(p++) = htonl(maxcount);
@@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 		return nfserr;
 	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-	eof = (len > maxcount) ||
-		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
+	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+				d_inode(read->rd_fhp->fh_dentry)->i_size);
 
 	tmp = htonl(eof);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba316f23f..6244e073c137 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
 	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
+static inline bool nfsd_eof_on_read(long requested, long read,
+				loff_t offset, loff_t size)
+{
+	/* We assume a short read means eof: */
+	if (requested > read)
+		return true;
+	/*
+	 * A non-short read might also reach end of file.  The spec
+	 * still requires us to set eof in that case.
+	 *
+	 * Further operations may have modified the file size since
+	 * the read, so the following check is not atomic with the read.
+	 * The only case we've seen that cause a problem for a client
+	 * is the case where the read returned a count of 0 without
+	 * setting eof.  That case was fixed by the addition of the
+	 * above check.
+	 */
+	return (offset + read >= size);
+}
+
 #endif /* LINUX_NFSD_VFS_H */

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] nfsd: use short read rather than i_size to set eof
  2016-03-22 18:53       ` J. Bruce Fields
@ 2016-03-22 20:51         ` Benjamin Coddington
  2016-03-22 21:22           ` J. Bruce Fields
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Coddington @ 2016-03-22 20:51 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Jeff Layton, linux-nfs

On Tue, 22 Mar 2016, J. Bruce Fields wrote:

> Possibly overkill, but I think I'll fold in something like this if you
> don't see an objection.

No objection.  To answer your earlier question (does the client even send a
read if it would extend past the size returned from a recent getattr?):

Looks to me (by testing with a server that never sets eof) the answer is no.
The client won't issue reads beyond the end of the file.

Also the previous patch's subject is now a little misleading.  Maybe
s/rather than/as well as/ would do.

Ben

> commit 58e18a2a14a0
> Author: J. Bruce Fields <bfields@redhat.com>
> Date:   Tue Mar 22 14:08:11 2016 -0400
>
>     nfsd: document read eof logic
>
>     The choice of checks here is a little subtle, let's document this for
>     posterity.
>
>     Signed-off-by: J. Bruce Fields <bfields@redhat.com>
>
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 83c9abb33e8b..df0f0a86f21d 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
>  				  &resp->count);
>  	if (nfserr == 0) {
>  		struct inode	*inode = d_inode(resp->fh.fh_dentry);
> -		resp->eof = (cnt > resp->count) ||
> -			((argp->offset + resp->count) >= inode->i_size);
> +		resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
>  	}
>
>  	RETURN_STATUS(nfserr);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 90232bd7e498..9df898ba648f 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
>  		return nfserr;
>  	}
>
> -	eof = (len > maxcount) ||
> -		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> +	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> +				d_inode(read->rd_fhp->fh_dentry)->i_size);
>
>  	*(p++) = htonl(eof);
>  	*(p++) = htonl(maxcount);
> @@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
>  		return nfserr;
>  	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>
> -	eof = (len > maxcount) ||
> -		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> +	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> +				d_inode(read->rd_fhp->fh_dentry)->i_size);
>
>  	tmp = htonl(eof);
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index c11ba316f23f..6244e073c137 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
>  	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
>  }
>
> +static inline bool nfsd_eof_on_read(long requested, long read,
> +				loff_t offset, loff_t size)
> +{
> +	/* We assume a short read means eof: */
> +	if (requested > read)
> +		return true;
> +	/*
> +	 * A non-short read might also reach end of file.  The spec
> +	 * still requires us to set eof in that case.
> +	 *
> +	 * Further operations may have modified the file size since
> +	 * the read, so the following check is not atomic with the read.
> +	 * The only case we've seen that cause a problem for a client
> +	 * is the case where the read returned a count of 0 without
> +	 * setting eof.  That case was fixed by the addition of the
> +	 * above check.
> +	 */
> +	return (offset + read >= size);
> +}
> +
>  #endif /* LINUX_NFSD_VFS_H */
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] nfsd: use short read rather than i_size to set eof
  2016-03-22 20:51         ` Benjamin Coddington
@ 2016-03-22 21:22           ` J. Bruce Fields
  0 siblings, 0 replies; 7+ messages in thread
From: J. Bruce Fields @ 2016-03-22 21:22 UTC (permalink / raw)
  To: Benjamin Coddington; +Cc: Jeff Layton, linux-nfs

On Tue, Mar 22, 2016 at 04:51:47PM -0400, Benjamin Coddington wrote:
> On Tue, 22 Mar 2016, J. Bruce Fields wrote:
> 
> > Possibly overkill, but I think I'll fold in something like this if you
> > don't see an objection.
> 
> No objection.  To answer your earlier question (does the client even send a
> read if it would extend past the size returned from a recent getattr?):
> 
> Looks to me (by testing with a server that never sets eof) the answer is no.
> The client won't issue reads beyond the end of the file.
> 
> Also the previous patch's subject is now a little misleading.  Maybe
> s/rather than/as well as/ would do.

OK.--b.

> 
> Ben
> 
> > commit 58e18a2a14a0
> > Author: J. Bruce Fields <bfields@redhat.com>
> > Date:   Tue Mar 22 14:08:11 2016 -0400
> >
> >     nfsd: document read eof logic
> >
> >     The choice of checks here is a little subtle, let's document this for
> >     posterity.
> >
> >     Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> >
> > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> > index 83c9abb33e8b..df0f0a86f21d 100644
> > --- a/fs/nfsd/nfs3proc.c
> > +++ b/fs/nfsd/nfs3proc.c
> > @@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> >  				  &resp->count);
> >  	if (nfserr == 0) {
> >  		struct inode	*inode = d_inode(resp->fh.fh_dentry);
> > -		resp->eof = (cnt > resp->count) ||
> > -			((argp->offset + resp->count) >= inode->i_size);
> > +		resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
> >  	}
> >
> >  	RETURN_STATUS(nfserr);
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 90232bd7e498..9df898ba648f 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
> >  		return nfserr;
> >  	}
> >
> > -	eof = (len > maxcount) ||
> > -		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> > +	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> > +				d_inode(read->rd_fhp->fh_dentry)->i_size);
> >
> >  	*(p++) = htonl(eof);
> >  	*(p++) = htonl(maxcount);
> > @@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
> >  		return nfserr;
> >  	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
> >
> > -	eof = (len > maxcount) ||
> > -		((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> > +	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> > +				d_inode(read->rd_fhp->fh_dentry)->i_size);
> >
> >  	tmp = htonl(eof);
> >  	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
> > diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> > index c11ba316f23f..6244e073c137 100644
> > --- a/fs/nfsd/vfs.h
> > +++ b/fs/nfsd/vfs.h
> > @@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
> >  	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
> >  }
> >
> > +static inline bool nfsd_eof_on_read(long requested, long read,
> > +				loff_t offset, loff_t size)
> > +{
> > +	/* We assume a short read means eof: */
> > +	if (requested > read)
> > +		return true;
> > +	/*
> > +	 * A non-short read might also reach end of file.  The spec
> > +	 * still requires us to set eof in that case.
> > +	 *
> > +	 * Further operations may have modified the file size since
> > +	 * the read, so the following check is not atomic with the read.
> > +	 * The only case we've seen that cause a problem for a client
> > +	 * is the case where the read returned a count of 0 without
> > +	 * setting eof.  That case was fixed by the addition of the
> > +	 * above check.
> > +	 */
> > +	return (offset + read >= size);
> > +}
> > +
> >  #endif /* LINUX_NFSD_VFS_H */
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-03-22 21:22 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-21 14:42 [PATCH] nfsd: use short read rather than i_size to set eof Benjamin Coddington
2016-03-21 21:36 ` J. Bruce Fields
2016-03-22 14:28   ` [PATCH v2] " Benjamin Coddington
2016-03-22 16:46     ` J. Bruce Fields
2016-03-22 18:53       ` J. Bruce Fields
2016-03-22 20:51         ` Benjamin Coddington
2016-03-22 21:22           ` J. Bruce Fields

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).