linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
	"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 36/52] nfsd4: allow large readdirs
Date: Thu, 22 May 2014 15:32:11 -0400	[thread overview]
Message-ID: <1400787148-25941-37-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1400787148-25941-1-git-send-email-bfields@redhat.com>

From: "J. Bruce Fields" <bfields@redhat.com>

Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c |   9 ++--
 fs/nfsd/nfs4xdr.c  | 137 +++++++++++++++++++++++++++++------------------------
 fs/nfsd/xdr4.h     |   5 +-
 3 files changed, 82 insertions(+), 69 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e1196ed..f56f5e3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1502,13 +1502,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 
 static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
+	u32 maxcount = svc_max_payload(rqstp);
 	u32 rlen = op->u.readdir.rd_maxcount;
 
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
+	if (rlen > maxcount)
+		rlen = maxcount;
 
-	return (op_encode_hdr_size + op_encode_verifier_maxsz)
-		 * sizeof(__be32) + rlen;
+	return (op_encode_hdr_size + op_encode_verifier_maxsz +
+		XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 311e281..a2524b3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+			const char *name, int namlen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
 	}
 out_encode:
-	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
-					cd->rd_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
 					cd->rd_rqstp, ignore_crossmnt);
 out_put:
 	dput(dentry);
@@ -2638,9 +2637,12 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-	if (buflen < 6)
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6);
+	if (!p)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 {
 	struct readdir_cd *ccd = ccdv;
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-	int buflen;
-	__be32 *p = cd->buffer;
-	__be32 *cookiep;
+	struct xdr_stream *xdr = cd->xdr;
+	int start_offset = xdr->buf->len;
+	int cookie_offset;
+	int entry_bytes;
 	__be32 nfserr = nfserr_toosmall;
+	__be64 wire_offset;
+	__be32 *p;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		return 0;
 	}
 
-	if (cd->offset)
-		xdr_encode_hyper(cd->offset, (u64) offset);
+	if (cd->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-	if (buflen < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto fail;
-
 	*p++ = xdr_one;                             /* mark entry present */
-	cookiep = p;
+	cookie_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 3*4 + namlen);
+	if (!p)
+		goto fail;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
 	switch (nfserr) {
 	case nfs_ok:
 		break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		 */
 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
 			goto fail;
-		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		p = nfsd4_encode_rdattr_error(xdr, nfserr);
 		if (p == NULL) {
 			nfserr = nfserr_toosmall;
 			goto fail;
 		}
 	}
-	cd->buflen -= (p - cd->buffer);
-	cd->buffer = p;
-	cd->offset = cookiep;
+	nfserr = nfserr_toosmall;
+	entry_bytes = xdr->buf->len - start_offset;
+	if (entry_bytes > cd->rd_maxcount)
+		goto fail;
+	cd->rd_maxcount -= entry_bytes;
+	cd->cookie_offset = cookie_offset;
 skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
+	xdr_truncate_encode(xdr, start_offset);
 	cd->common.err = nfserr;
 	return -EINVAL;
 }
@@ -3206,10 +3220,11 @@ static __be32
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
+	int bytes_left;
 	loff_t offset;
+	__be64 wire_offset;
 	struct xdr_stream *xdr = &resp->xdr;
 	int starting_len = xdr->buf->len;
-	__be32 *page, *tailbase;
 	__be32 *p;
 
 	if (nfserr)
@@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	if (!p)
 		return nfserr_resource;
 
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
-
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
 	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
 				- (char *)resp->xdr.buf->head[0].iov_base;
-	tailbase = p;
-
-	maxcount = PAGE_SIZE;
-	if (maxcount > readdir->rd_maxcount)
-		maxcount = readdir->rd_maxcount;
 
 	/*
-	 * Convert from bytes to words, account for the two words already
-	 * written, make sure to leave two words at the end for the next
-	 * pointer and eof field.
+	 * Number of bytes left for directory entries allowing for the
+	 * final 8 bytes of the readdir and a following failed op:
+	 */
+	bytes_left = xdr->buf->buflen - xdr->buf->len
+			- COMPOUND_ERR_SLACK_SPACE - 8;
+	if (bytes_left < 0) {
+		nfserr = nfserr_resource;
+		goto err_no_verf;
+	}
+	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	/*
+	 * Note the rfc defines rd_maxcount as the size of the
+	 * READDIR4resok structure, which includes the verifier above
+	 * and the 8 bytes encoded at the end of this function:
 	 */
-	maxcount = (maxcount >> 2) - 4;
-	if (maxcount < 0) {
-		nfserr =  nfserr_toosmall;
+	if (maxcount < 16) {
+		nfserr = nfserr_toosmall;
 		goto err_no_verf;
 	}
+	maxcount = min_t(int, maxcount-16, bytes_left);
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
+	readdir->xdr = xdr;
+	readdir->rd_maxcount = maxcount;
 	readdir->common.err = 0;
-	readdir->buflen = maxcount;
-	readdir->buffer = page;
-	readdir->offset = NULL;
+	readdir->cookie_offset = 0;
 
 	offset = readdir->rd_cookie;
 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 			      &readdir->common, nfsd4_encode_dirent);
 	if (nfserr == nfs_ok &&
 	    readdir->common.err == nfserr_toosmall &&
-	    readdir->buffer == page) 
-		nfserr = nfserr_toosmall;
+	    xdr->buf->len == starting_len + 8) {
+		/* nothing encoded; which limit did we hit?: */
+		if (maxcount - 16 < bytes_left)
+			/* It was the fault of rd_maxcount: */
+			nfserr = nfserr_toosmall;
+		else
+			/* We ran out of buffer space: */
+			nfserr = nfserr_resource;
+	}
 	if (nfserr)
 		goto err_no_verf;
 
-	if (readdir->offset)
-		xdr_encode_hyper(readdir->offset, offset);
+	if (readdir->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	p = readdir->buffer;
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		goto err_no_verf;
+	}
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xdr.buf->page_len = ((char *)p) -
-		(char*)page_address(*(resp->rqstp->rq_next_page-1));
-	xdr->buf->len += xdr->buf->page_len;
-
-	xdr->iov = xdr->buf->tail;
-
-	xdr->page_ptr++;
-	xdr->buf->buflen -= PAGE_SIZE;
-	xdr->iov = xdr->buf->tail;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xdr.buf->tail[0].iov_base = tailbase;
-	resp->xdr.buf->tail[0].iov_len = 0;
-	resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
-	resp->xdr.end = resp->xdr.p +
-			(PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
 
 	return 0;
 err_no_verf:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 41e5229..18cbb6d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
 	struct svc_fh * rd_fhp;             /* response */
 
 	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
+	struct xdr_stream	*xdr;
+	int			cookie_offset;
 };
 
 struct nfsd4_release_lockowner {
-- 
1.9.0


  parent reply	other threads:[~2014-05-22 19:32 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-22 19:31 xdr encoding fixes (v3) J. Bruce Fields
2014-05-22 19:31 ` [PATCH 01/52] nfsd4: READ, READDIR, etc., are idempotent J. Bruce Fields
2014-05-23  1:21   ` Kinglong Mee
2014-05-23 13:15     ` J. Bruce Fields
2014-05-22 19:31 ` [PATCH 02/52] nfsd4: allow larger 4.1 session drc slots J. Bruce Fields
2014-05-22 19:31 ` [PATCH 03/52] nfsd4: fill in some missing op_name's J. Bruce Fields
2014-05-22 19:31 ` [PATCH 04/52] nfsd4: decoding errors can still be cached and require space J. Bruce Fields
2014-05-22 19:31 ` [PATCH 05/52] nfsd4: read size estimate should include padding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 06/52] nfsd4: fix write reply size estimate J. Bruce Fields
2014-05-22 19:31 ` [PATCH 07/52] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-22 19:31 ` [PATCH 08/52] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-22 19:31 ` [PATCH 09/52] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-22 19:31 ` [PATCH 10/52] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-22 19:31 ` [PATCH 11/52] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-22 19:31 ` [PATCH 12/52] nfsd4: allow space for final error return J. Bruce Fields
2014-05-22 19:31 ` [PATCH 13/52] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-25  8:46   ` Kinglong Mee
2014-05-22 19:31 ` [PATCH 14/52] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 15/52] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-22 19:31 ` [PATCH 16/52] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-22 19:31 ` [PATCH 17/52] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-22 19:31 ` [PATCH 18/52] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 19/52] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 20/52] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-22 19:31 ` [PATCH 21/52] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-22 19:31 ` [PATCH 22/52] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-22 19:31 ` [PATCH 23/52] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-22 19:31 ` [PATCH 24/52] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-22 19:32 ` [PATCH 25/52] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 26/52] nfsd4: allow encoding across page boundaries J. Bruce Fields
2014-05-22 19:32 ` [PATCH 27/52] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 28/52] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-22 19:32 ` [PATCH 29/52] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-22 19:32 ` [PATCH 30/52] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 31/52] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-22 19:32 ` [PATCH 32/52] nfsd4: fix buflen calculation after read encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 33/52] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-22 19:32 ` [PATCH 34/52] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-22 19:32 ` [PATCH 35/52] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-22 19:32 ` J. Bruce Fields [this message]
2014-05-22 19:32 ` [PATCH 37/52] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-22 19:32 ` [PATCH 38/52] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-22 19:32 ` [PATCH 39/52] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-22 19:32 ` [PATCH 40/52] nfsd4: estimate sequence " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 41/52] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-28  8:09   ` Christoph Hellwig
2014-05-28 14:01     ` J. Bruce Fields
2014-05-28 14:13       ` Anna Schumaker
2014-05-28 14:23         ` J. Bruce Fields
2014-05-28 14:27           ` Anna Schumaker
2014-06-03  4:18           ` Weston Andros Adamson
2014-06-03 14:10             ` J. Bruce Fields
2014-06-03 14:24               ` Weston Andros Adamson
2014-06-03 14:35                 ` J. Bruce Fields
2014-05-28 21:08       ` J. Bruce Fields
2014-06-02 22:12         ` J. Bruce Fields
2014-05-22 19:32 ` [PATCH 42/52] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-22 19:32 ` [PATCH 43/52] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-22 19:32 ` [PATCH 44/52] nfsd4: read encoding cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 45/52] nfsd4: more " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 46/52] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-22 19:32 ` [PATCH 47/52] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-22 19:32 ` [PATCH 48/52] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 49/52] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 50/52] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-22 19:32 ` [PATCH 51/52] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 52/52] nfsd4: better reservation of head space for krb5 J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1400787148-25941-37-git-send-email-bfields@redhat.com \
    --to=bfields@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).