linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
	"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 30/43] nfsd4: allow large readdirs
Date: Sun, 11 May 2014 16:52:35 -0400	[thread overview]
Message-ID: <1399841568-19716-31-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1399841568-19716-1-git-send-email-bfields@redhat.com>

From: "J. Bruce Fields" <bfields@redhat.com>

Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c |    3 --
 fs/nfsd/nfs4xdr.c  |  134 +++++++++++++++++++++++++++++-----------------------
 fs/nfsd/xdr4.h     |    5 +-
 3 files changed, 76 insertions(+), 66 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index be638c1..0ab65ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1451,9 +1451,6 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
 {
 	u32 rlen = op->u.readdir.rd_maxcount;
 
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
-
 	return (op_encode_hdr_size + op_encode_verifier_maxsz)
 		 * sizeof(__be32) + rlen;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0d8a18d..731587c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2576,8 +2576,8 @@ static inline int attributes_need_mount(u32 *bmval)
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+			const char *name, int namlen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -2629,7 +2629,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
 	}
 out_encode:
-	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, cd->rd_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
 					cd->rd_rqstp, ignore_crossmnt);
 out_put:
 	dput(dentry);
@@ -2638,9 +2638,12 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-	if (buflen < 6)
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6);
+	if (!p)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2660,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 {
 	struct readdir_cd *ccd = ccdv;
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-	int buflen;
-	__be32 *p = cd->buffer;
-	__be32 *cookiep;
+	struct xdr_stream *xdr = cd->xdr;
+	int start_offset = xdr->buf->len;
+	int cookie_offset;
+	int entry_bytes;
 	__be32 nfserr = nfserr_toosmall;
+	__be64 wire_offset;
+	__be32 *p;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -2668,19 +2674,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		return 0;
 	}
 
-	if (cd->offset)
-		xdr_encode_hyper(cd->offset, (u64) offset);
+	if (cd->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, &wire_offset, 8);
+	}
 
-	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-	if (buflen < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto fail;
-
 	*p++ = xdr_one;                             /* mark entry present */
-	cookiep = p;
+	cookie_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 3*4 + namlen);
+	if (!p)
+		goto fail;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
 	switch (nfserr) {
 	case nfs_ok:
 		break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		 */
 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
 			goto fail;
-		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		p = nfsd4_encode_rdattr_error(xdr, nfserr);
 		if (p == NULL) {
 			nfserr = nfserr_toosmall;
 			goto fail;
 		}
 	}
-	cd->buflen -= (p - cd->buffer);
-	cd->buffer = p;
-	cd->offset = cookiep;
+	nfserr = nfserr_toosmall;
+	entry_bytes = xdr->buf->len - start_offset;
+	if (entry_bytes > cd->rd_maxcount)
+		goto fail;
+	cd->rd_maxcount -= entry_bytes;
+	cd->cookie_offset = cookie_offset;
 skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
+	xdr_truncate_encode(xdr, start_offset);
 	cd->common.err = nfserr;
 	return -EINVAL;
 }
@@ -3200,10 +3214,11 @@ static __be32
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
+	int bytes_left;
 	loff_t offset;
+	__be64 wire_offset;
 	struct xdr_stream *xdr = &resp->xdr;
 	int starting_len = xdr->buf->len;
-	__be32 *page, *tailbase;
 	__be32 *p;
 
 	if (nfserr)
@@ -3213,38 +3228,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	if (!p)
 		return nfserr_resource;
 
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
-
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
 	resp->xdr.buf->head[0].iov_len = ((char*)resp->xdr.p)
 				- (char*)resp->xdr.buf->head[0].iov_base;
-	tailbase = p;
-
-	maxcount = PAGE_SIZE;
-	if (maxcount > readdir->rd_maxcount)
-		maxcount = readdir->rd_maxcount;
 
 	/*
-	 * Convert from bytes to words, account for the two words already
-	 * written, make sure to leave two words at the end for the next
-	 * pointer and eof field.
+	 * Number of bytes left for directory entries allowing for the
+	 * final 8 bytes of the readdir and a following failed op:
+	 */
+	bytes_left = xdr->buf->buflen - xdr->buf->len
+			- COMPOUND_ERR_SLACK_SPACE - 8;
+	if (bytes_left < 0) {
+		nfserr = nfserr_resource;
+		goto err_no_verf;
+	}
+	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	/*
+	 * Note the rfc defines rd_maxcount as the size of the
+	 * READDIR4resok structure, which includes the verifier above
+	 * and the 8 bytes encoded at the end of this function:
 	 */
-	maxcount = (maxcount >> 2) - 4;
-	if (maxcount < 0) {
-		nfserr =  nfserr_toosmall;
+	if (maxcount < 16) {
+		nfserr = nfserr_toosmall;
 		goto err_no_verf;
 	}
+	maxcount = min_t(int, maxcount-16, bytes_left);
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
+	readdir->xdr = xdr;
+	readdir->rd_maxcount = maxcount;
 	readdir->common.err = 0;
-	readdir->buflen = maxcount;
-	readdir->buffer = page;
-	readdir->offset = NULL;
+	readdir->cookie_offset = 0;
 
 	offset = readdir->rd_cookie;
 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3252,32 +3267,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 			      &readdir->common, nfsd4_encode_dirent);
 	if (nfserr == nfs_ok &&
 	    readdir->common.err == nfserr_toosmall &&
-	    readdir->buffer == page) 
-		nfserr = nfserr_toosmall;
+	    xdr->buf->len == starting_len + 8) {
+		/* nothing encoded; which limit did we hit?: */
+		if (maxcount - 16 < bytes_left)
+			/* It was the fault of rd_maxcount: */
+			nfserr = nfserr_toosmall;
+		else
+			/* We ran out of buffer space: */
+			nfserr = nfserr_resource;
+	}
 	if (nfserr)
 		goto err_no_verf;
 
-	if (readdir->offset)
-		xdr_encode_hyper(readdir->offset, offset);
+	if (readdir->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	p = readdir->buffer;
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		goto err_no_verf;
+	}
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xdr.buf->page_len = ((char*)p) -
-		(char*)page_address(*(resp->rqstp->rq_next_page-1));
-	xdr->buf->len += xdr->buf->page_len;
-
-	xdr->iov = xdr->buf->tail;
-
-	xdr->page_ptr++;
-	xdr->buf->buflen -= PAGE_SIZE;
-	xdr->iov = xdr->buf->tail;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xdr.buf->tail[0].iov_base = tailbase;
-	resp->xdr.buf->tail[0].iov_len = 0;
-	resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
-	resp->xdr.end = resp->xdr.p + (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
 
 	return 0;
 err_no_verf:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d1c6e21..04b8a80 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
 	struct svc_fh * rd_fhp;             /* response */
 
 	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
+	struct xdr_stream	*xdr;
+	int			cookie_offset;
 };
 
 struct nfsd4_release_lockowner {
-- 
1.7.9.5


  parent reply	other threads:[~2014-05-11 20:53 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-11 20:52 nfsd4 xdr encoding fixes v2 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 01/43] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-12  5:34   ` Christoph Hellwig
2014-05-16  9:58   ` Kinglong Mee
2014-05-22 15:13     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 02/43] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-12  5:35   ` Christoph Hellwig
2014-05-12 16:06     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 03/43] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-12  5:36   ` Christoph Hellwig
2014-05-11 20:52 ` [PATCH 04/43] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-12  5:37   ` Christoph Hellwig
2014-05-12 21:45     ` J. Bruce Fields
2014-05-13  5:05       ` Christoph Hellwig
2014-05-13 14:47         ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 05/43] nfsd4: move nfsd4_operation to xdr4.h J. Bruce Fields
2014-05-12  5:41   ` Christoph Hellwig
2014-05-22 15:56     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 06/43] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-12  8:18   ` Christoph Hellwig
2014-05-12 21:47     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 07/43] nfsd4: allow space for final error return J. Bruce Fields
2014-05-12  8:18   ` Christoph Hellwig
2014-05-12 14:06     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 08/43] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 09/43] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 10/43] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-11 20:52 ` [PATCH 11/43] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-11 20:52 ` [PATCH 12/43] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-11 20:52 ` [PATCH 13/43] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 14/43] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 15/43] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-11 20:52 ` [PATCH 16/43] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-11 20:52 ` [PATCH 17/43] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-11 20:52 ` [PATCH 18/43] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-11 20:52 ` [PATCH 19/43] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-11 20:52 ` [PATCH 20/43] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-11 20:52 ` [PATCH 21/43] nfsd4: allow encoding across page boundaries J. Bruce Fields
2014-05-11 20:52 ` [PATCH 22/43] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 23/43] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-11 20:52 ` [PATCH 24/43] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-11 20:52 ` [PATCH 25/43] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-11 20:52 ` [PATCH 26/43] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-11 20:52 ` [PATCH 27/43] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-11 20:52 ` [PATCH 28/43] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-11 20:52 ` [PATCH 29/43] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-11 20:52 ` J. Bruce Fields [this message]
2014-05-11 20:52 ` [PATCH 31/43] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-11 20:52 ` [PATCH 32/43] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-11 20:52 ` [PATCH 33/43] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-11 20:52 ` [PATCH 34/43] nfsd4: estimate sequence " J. Bruce Fields
2014-05-11 20:52 ` [PATCH 35/43] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 36/43] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-11 20:52 ` [PATCH 37/43] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 38/43] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-11 20:52 ` [PATCH 39/43] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-12  5:33   ` Christoph Hellwig
2014-05-12 14:18     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 40/43] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 41/43] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 42/43] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-11 20:52 ` [PATCH 43/43] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-12  8:20 ` nfsd4 xdr encoding fixes v2 Christoph Hellwig
2014-05-12 16:07   ` J. Bruce Fields
2014-05-12 16:11     ` Christoph Hellwig
2014-05-13 11:09       ` Christoph Hellwig
2014-05-13 14:48         ` J. Bruce Fields
2014-05-13 21:18           ` J. Bruce Fields
2014-05-13 21:33             ` J. Bruce Fields
2014-05-22 19:17               ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1399841568-19716-31-git-send-email-bfields@redhat.com \
    --to=bfields@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).