All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
	"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 30/43] nfsd4: allow large readdirs
Date: Sun, 11 May 2014 16:52:35 -0400	[thread overview]
Message-ID: <1399841568-19716-31-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1399841568-19716-1-git-send-email-bfields@redhat.com>

From: "J. Bruce Fields" <bfields@redhat.com>

Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c |    3 --
 fs/nfsd/nfs4xdr.c  |  134 +++++++++++++++++++++++++++++-----------------------
 fs/nfsd/xdr4.h     |    5 +-
 3 files changed, 76 insertions(+), 66 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index be638c1..0ab65ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1451,9 +1451,6 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
 {
 	u32 rlen = op->u.readdir.rd_maxcount;
 
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
-
 	return (op_encode_hdr_size + op_encode_verifier_maxsz)
 		 * sizeof(__be32) + rlen;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0d8a18d..731587c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2576,8 +2576,8 @@ static inline int attributes_need_mount(u32 *bmval)
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+			const char *name, int namlen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -2629,7 +2629,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
 	}
 out_encode:
-	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, cd->rd_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
 					cd->rd_rqstp, ignore_crossmnt);
 out_put:
 	dput(dentry);
@@ -2638,9 +2638,12 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-	if (buflen < 6)
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6);
+	if (!p)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2660,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 {
 	struct readdir_cd *ccd = ccdv;
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-	int buflen;
-	__be32 *p = cd->buffer;
-	__be32 *cookiep;
+	struct xdr_stream *xdr = cd->xdr;
+	int start_offset = xdr->buf->len;
+	int cookie_offset;
+	int entry_bytes;
 	__be32 nfserr = nfserr_toosmall;
+	__be64 wire_offset;
+	__be32 *p;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -2668,19 +2674,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		return 0;
 	}
 
-	if (cd->offset)
-		xdr_encode_hyper(cd->offset, (u64) offset);
+	if (cd->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, &wire_offset, 8);
+	}
 
-	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-	if (buflen < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto fail;
-
 	*p++ = xdr_one;                             /* mark entry present */
-	cookiep = p;
+	cookie_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 3*4 + namlen);
+	if (!p)
+		goto fail;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
 	switch (nfserr) {
 	case nfs_ok:
 		break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		 */
 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
 			goto fail;
-		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		p = nfsd4_encode_rdattr_error(xdr, nfserr);
 		if (p == NULL) {
 			nfserr = nfserr_toosmall;
 			goto fail;
 		}
 	}
-	cd->buflen -= (p - cd->buffer);
-	cd->buffer = p;
-	cd->offset = cookiep;
+	nfserr = nfserr_toosmall;
+	entry_bytes = xdr->buf->len - start_offset;
+	if (entry_bytes > cd->rd_maxcount)
+		goto fail;
+	cd->rd_maxcount -= entry_bytes;
+	cd->cookie_offset = cookie_offset;
 skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
+	xdr_truncate_encode(xdr, start_offset);
 	cd->common.err = nfserr;
 	return -EINVAL;
 }
@@ -3200,10 +3214,11 @@ static __be32
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
+	int bytes_left;
 	loff_t offset;
+	__be64 wire_offset;
 	struct xdr_stream *xdr = &resp->xdr;
 	int starting_len = xdr->buf->len;
-	__be32 *page, *tailbase;
 	__be32 *p;
 
 	if (nfserr)
@@ -3213,38 +3228,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	if (!p)
 		return nfserr_resource;
 
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
-
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
 	resp->xdr.buf->head[0].iov_len = ((char*)resp->xdr.p)
 				- (char*)resp->xdr.buf->head[0].iov_base;
-	tailbase = p;
-
-	maxcount = PAGE_SIZE;
-	if (maxcount > readdir->rd_maxcount)
-		maxcount = readdir->rd_maxcount;
 
 	/*
-	 * Convert from bytes to words, account for the two words already
-	 * written, make sure to leave two words at the end for the next
-	 * pointer and eof field.
+	 * Number of bytes left for directory entries allowing for the
+	 * final 8 bytes of the readdir and a following failed op:
+	 */
+	bytes_left = xdr->buf->buflen - xdr->buf->len
+			- COMPOUND_ERR_SLACK_SPACE - 8;
+	if (bytes_left < 0) {
+		nfserr = nfserr_resource;
+		goto err_no_verf;
+	}
+	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	/*
+	 * Note the rfc defines rd_maxcount as the size of the
+	 * READDIR4resok structure, which includes the verifier above
+	 * and the 8 bytes encoded at the end of this function:
 	 */
-	maxcount = (maxcount >> 2) - 4;
-	if (maxcount < 0) {
-		nfserr =  nfserr_toosmall;
+	if (maxcount < 16) {
+		nfserr = nfserr_toosmall;
 		goto err_no_verf;
 	}
+	maxcount = min_t(int, maxcount-16, bytes_left);
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
+	readdir->xdr = xdr;
+	readdir->rd_maxcount = maxcount;
 	readdir->common.err = 0;
-	readdir->buflen = maxcount;
-	readdir->buffer = page;
-	readdir->offset = NULL;
+	readdir->cookie_offset = 0;
 
 	offset = readdir->rd_cookie;
 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3252,32 +3267,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 			      &readdir->common, nfsd4_encode_dirent);
 	if (nfserr == nfs_ok &&
 	    readdir->common.err == nfserr_toosmall &&
-	    readdir->buffer == page) 
-		nfserr = nfserr_toosmall;
+	    xdr->buf->len == starting_len + 8) {
+		/* nothing encoded; which limit did we hit?: */
+		if (maxcount - 16 < bytes_left)
+			/* It was the fault of rd_maxcount: */
+			nfserr = nfserr_toosmall;
+		else
+			/* We ran out of buffer space: */
+			nfserr = nfserr_resource;
+	}
 	if (nfserr)
 		goto err_no_verf;
 
-	if (readdir->offset)
-		xdr_encode_hyper(readdir->offset, offset);
+	if (readdir->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	p = readdir->buffer;
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		goto err_no_verf;
+	}
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xdr.buf->page_len = ((char*)p) -
-		(char*)page_address(*(resp->rqstp->rq_next_page-1));
-	xdr->buf->len += xdr->buf->page_len;
-
-	xdr->iov = xdr->buf->tail;
-
-	xdr->page_ptr++;
-	xdr->buf->buflen -= PAGE_SIZE;
-	xdr->iov = xdr->buf->tail;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xdr.buf->tail[0].iov_base = tailbase;
-	resp->xdr.buf->tail[0].iov_len = 0;
-	resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
-	resp->xdr.end = resp->xdr.p + (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
 
 	return 0;
 err_no_verf:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d1c6e21..04b8a80 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
 	struct svc_fh * rd_fhp;             /* response */
 
 	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
+	struct xdr_stream	*xdr;
+	int			cookie_offset;
 };
 
 struct nfsd4_release_lockowner {
-- 
1.7.9.5


  parent reply	other threads:[~2014-05-11 20:53 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-11 20:52 nfsd4 xdr encoding fixes v2 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 01/43] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-12  5:34   ` Christoph Hellwig
2014-05-16  9:58   ` Kinglong Mee
2014-05-22 15:13     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 02/43] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-12  5:35   ` Christoph Hellwig
2014-05-12 16:06     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 03/43] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-12  5:36   ` Christoph Hellwig
2014-05-11 20:52 ` [PATCH 04/43] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-12  5:37   ` Christoph Hellwig
2014-05-12 21:45     ` J. Bruce Fields
2014-05-13  5:05       ` Christoph Hellwig
2014-05-13 14:47         ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 05/43] nfsd4: move nfsd4_operation to xdr4.h J. Bruce Fields
2014-05-12  5:41   ` Christoph Hellwig
2014-05-22 15:56     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 06/43] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-12  8:18   ` Christoph Hellwig
2014-05-12 21:47     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 07/43] nfsd4: allow space for final error return J. Bruce Fields
2014-05-12  8:18   ` Christoph Hellwig
2014-05-12 14:06     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 08/43] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 09/43] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 10/43] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-11 20:52 ` [PATCH 11/43] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-11 20:52 ` [PATCH 12/43] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-11 20:52 ` [PATCH 13/43] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 14/43] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 15/43] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-11 20:52 ` [PATCH 16/43] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-11 20:52 ` [PATCH 17/43] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-11 20:52 ` [PATCH 18/43] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-11 20:52 ` [PATCH 19/43] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-11 20:52 ` [PATCH 20/43] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-11 20:52 ` [PATCH 21/43] nfsd4: allow encoding across page boundaries J. Bruce Fields
2014-05-11 20:52 ` [PATCH 22/43] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 23/43] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-11 20:52 ` [PATCH 24/43] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-11 20:52 ` [PATCH 25/43] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-11 20:52 ` [PATCH 26/43] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-11 20:52 ` [PATCH 27/43] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-11 20:52 ` [PATCH 28/43] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-11 20:52 ` [PATCH 29/43] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-11 20:52 ` J. Bruce Fields [this message]
2014-05-11 20:52 ` [PATCH 31/43] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-11 20:52 ` [PATCH 32/43] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-11 20:52 ` [PATCH 33/43] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-11 20:52 ` [PATCH 34/43] nfsd4: estimate sequence " J. Bruce Fields
2014-05-11 20:52 ` [PATCH 35/43] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 36/43] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-11 20:52 ` [PATCH 37/43] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 38/43] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-11 20:52 ` [PATCH 39/43] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-12  5:33   ` Christoph Hellwig
2014-05-12 14:18     ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 40/43] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 41/43] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 42/43] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-11 20:52 ` [PATCH 43/43] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-12  8:20 ` nfsd4 xdr encoding fixes v2 Christoph Hellwig
2014-05-12 16:07   ` J. Bruce Fields
2014-05-12 16:11     ` Christoph Hellwig
2014-05-13 11:09       ` Christoph Hellwig
2014-05-13 14:48         ` J. Bruce Fields
2014-05-13 21:18           ` J. Bruce Fields
2014-05-13 21:33             ` J. Bruce Fields
2014-05-22 19:17               ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1399841568-19716-31-git-send-email-bfields@redhat.com \
    --to=bfields@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.