All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: "J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 40/50] nfsd4: allow large readdirs
Date: Sat, 22 Mar 2014 21:12:11 -0400	[thread overview]
Message-ID: <1395537141-10389-41-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1395537141-10389-1-git-send-email-bfields@redhat.com>

From: "J. Bruce Fields" <bfields@redhat.com>

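Currently we limit readdir results to a single page.  This can result in
a performance regression compared to NFSv3 when reading large
directories.

Instead of encoding entries into one separately reserved page, encode
them directly into the reply's xdr_stream, so that a readdir reply may
span multiple pages; the result is bounded by the client's rd_maxcount
and by the space remaining in the reply buffer.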

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c |   3 --
 fs/nfsd/nfs4xdr.c  | 134 +++++++++++++++++++++++++++++------------------------
 fs/nfsd/xdr4.h     |   5 +-
 3 files changed, 76 insertions(+), 66 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 9876de2..d1b4513 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1457,9 +1457,6 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
 {
 	u32 rlen = op->u.readdir.rd_maxcount;
 
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
-
 	return (op_encode_hdr_size + op_encode_verifier_maxsz)
 		 * sizeof(__be32) + rlen;
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e0e486d..fa3ae50 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
 }
 
 static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-		const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+			const char *name, int namlen)
 {
 	struct svc_export *exp = cd->rd_fhp->fh_export;
 	struct dentry *dentry;
@@ -2628,7 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
 
 	}
 out_encode:
-	nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry, cd->rd_bmval,
+	nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
 					cd->rd_rqstp, ignore_crossmnt);
 out_put:
 	dput(dentry);
@@ -2637,9 +2637,12 @@ out_put:
 }
 
 static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
 {
-	if (buflen < 6)
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6);
+	if (!p)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2656,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 {
 	struct readdir_cd *ccd = ccdv;
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
-	int buflen;
-	__be32 *p = cd->buffer;
-	__be32 *cookiep;
+	struct xdr_stream *xdr = cd->xdr;
+	int start_offset = xdr->buf->len;
+	int cookie_offset;
+	int entry_bytes;
 	__be32 nfserr = nfserr_toosmall;
+	__be64 wire_offset;
+	__be32 *p;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -2667,19 +2673,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		return 0;
 	}
 
-	if (cd->offset)
-		xdr_encode_hyper(cd->offset, (u64) offset);
+	if (cd->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, &wire_offset, 8);
+	}
 
-	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
-	if (buflen < 0)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto fail;
-
 	*p++ = xdr_one;                             /* mark entry present */
-	cookiep = p;
+	cookie_offset = xdr->buf->len;
+	p = xdr_reserve_space(xdr, 3*4 + namlen);
+	if (!p)
+		goto fail;
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
 	switch (nfserr) {
 	case nfs_ok:
 		break;
@@ -2698,19 +2708,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 		 */
 		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
 			goto fail;
-		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		p = nfsd4_encode_rdattr_error(xdr, nfserr);
 		if (p == NULL) {
 			nfserr = nfserr_toosmall;
 			goto fail;
 		}
 	}
-	cd->buflen -= (p - cd->buffer);
-	cd->buffer = p;
-	cd->offset = cookiep;
+	nfserr = nfserr_toosmall;
+	entry_bytes = xdr->buf->len - start_offset;
+	if (entry_bytes > cd->rd_maxcount)
+		goto fail;
+	cd->rd_maxcount -= entry_bytes;
+	cd->cookie_offset = cookie_offset;
 skip_entry:
 	cd->common.err = nfs_ok;
 	return 0;
 fail:
+	xdr_truncate_encode(xdr, start_offset);
 	cd->common.err = nfserr;
 	return -EINVAL;
 }
@@ -3194,10 +3208,11 @@ static __be32
 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
 	int maxcount;
+	int bytes_left;
 	loff_t offset;
+	__be64 wire_offset;
 	struct xdr_stream *xdr = &resp->xdr;
 	int starting_len = xdr->buf->len;
-	__be32 *page, *tailbase;
 	__be32 *p;
 
 	if (nfserr)
@@ -3207,38 +3222,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	if (!p)
 		return nfserr_resource;
 
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
-
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
 	resp->xdr.buf->head[0].iov_len = ((char*)resp->xdr.p)
 				- (char*)resp->xdr.buf->head[0].iov_base;
-	tailbase = p;
-
-	maxcount = PAGE_SIZE;
-	if (maxcount > readdir->rd_maxcount)
-		maxcount = readdir->rd_maxcount;
 
 	/*
-	 * Convert from bytes to words, account for the two words already
-	 * written, make sure to leave two words at the end for the next
-	 * pointer and eof field.
+	 * Number of bytes left for directory entries allowing for the
+	 * final 8 bytes of the readdir and a following failed op:
+	 */
+	bytes_left = xdr->buf->buflen - xdr->buf->len
+			- COMPOUND_ERR_SLACK_SPACE - 8;
+	if (bytes_left < 0) {
+		nfserr = nfserr_resource;
+		goto err_no_verf;
+	}
+	maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+	/*
+	 * Note the rfc defines rd_maxcount as the size of the
+	 * READDIR4resok structure, which includes the verifier above
+	 * and the 8 bytes encoded at the end of this function:
 	 */
-	maxcount = (maxcount >> 2) - 4;
-	if (maxcount < 0) {
-		nfserr =  nfserr_toosmall;
+	if (maxcount < 16) {
+		nfserr = nfserr_toosmall;
 		goto err_no_verf;
 	}
+	maxcount = min_t(int, maxcount-16, bytes_left);
 
-	page = page_address(*(resp->rqstp->rq_next_page++));
+	readdir->xdr = xdr;
+	readdir->rd_maxcount = maxcount;
 	readdir->common.err = 0;
-	readdir->buflen = maxcount;
-	readdir->buffer = page;
-	readdir->offset = NULL;
+	readdir->cookie_offset = 0;
 
 	offset = readdir->rd_cookie;
 	nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3246,32 +3261,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 			      &readdir->common, nfsd4_encode_dirent);
 	if (nfserr == nfs_ok &&
 	    readdir->common.err == nfserr_toosmall &&
-	    readdir->buffer == page) 
-		nfserr = nfserr_toosmall;
+	    xdr->buf->len == starting_len + 8) {
+		/* nothing encoded; which limit did we hit?: */
+		if (maxcount - 16 < bytes_left)
+			/* It was the fault of rd_maxcount: */
+			nfserr = nfserr_toosmall;
+		else
+			/* We ran out of buffer space: */
+			nfserr = nfserr_resource;
+	}
 	if (nfserr)
 		goto err_no_verf;
 
-	if (readdir->offset)
-		xdr_encode_hyper(readdir->offset, offset);
+	if (readdir->cookie_offset) {
+		wire_offset = cpu_to_be64(offset);
+		write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+							&wire_offset, 8);
+	}
 
-	p = readdir->buffer;
+	p = xdr_reserve_space(xdr, 8);
+	if (!p) {
+		WARN_ON_ONCE(1);
+		goto err_no_verf;
+	}
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xdr.buf->page_len = ((char*)p) -
-		(char*)page_address(*(resp->rqstp->rq_next_page-1));
-	xdr->buf->len += xdr->buf->page_len;
-
-	xdr->iov = xdr->buf->tail;
-
-	xdr->page_ptr++;
-	xdr->buf->buflen -= PAGE_SIZE;
-	xdr->iov = xdr->buf->tail;
-
-	/* Use rest of head for padding and remaining ops: */
-	resp->xdr.buf->tail[0].iov_base = tailbase;
-	resp->xdr.buf->tail[0].iov_len = 0;
-	resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
-	resp->xdr.end = resp->xdr.p + (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
 
 	return 0;
 err_no_verf:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d1c6e21..04b8a80 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
 	struct svc_fh * rd_fhp;             /* response */
 
 	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
+	struct xdr_stream	*xdr;
+	int			cookie_offset;
 };
 
 struct nfsd4_release_lockowner {
-- 
1.8.5.3


  parent reply	other threads:[~2014-03-23  1:12 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-23  1:11 nfsd4 xdr encoding fixes J. Bruce Fields
2014-03-23  1:11 ` [PATCH 01/50] rpc: Allow xdr_buf_subsegment to operate in-place J. Bruce Fields
2014-03-23  1:11 ` [PATCH 02/50] nfsd4: update comments with obsolete function name J. Bruce Fields
2014-03-23  1:11 ` [PATCH 03/50] nfsd4: nfsd4_replay_cache_entry should be static J. Bruce Fields
2014-03-23  1:11 ` [PATCH 04/50] nfsd4: minor nfsd4_replay_cache_entry cleanup J. Bruce Fields
2014-03-23  1:11 ` [PATCH 05/50] nfsd4: use more generous NFS4_ACL_MAX J. Bruce Fields
2014-03-23  1:11 ` [PATCH 06/50] nfsd4: remove redundant check from nfsd4_check_resp_size J. Bruce Fields
2014-03-23  1:11 ` [PATCH 07/50] nfsd4: fix setclientid encode size J. Bruce Fields
2014-03-23  1:11 ` [PATCH 08/50] nfsd4: fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-03-29 19:18   ` J. Bruce Fields
2014-03-23  1:11 ` [PATCH 09/50] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-03-23  1:11 ` [PATCH 10/50] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-03-23  1:11 ` [PATCH 11/50] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-03-23  1:11 ` [PATCH 12/50] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-03-23  1:11 ` [PATCH 13/50] nfsd4: move nfsd4_operation to xdr4.h J. Bruce Fields
2014-03-23  1:11 ` [PATCH 14/50] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-03-23  1:11 ` [PATCH 15/50] nfsd4: allow space for final error return J. Bruce Fields
2014-03-23  1:11 ` [PATCH 16/50] nfsd4: READ, READDIR, etc., are idempotent J. Bruce Fields
2014-03-23  1:11 ` [PATCH 17/50] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-03-23  1:11 ` [PATCH 18/50] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-03-23  6:43   ` Christoph Hellwig
2014-03-23 15:11     ` J. Bruce Fields
2014-03-25 15:38       ` Christoph Hellwig
2014-03-23  1:11 ` [PATCH 19/50] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-03-23  1:11 ` [PATCH 20/50] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-03-23  6:47   ` Christoph Hellwig
2014-03-23  1:11 ` [PATCH 21/50] rpc: xdr_truncate_encode J. Bruce Fields
2014-03-23  1:11 ` [PATCH 22/50] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-03-23  6:50   ` Christoph Hellwig
2014-03-23 15:07     ` J. Bruce Fields
2014-03-25 15:36       ` Christoph Hellwig
2014-04-05  0:20         ` J. Bruce Fields
2014-03-23  1:11 ` [PATCH 23/50] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-03-23  6:51   ` Christoph Hellwig
2014-03-23 14:43     ` J. Bruce Fields
2014-03-23 14:52       ` Christoph Hellwig
2014-03-23  1:11 ` [PATCH 24/50] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-03-23  1:11 ` [PATCH 25/50] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-03-23  1:11 ` [PATCH 26/50] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-03-23  1:11 ` [PATCH 27/50] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-03-23  1:11 ` [PATCH 28/50] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-03-23  1:12 ` [PATCH 29/50] nfsd4: size-checking cleanup J. Bruce Fields
2014-03-23  1:12 ` [PATCH 30/50] nfsd4: allow encoding across page boundaries J. Bruce Fields
2014-03-23  1:12 ` [PATCH 31/50] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-03-23  1:12 ` [PATCH 32/50] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-03-23  1:12 ` [PATCH 33/50] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-03-23  1:12 ` [PATCH 34/50] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-03-23  1:12 ` [PATCH 35/50] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-03-23  1:12 ` [PATCH 36/50] nfsd4: allow larger 4.1 session drc slots J. Bruce Fields
2014-03-23  1:12 ` [PATCH 37/50] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-03-23  1:12 ` [PATCH 38/50] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-03-23  1:12 ` [PATCH 39/50] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-03-23  1:12 ` J. Bruce Fields [this message]
2014-03-23  1:12 ` [PATCH 41/50] nfsd4: enforce rd_dircount J. Bruce Fields
2014-03-23  1:12 ` [PATCH 42/50] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-03-23  1:12 ` [PATCH 43/50] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-03-23  1:12 ` [PATCH 44/50] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-03-23  1:12 ` [PATCH 45/50] nfsd4: separate splice and readv cases J. Bruce Fields
2014-03-23  1:12 ` [PATCH 46/50] nfsd4: allow exotic read compounds J. Bruce Fields
2014-03-23  1:12 ` [PATCH 47/50] nfsd4: kill WRITE32 J. Bruce Fields
2014-03-23  1:12 ` [PATCH 48/50] nfsd4: kill WRITE64 J. Bruce Fields
2014-03-23  1:12 ` [PATCH 49/50] nfsd4: kill WRITEMEM J. Bruce Fields
2014-03-23  1:12 ` [PATCH 50/50] nfsd4: kill write32, write64 J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395537141-10389-41-git-send-email-bfields@redhat.com \
    --to=bfields@redhat.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.