From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 36/52] nfsd4: allow large readdirs
Date: Thu, 22 May 2014 15:32:11 -0400 [thread overview]
Message-ID: <1400787148-25941-37-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1400787148-25941-1-git-send-email-bfields@redhat.com>
From: "J. Bruce Fields" <bfields@redhat.com>
Currently we limit readdir results to a single page. This can result in
a performance regression compared to NFSv3 when reading large
directories.
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
fs/nfsd/nfs4proc.c | 9 ++--
fs/nfsd/nfs4xdr.c | 137 +++++++++++++++++++++++++++++------------------------
fs/nfsd/xdr4.h | 5 +-
3 files changed, 82 insertions(+), 69 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e1196ed..f56f5e3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1502,13 +1502,14 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
+ u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount;
- if (rlen > PAGE_SIZE)
- rlen = PAGE_SIZE;
+ if (rlen > maxcount)
+ rlen = maxcount;
- return (op_encode_hdr_size + op_encode_verifier_maxsz)
- * sizeof(__be32) + rlen;
+ return (op_encode_hdr_size + op_encode_verifier_maxsz +
+ XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 311e281..a2524b3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2575,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval)
}
static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
- const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+ const char *name, int namlen)
{
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
@@ -2628,8 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
out_encode:
- nfserr = nfsd4_encode_fattr_to_buf(p, buflen, NULL, exp, dentry,
- cd->rd_bmval,
+ nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
cd->rd_rqstp, ignore_crossmnt);
out_put:
dput(dentry);
@@ -2638,9 +2637,12 @@ out_put:
}
static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
- if (buflen < 6)
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 6);
+ if (!p)
return NULL;
*p++ = htonl(2);
*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2657,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
{
struct readdir_cd *ccd = ccdv;
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
- int buflen;
- __be32 *p = cd->buffer;
- __be32 *cookiep;
+ struct xdr_stream *xdr = cd->xdr;
+ int start_offset = xdr->buf->len;
+ int cookie_offset;
+ int entry_bytes;
__be32 nfserr = nfserr_toosmall;
+ __be64 wire_offset;
+ __be32 *p;
/* In nfsv4, "." and ".." never make it onto the wire.. */
if (name && isdotent(name, namlen)) {
@@ -2668,19 +2673,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return 0;
}
- if (cd->offset)
- xdr_encode_hyper(cd->offset, (u64) offset);
+ if (cd->cookie_offset) {
+ wire_offset = cpu_to_be64(offset);
+ write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
+ &wire_offset, 8);
+ }
- buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
- if (buflen < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto fail;
-
*p++ = xdr_one; /* mark entry present */
- cookiep = p;
+ cookie_offset = xdr->buf->len;
+ p = xdr_reserve_space(xdr, 3*4 + namlen);
+ if (!p)
+ goto fail;
p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
p = xdr_encode_array(p, name, namlen); /* name length & name */
- nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
switch (nfserr) {
case nfs_ok:
break;
@@ -2699,19 +2709,23 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
*/
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
goto fail;
- p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+ p = nfsd4_encode_rdattr_error(xdr, nfserr);
if (p == NULL) {
nfserr = nfserr_toosmall;
goto fail;
}
}
- cd->buflen -= (p - cd->buffer);
- cd->buffer = p;
- cd->offset = cookiep;
+ nfserr = nfserr_toosmall;
+ entry_bytes = xdr->buf->len - start_offset;
+ if (entry_bytes > cd->rd_maxcount)
+ goto fail;
+ cd->rd_maxcount -= entry_bytes;
+ cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
return 0;
fail:
+ xdr_truncate_encode(xdr, start_offset);
cd->common.err = nfserr;
return -EINVAL;
}
@@ -3206,10 +3220,11 @@ static __be32
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
{
int maxcount;
+ int bytes_left;
loff_t offset;
+ __be64 wire_offset;
struct xdr_stream *xdr = &resp->xdr;
int starting_len = xdr->buf->len;
- __be32 *page, *tailbase;
__be32 *p;
if (nfserr)
@@ -3219,38 +3234,38 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (!p)
return nfserr_resource;
- if (resp->xdr.buf->page_len)
- return nfserr_resource;
- if (!*resp->rqstp->rq_next_page)
- return nfserr_resource;
-
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
WRITE32(0);
WRITE32(0);
resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
- (char *)resp->xdr.buf->head[0].iov_base;
- tailbase = p;
-
- maxcount = PAGE_SIZE;
- if (maxcount > readdir->rd_maxcount)
- maxcount = readdir->rd_maxcount;
/*
- * Convert from bytes to words, account for the two words already
- * written, make sure to leave two words at the end for the next
- * pointer and eof field.
+ * Number of bytes left for directory entries allowing for the
+ * final 8 bytes of the readdir and a following failed op:
+ */
+ bytes_left = xdr->buf->buflen - xdr->buf->len
+ - COMPOUND_ERR_SLACK_SPACE - 8;
+ if (bytes_left < 0) {
+ nfserr = nfserr_resource;
+ goto err_no_verf;
+ }
+ maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+ /*
+ * Note the rfc defines rd_maxcount as the size of the
+ * READDIR4resok structure, which includes the verifier above
+ * and the 8 bytes encoded at the end of this function:
*/
- maxcount = (maxcount >> 2) - 4;
- if (maxcount < 0) {
- nfserr = nfserr_toosmall;
+ if (maxcount < 16) {
+ nfserr = nfserr_toosmall;
goto err_no_verf;
}
+ maxcount = min_t(int, maxcount-16, bytes_left);
- page = page_address(*(resp->rqstp->rq_next_page++));
+ readdir->xdr = xdr;
+ readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
- readdir->buflen = maxcount;
- readdir->buffer = page;
- readdir->offset = NULL;
+ readdir->cookie_offset = 0;
offset = readdir->rd_cookie;
nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3258,33 +3273,31 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
&readdir->common, nfsd4_encode_dirent);
if (nfserr == nfs_ok &&
readdir->common.err == nfserr_toosmall &&
- readdir->buffer == page)
- nfserr = nfserr_toosmall;
+ xdr->buf->len == starting_len + 8) {
+ /* nothing encoded; which limit did we hit?: */
+ if (maxcount - 16 < bytes_left)
+ /* It was the fault of rd_maxcount: */
+ nfserr = nfserr_toosmall;
+ else
+ /* We ran out of buffer space: */
+ nfserr = nfserr_resource;
+ }
if (nfserr)
goto err_no_verf;
- if (readdir->offset)
- xdr_encode_hyper(readdir->offset, offset);
+ if (readdir->cookie_offset) {
+ wire_offset = cpu_to_be64(offset);
+ write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+ &wire_offset, 8);
+ }
- p = readdir->buffer;
+ p = xdr_reserve_space(xdr, 8);
+ if (!p) {
+ WARN_ON_ONCE(1);
+ goto err_no_verf;
+ }
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
- resp->xdr.buf->page_len = ((char *)p) -
- (char*)page_address(*(resp->rqstp->rq_next_page-1));
- xdr->buf->len += xdr->buf->page_len;
-
- xdr->iov = xdr->buf->tail;
-
- xdr->page_ptr++;
- xdr->buf->buflen -= PAGE_SIZE;
- xdr->iov = xdr->buf->tail;
-
- /* Use rest of head for padding and remaining ops: */
- resp->xdr.buf->tail[0].iov_base = tailbase;
- resp->xdr.buf->tail[0].iov_len = 0;
- resp->xdr.p = resp->xdr.buf->tail[0].iov_base;
- resp->xdr.end = resp->xdr.p +
- (PAGE_SIZE - resp->xdr.buf->head[0].iov_len)/4;
return 0;
err_no_verf:
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 41e5229..18cbb6d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */
struct readdir_cd common;
- __be32 * buffer;
- int buflen;
- __be32 * offset;
+ struct xdr_stream *xdr;
+ int cookie_offset;
};
struct nfsd4_release_lockowner {
--
1.9.0
next prev parent reply other threads:[~2014-05-22 19:32 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-22 19:31 xdr encoding fixes (v3) J. Bruce Fields
2014-05-22 19:31 ` [PATCH 01/52] nfsd4: READ, READDIR, etc., are idempotent J. Bruce Fields
2014-05-23 1:21 ` Kinglong Mee
2014-05-23 13:15 ` J. Bruce Fields
2014-05-22 19:31 ` [PATCH 02/52] nfsd4: allow larger 4.1 session drc slots J. Bruce Fields
2014-05-22 19:31 ` [PATCH 03/52] nfsd4: fill in some missing op_name's J. Bruce Fields
2014-05-22 19:31 ` [PATCH 04/52] nfsd4: decoding errors can still be cached and require space J. Bruce Fields
2014-05-22 19:31 ` [PATCH 05/52] nfsd4: read size estimate should include padding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 06/52] nfsd4: fix write reply size estimate J. Bruce Fields
2014-05-22 19:31 ` [PATCH 07/52] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-22 19:31 ` [PATCH 08/52] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-22 19:31 ` [PATCH 09/52] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-22 19:31 ` [PATCH 10/52] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-22 19:31 ` [PATCH 11/52] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-22 19:31 ` [PATCH 12/52] nfsd4: allow space for final error return J. Bruce Fields
2014-05-22 19:31 ` [PATCH 13/52] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-25 8:46 ` Kinglong Mee
2014-05-22 19:31 ` [PATCH 14/52] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 15/52] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-22 19:31 ` [PATCH 16/52] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-22 19:31 ` [PATCH 17/52] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-22 19:31 ` [PATCH 18/52] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 19/52] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 20/52] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-22 19:31 ` [PATCH 21/52] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-22 19:31 ` [PATCH 22/52] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-22 19:31 ` [PATCH 23/52] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-22 19:31 ` [PATCH 24/52] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-22 19:32 ` [PATCH 25/52] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 26/52] nfsd4: allow encoding across page boundaries J. Bruce Fields
2014-05-22 19:32 ` [PATCH 27/52] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 28/52] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-22 19:32 ` [PATCH 29/52] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-22 19:32 ` [PATCH 30/52] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 31/52] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-22 19:32 ` [PATCH 32/52] nfsd4: fix buflen calculation after read encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 33/52] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-22 19:32 ` [PATCH 34/52] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-22 19:32 ` [PATCH 35/52] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-22 19:32 ` J. Bruce Fields [this message]
2014-05-22 19:32 ` [PATCH 37/52] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-22 19:32 ` [PATCH 38/52] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-22 19:32 ` [PATCH 39/52] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-22 19:32 ` [PATCH 40/52] nfsd4: estimate sequence " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 41/52] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-28 8:09 ` Christoph Hellwig
2014-05-28 14:01 ` J. Bruce Fields
2014-05-28 14:13 ` Anna Schumaker
2014-05-28 14:23 ` J. Bruce Fields
2014-05-28 14:27 ` Anna Schumaker
2014-06-03 4:18 ` Weston Andros Adamson
2014-06-03 14:10 ` J. Bruce Fields
2014-06-03 14:24 ` Weston Andros Adamson
2014-06-03 14:35 ` J. Bruce Fields
2014-05-28 21:08 ` J. Bruce Fields
2014-06-02 22:12 ` J. Bruce Fields
2014-05-22 19:32 ` [PATCH 42/52] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-22 19:32 ` [PATCH 43/52] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-22 19:32 ` [PATCH 44/52] nfsd4: read encoding cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 45/52] nfsd4: more " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 46/52] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-22 19:32 ` [PATCH 47/52] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-22 19:32 ` [PATCH 48/52] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 49/52] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 50/52] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-22 19:32 ` [PATCH 51/52] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 52/52] nfsd4: better reservation of head space for krb5 J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1400787148-25941-37-git-send-email-bfields@redhat.com \
--to=bfields@redhat.com \
--cc=hch@infradead.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).