From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 21/43] nfsd4: allow encoding across page boundaries
Date: Sun, 11 May 2014 16:52:26 -0400 [thread overview]
Message-ID: <1399841568-19716-22-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1399841568-19716-1-git-send-email-bfields@redhat.com>
From: "J. Bruce Fields" <bfields@redhat.com>
After this we can handle, for example, getattr of very large ACLs.
Read, readdir, and readlink are still special cases with their own limits.
We also still can't handle a new operation that starts close to the end
of a page.
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
fs/nfsd/nfs4proc.c | 4 +++
fs/nfsd/nfs4xdr.c | 59 +++++++++++++++++++++++----------
include/linux/sunrpc/svc.h | 1 +
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/svc_xprt.c | 1 +
net/sunrpc/xdr.c | 78 ++++++++++++++++++++++++++++++++++++++++++--
6 files changed, 125 insertions(+), 19 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 18063e0..787aa9f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1213,6 +1213,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, struct nfsd4_compoundres
xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
/* Tail and page_len should be zero at this point: */
buf->len = buf->head[0].iov_len;
+ xdr->scratch.iov_len = 0;
+ xdr->page_ptr = buf->pages;
+ buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+ - 2 * RPC_MAX_AUTH_SIZE;
}
/*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index def2ceb..aedf19a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum)
* the head and tail in another page:
*/
return 2 * PAGE_SIZE;
+ case OP_GETATTR:
case OP_READ:
return INT_MAX;
default:
@@ -2547,21 +2548,30 @@ out_resource:
goto out;
}
+/*
+ * Set up @xdr to encode into the flat, caller-supplied region of @bytes
+ * bytes starting at @p, using @buf as the backing xdr_buf.  The region
+ * becomes buf->head[0]; encoding starts at @p and must stop at @p + @bytes.
+ */
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, int bytes)
+{
+ /* Zero the whole xdr_buf first so every field not set below starts at 0. */
+ memset(buf, 0, sizeof(*buf));
+ buf->buflen = bytes;
+ buf->len = 0;
+ buf->head[0].iov_len = 0;
+ buf->head[0].iov_base = p;
+
+ /* Attach the stream to the head kvec, with no page-straddling encode pending. */
+ xdr->buf = buf;
+ xdr->iov = buf->head;
+ xdr->p = p;
+ xdr->end = (void *)p + bytes;
+ xdr->scratch.iov_len = 0;
+}
+
__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, u32 *bmval,
struct svc_rqst *rqstp, int ignore_crossmnt)
{
- struct xdr_buf dummy = {
- .head[0] = {
- .iov_base = *p,
- },
- .buflen = words << 2,
- };
+ struct xdr_buf dummy;
struct xdr_stream xdr;
__be32 ret;
- xdr_init_encode(&xdr, &dummy, NULL);
+ svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, ignore_crossmnt);
*p = xdr.p;
return ret;
@@ -3049,8 +3059,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr)
return nfserr;
- if (resp->xdr.buf->page_len)
- return nfserr_resource;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p)
@@ -3060,6 +3068,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
if (xdr->end - xdr->p < 1)
return nfserr_resource;
+ if (resp->xdr.buf->page_len)
+ return nfserr_resource;
+
maxcount = svc_max_payload(resp->rqstp);
if (maxcount > read->rd_length)
maxcount = read->rd_length;
@@ -3104,6 +3115,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
- (char *)resp->xdr.buf->head[0].iov_base);
resp->xdr.buf->page_len = maxcount;
xdr->buf->len += maxcount;
+ xdr->page_ptr += v;
+ xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
@@ -3130,6 +3143,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
if (nfserr)
return nfserr;
+
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+
if (resp->xdr.buf->page_len)
return nfserr_resource;
if (!*resp->rqstp->rq_next_page)
@@ -3139,10 +3157,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
maxcount = PAGE_SIZE;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
-
if (xdr->end - xdr->p < 1)
return nfserr_resource;
@@ -3165,6 +3179,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
- (char*)resp->xdr.buf->head[0].iov_base;
resp->xdr.buf->page_len = maxcount;
xdr->buf->len += maxcount;
+ xdr->page_ptr += 1;
+ xdr->buf->buflen -= PAGE_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
@@ -3191,15 +3207,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (nfserr)
return nfserr;
- if (resp->xdr.buf->page_len)
- return nfserr_resource;
- if (!*resp->rqstp->rq_next_page)
- return nfserr_resource;
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
+ if (resp->xdr.buf->page_len)
+ return nfserr_resource;
+ if (!*resp->rqstp->rq_next_page)
+ return nfserr_resource;
+
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
WRITE32(0);
WRITE32(0);
@@ -3251,6 +3268,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
xdr->iov = xdr->buf->tail;
+ xdr->page_ptr++;
+ xdr->buf->buflen -= PAGE_SIZE;
+ xdr->iov = xdr->buf->tail;
+
/* Use rest of head for padding and remaining ops: */
resp->xdr.buf->tail[0].iov_base = tailbase;
resp->xdr.buf->tail[0].iov_len = 0;
@@ -3783,6 +3804,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
!nfsd4_enc_ops[op->opnum]);
encoder = nfsd4_enc_ops[op->opnum];
op->status = encoder(resp, op->status, &op->u);
+ xdr_commit_encode(xdr);
+
/* nfsd4_check_resp_size guarantees enough room for error status */
if (!op->status) {
int space_needed = 0;
@@ -3907,6 +3930,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
buf->tail[0].iov_len);
+ rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
p = resp->tagp;
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e7632..39c50e1 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -244,6 +244,7 @@ struct svc_rqst {
struct page * rq_pages[RPCSVC_MAXPAGES];
struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */
+ struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e7bb2e3..b23d69f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0..baec792 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 8ae8ee7..e65d6b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+ xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -482,6 +483,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it. But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ int shift = xdr->scratch.iov_len; /* bytes owed to the gap left at scratch.iov_base */
+ void *page;
+
+ if (shift == 0) /* no straddling encode is pending */
+ return;
+ page = page_address(*xdr->page_ptr); /* page the pending data was encoded into */
+ memcpy(xdr->scratch.iov_base, page, shift); /* head of the data fills the gap */
+ memmove(page, page + shift, (void *)xdr->p - page); /* remainder slides down to the page start */
+ xdr->scratch.iov_len = 0; /* committed; a repeat call is a no-op */
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+/*
+ * xdr_get_next_encode_buffer - continue encoding in the next page
+ * @xdr: pointer to xdr_stream
+ * @nbytes: number of bytes the caller wants to encode
+ *
+ * Called by xdr_reserve_space when the current buffer cannot hold @nbytes.
+ * Any space left at the end of the current buffer is recorded in
+ * xdr->scratch, and the caller is pointed at the start of the next page;
+ * xdr_commit_encode later copies the data back into its final place.
+ *
+ * Returns the address to encode @nbytes at, or NULL if @nbytes is larger
+ * than PAGE_SIZE or would take buf->len past buf->buflen.
+ */
+__be32 * xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+ /*
+  * Deliberately automatic, not static: p is assigned before every use,
+  * and a static would be a single variable shared by all callers, so
+  * concurrent encoders could overwrite it between the assignment and
+  * the return below.
+  */
+ __be32 *p;
+ int space_left;
+ int frag1bytes, frag2bytes;
+
+ if (nbytes > PAGE_SIZE)
+ return NULL; /* Bigger buffers require special handling */
+ if (xdr->buf->len + nbytes > xdr->buf->buflen)
+ return NULL; /* Sorry, we're totally out of space */
+ /* frag1 is what still fits in the current buffer, frag2 the overflow: */
+ frag1bytes = (xdr->end - xdr->p) << 2;
+ frag2bytes = nbytes - frag1bytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += frag1bytes;
+ else {
+ xdr->buf->page_len += frag1bytes;
+ xdr->page_ptr++;
+ }
+ /* From here on we are encoding into the page array, not a kvec: */
+ xdr->iov = NULL;
+ /*
+ * If the last encode didn't end exactly on a page boundary, the
+ * next one will straddle boundaries. Encode into the next
+ * page, then copy it back later in xdr_commit_encode. We use
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+ xdr->scratch.iov_base = xdr->p;
+ xdr->scratch.iov_len = frag1bytes;
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+ * shifted this one back:
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+ xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+}
+
+/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
* @nbytes: number of bytes to reserve
@@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
+ xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
- return NULL;
+ return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
- xdr->iov->iov_len += nbytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += nbytes;
+ else
+ xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
@@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(1);
return;
}
+ xdr_commit_encode(xdr);
fraglen = min_t(int, buf->len - len, tail->iov_len);
tail->iov_len -= fraglen;
--
1.7.9.5
next prev parent reply other threads:[~2014-05-11 20:53 UTC|newest]
Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-11 20:52 nfsd4 xdr encoding fixes v2 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 01/43] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-12 5:34 ` Christoph Hellwig
2014-05-16 9:58 ` Kinglong Mee
2014-05-22 15:13 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 02/43] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-12 5:35 ` Christoph Hellwig
2014-05-12 16:06 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 03/43] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-12 5:36 ` Christoph Hellwig
2014-05-11 20:52 ` [PATCH 04/43] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-12 5:37 ` Christoph Hellwig
2014-05-12 21:45 ` J. Bruce Fields
2014-05-13 5:05 ` Christoph Hellwig
2014-05-13 14:47 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 05/43] nfsd4: move nfsd4_operation to xdr4.h J. Bruce Fields
2014-05-12 5:41 ` Christoph Hellwig
2014-05-22 15:56 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 06/43] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-12 8:18 ` Christoph Hellwig
2014-05-12 21:47 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 07/43] nfsd4: allow space for final error return J. Bruce Fields
2014-05-12 8:18 ` Christoph Hellwig
2014-05-12 14:06 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 08/43] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 09/43] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 10/43] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-11 20:52 ` [PATCH 11/43] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-11 20:52 ` [PATCH 12/43] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-11 20:52 ` [PATCH 13/43] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 14/43] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-11 20:52 ` [PATCH 15/43] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-11 20:52 ` [PATCH 16/43] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-11 20:52 ` [PATCH 17/43] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-11 20:52 ` [PATCH 18/43] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-11 20:52 ` [PATCH 19/43] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-11 20:52 ` [PATCH 20/43] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-11 20:52 ` J. Bruce Fields [this message]
2014-05-11 20:52 ` [PATCH 22/43] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-11 20:52 ` [PATCH 23/43] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-11 20:52 ` [PATCH 24/43] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-11 20:52 ` [PATCH 25/43] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-11 20:52 ` [PATCH 26/43] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-11 20:52 ` [PATCH 27/43] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-11 20:52 ` [PATCH 28/43] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-11 20:52 ` [PATCH 29/43] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-11 20:52 ` [PATCH 30/43] nfsd4: allow large readdirs J. Bruce Fields
2014-05-11 20:52 ` [PATCH 31/43] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-11 20:52 ` [PATCH 32/43] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-11 20:52 ` [PATCH 33/43] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-11 20:52 ` [PATCH 34/43] nfsd4: estimate sequence " J. Bruce Fields
2014-05-11 20:52 ` [PATCH 35/43] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 36/43] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-11 20:52 ` [PATCH 37/43] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-11 20:52 ` [PATCH 38/43] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-11 20:52 ` [PATCH 39/43] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-12 5:33 ` Christoph Hellwig
2014-05-12 14:18 ` J. Bruce Fields
2014-05-11 20:52 ` [PATCH 40/43] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 41/43] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-11 20:52 ` [PATCH 42/43] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-11 20:52 ` [PATCH 43/43] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-12 8:20 ` nfsd4 xdr encoding fixes v2 Christoph Hellwig
2014-05-12 16:07 ` J. Bruce Fields
2014-05-12 16:11 ` Christoph Hellwig
2014-05-13 11:09 ` Christoph Hellwig
2014-05-13 14:48 ` J. Bruce Fields
2014-05-13 21:18 ` J. Bruce Fields
2014-05-13 21:33 ` J. Bruce Fields
2014-05-22 19:17 ` J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1399841568-19716-22-git-send-email-bfields@redhat.com \
--to=bfields@redhat.com \
--cc=hch@infradead.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.