From: "J. Bruce Fields" <bfields@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>,
"J. Bruce Fields" <bfields@redhat.com>
Subject: [PATCH 26/52] nfsd4: allow encoding across page boundaries
Date: Thu, 22 May 2014 15:32:01 -0400 [thread overview]
Message-ID: <1400787148-25941-27-git-send-email-bfields@redhat.com> (raw)
In-Reply-To: <1400787148-25941-1-git-send-email-bfields@redhat.com>
From: "J. Bruce Fields" <bfields@redhat.com>
After this we can handle for example getattr of very large ACLs.
Read, readdir, readlink are still special cases with their own limits.
Also we can't handle a new operation starting close to the end of a
page.
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
fs/nfsd/nfs4proc.c | 4 +++
fs/nfsd/nfs4xdr.c | 60 +++++++++++++++++++++++++----------
include/linux/sunrpc/svc.h | 1 +
include/linux/sunrpc/xdr.h | 1 +
net/sunrpc/svc_xprt.c | 1 +
net/sunrpc/xdr.c | 78 ++++++++++++++++++++++++++++++++++++++++++++--
6 files changed, 126 insertions(+), 19 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f8e7472..2517e0b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1266,6 +1266,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
/* Tail and page_len should be zero at this point: */
buf->len = buf->head[0].iov_len;
+ xdr->scratch.iov_len = 0;
+ xdr->page_ptr = buf->pages;
+ buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+ - 2 * RPC_MAX_AUTH_SIZE;
}
/*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f1a9716..1bac000 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum)
* the head and tail in another page:
*/
return 2 * PAGE_SIZE;
+ case OP_GETATTR:
case OP_READ:
return INT_MAX;
default:
@@ -2560,21 +2561,31 @@ out_resource:
goto out;
}
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
+ struct xdr_buf *buf, __be32 *p, int bytes)
+{
+ xdr->scratch.iov_len = 0;
+ memset(buf, 0, sizeof(struct xdr_buf));
+ buf->head[0].iov_base = p;
+ buf->head[0].iov_len = 0;
+ buf->len = 0;
+ xdr->buf = buf;
+ xdr->iov = buf->head;
+ xdr->p = p;
+ xdr->end = (void *)p + bytes;
+ buf->buflen = bytes;
+}
+
__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, u32 *bmval,
struct svc_rqst *rqstp, int ignore_crossmnt)
{
- struct xdr_buf dummy = {
- .head[0] = {
- .iov_base = *p,
- },
- .buflen = words << 2,
- };
+ struct xdr_buf dummy;
struct xdr_stream xdr;
__be32 ret;
- xdr_init_encode(&xdr, &dummy, NULL);
+ svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
ignore_crossmnt);
*p = xdr.p;
@@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr)
return nfserr;
- if (resp->xdr.buf->page_len)
- return nfserr_resource;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p)
@@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
if (xdr->end - xdr->p < 1)
return nfserr_resource;
+ if (resp->xdr.buf->page_len)
+ return nfserr_resource;
+
maxcount = svc_max_payload(resp->rqstp);
if (maxcount > read->rd_length)
maxcount = read->rd_length;
@@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
- (char *)resp->xdr.buf->head[0].iov_base);
resp->xdr.buf->page_len = maxcount;
xdr->buf->len += maxcount;
+ xdr->page_ptr += v;
+ xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
@@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
if (nfserr)
return nfserr;
+
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+
if (resp->xdr.buf->page_len)
return nfserr_resource;
if (!*resp->rqstp->rq_next_page)
@@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
maxcount = PAGE_SIZE;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
-
if (xdr->end - xdr->p < 1)
return nfserr_resource;
@@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
- (char *)resp->xdr.buf->head[0].iov_base;
resp->xdr.buf->page_len = maxcount;
xdr->buf->len += maxcount;
+ xdr->page_ptr += 1;
+ xdr->buf->buflen -= PAGE_SIZE;
xdr->iov = xdr->buf->tail;
/* Use rest of head for padding and remaining ops: */
@@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
if (nfserr)
return nfserr;
- if (resp->xdr.buf->page_len)
- return nfserr_resource;
- if (!*resp->rqstp->rq_next_page)
- return nfserr_resource;
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
+ if (resp->xdr.buf->page_len)
+ return nfserr_resource;
+ if (!*resp->rqstp->rq_next_page)
+ return nfserr_resource;
+
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
WRITE32(0);
WRITE32(0);
@@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
xdr->iov = xdr->buf->tail;
+ xdr->page_ptr++;
+ xdr->buf->buflen -= PAGE_SIZE;
+ xdr->iov = xdr->buf->tail;
+
/* Use rest of head for padding and remaining ops: */
resp->xdr.buf->tail[0].iov_base = tailbase;
resp->xdr.buf->tail[0].iov_len = 0;
@@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
!nfsd4_enc_ops[op->opnum]);
encoder = nfsd4_enc_ops[op->opnum];
op->status = encoder(resp, op->status, &op->u);
+ xdr_commit_encode(xdr);
+
/* nfsd4_check_resp_size guarantees enough room for error status */
if (!op->status) {
int space_needed = 0;
@@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
buf->tail[0].iov_len);
+ rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
p = resp->tagp;
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e7632..39c50e1 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -244,6 +244,7 @@ struct svc_rqst {
struct page * rq_pages[RPCSVC_MAXPAGES];
struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */
+ struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e7bb2e3..b23d69f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0..baec792 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 8ae8ee7..40198c5 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+ xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -482,6 +483,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it. But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ int shift = xdr->scratch.iov_len;
+ void *page;
+
+ if (shift == 0)
+ return;
+ page = page_address(*xdr->page_ptr);
+ memcpy(xdr->scratch.iov_base, page, shift);
+ memmove(page, page + shift, (void *)xdr->p - page);
+ xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+ static __be32 *p;
+ int space_left;
+ int frag1bytes, frag2bytes;
+
+ if (nbytes > PAGE_SIZE)
+ return NULL; /* Bigger buffers require special handling */
+ if (xdr->buf->len + nbytes > xdr->buf->buflen)
+ return NULL; /* Sorry, we're totally out of space */
+ frag1bytes = (xdr->end - xdr->p) << 2;
+ frag2bytes = nbytes - frag1bytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += frag1bytes;
+ else {
+ xdr->buf->page_len += frag1bytes;
+ xdr->page_ptr++;
+ }
+ xdr->iov = NULL;
+ /*
+ * If the last encode didn't end exactly on a page boundary, the
+ * next one will straddle boundaries. Encode into the next
+ * page, then copy it back later in xdr_commit_encode. We use
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+ xdr->scratch.iov_base = xdr->p;
+ xdr->scratch.iov_len = frag1bytes;
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+ * shifted this one back:
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+ xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+}
+
+/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
* @nbytes: number of bytes to reserve
@@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
+ xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
- return NULL;
+ return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
- xdr->iov->iov_len += nbytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += nbytes;
+ else
+ xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
@@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(1);
return;
}
+ xdr_commit_encode(xdr);
fraglen = min_t(int, buf->len - len, tail->iov_len);
tail->iov_len -= fraglen;
--
1.9.0
next prev parent reply other threads:[~2014-05-22 19:32 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-22 19:31 xdr encoding fixes (v3) J. Bruce Fields
2014-05-22 19:31 ` [PATCH 01/52] nfsd4: READ, READDIR, etc., are idempotent J. Bruce Fields
2014-05-23 1:21 ` Kinglong Mee
2014-05-23 13:15 ` J. Bruce Fields
2014-05-22 19:31 ` [PATCH 02/52] nfsd4: allow larger 4.1 session drc slots J. Bruce Fields
2014-05-22 19:31 ` [PATCH 03/52] nfsd4: fill in some missing op_name's J. Bruce Fields
2014-05-22 19:31 ` [PATCH 04/52] nfsd4: decoding errors can still be cached and require space J. Bruce Fields
2014-05-22 19:31 ` [PATCH 05/52] nfsd4: read size estimate should include padding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 06/52] nfsd4: fix write reply size estimate J. Bruce Fields
2014-05-22 19:31 ` [PATCH 07/52] nfsd4: embed xdr_stream in nfsd4_compoundres J. Bruce Fields
2014-05-22 19:31 ` [PATCH 08/52] nfsd4: tweak nfsd4_encode_getattr to take xdr_stream J. Bruce Fields
2014-05-22 19:31 ` [PATCH 09/52] nfsd4: move proc_compound xdr encode init to helper J. Bruce Fields
2014-05-22 19:31 ` [PATCH 10/52] nfsd4: reserve head space for krb5 integ/priv info J. Bruce Fields
2014-05-22 19:31 ` [PATCH 11/52] nfsd4: fix encoding of out-of-space replies J. Bruce Fields
2014-05-22 19:31 ` [PATCH 12/52] nfsd4: allow space for final error return J. Bruce Fields
2014-05-22 19:31 ` [PATCH 13/52] nfsd4: use xdr_reserve_space in attribute encoding J. Bruce Fields
2014-05-25 8:46 ` Kinglong Mee
2014-05-22 19:31 ` [PATCH 14/52] nfsd4: use xdr_stream throughout compound encoding J. Bruce Fields
2014-05-22 19:31 ` [PATCH 15/52] nfsd4: remove ADJUST_ARGS J. Bruce Fields
2014-05-22 19:31 ` [PATCH 16/52] nfsd4: no need for encode_compoundres to adjust lengths J. Bruce Fields
2014-05-22 19:31 ` [PATCH 17/52] nfsd4: keep xdr buf length updated J. Bruce Fields
2014-05-22 19:31 ` [PATCH 18/52] rpc: xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 19/52] nfsd4: use xdr_truncate_encode J. Bruce Fields
2014-05-22 19:31 ` [PATCH 20/52] nfsd4: "backfill" using write_bytes_to_xdr_buf J. Bruce Fields
2014-05-22 19:31 ` [PATCH 21/52] nfsd4: teach encoders to handle reserve_space failures J. Bruce Fields
2014-05-22 19:31 ` [PATCH 22/52] nfsd4: reserve space before inlining 0-copy pages J. Bruce Fields
2014-05-22 19:31 ` [PATCH 23/52] nfsd4: nfsd4_check_resp_size needn't recalculate length J. Bruce Fields
2014-05-22 19:31 ` [PATCH 24/52] nfsd4: remove redundant encode buffer size checking J. Bruce Fields
2014-05-22 19:32 ` [PATCH 25/52] nfsd4: size-checking cleanup J. Bruce Fields
2014-05-22 19:32 ` J. Bruce Fields [this message]
2014-05-22 19:32 ` [PATCH 27/52] nfsd4: convert 4.1 replay encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 28/52] nfsd4: don't try to encode conflicting owner if low on space J. Bruce Fields
2014-05-22 19:32 ` [PATCH 29/52] nfsd4: more precise nfsd4_max_reply J. Bruce Fields
2014-05-22 19:32 ` [PATCH 30/52] nfsd4: minor encode_read cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 31/52] nfsd4: nfsd4_check_resp_size should check against whole buffer J. Bruce Fields
2014-05-22 19:32 ` [PATCH 32/52] nfsd4: fix buflen calculation after read encoding J. Bruce Fields
2014-05-22 19:32 ` [PATCH 33/52] rpc: define xdr_restrict_buflen J. Bruce Fields
2014-05-22 19:32 ` [PATCH 34/52] nfsd4: adjust buflen to session channel limit J. Bruce Fields
2014-05-22 19:32 ` [PATCH 35/52] nfsd4: use session limits to release send buffer reservation J. Bruce Fields
2014-05-22 19:32 ` [PATCH 36/52] nfsd4: allow large readdirs J. Bruce Fields
2014-05-22 19:32 ` [PATCH 37/52] nfsd4: enforce rd_dircount J. Bruce Fields
2014-05-22 19:32 ` [PATCH 38/52] nfsd4: don't treat readlink like a zero-copy operation J. Bruce Fields
2014-05-22 19:32 ` [PATCH 39/52] nfsd4: better estimate of getattr response size J. Bruce Fields
2014-05-22 19:32 ` [PATCH 40/52] nfsd4: estimate sequence " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 41/52] nfsd4: turn off zero-copy-read in exotic cases J. Bruce Fields
2014-05-28 8:09 ` Christoph Hellwig
2014-05-28 14:01 ` J. Bruce Fields
2014-05-28 14:13 ` Anna Schumaker
2014-05-28 14:23 ` J. Bruce Fields
2014-05-28 14:27 ` Anna Schumaker
2014-06-03 4:18 ` Weston Andros Adamson
2014-06-03 14:10 ` J. Bruce Fields
2014-06-03 14:24 ` Weston Andros Adamson
2014-06-03 14:35 ` J. Bruce Fields
2014-05-28 21:08 ` J. Bruce Fields
2014-06-02 22:12 ` J. Bruce Fields
2014-05-22 19:32 ` [PATCH 42/52] nfsd4: nfsd_vfs_read doesn't use file handle parameter J. Bruce Fields
2014-05-22 19:32 ` [PATCH 43/52] nfsd4: separate splice and readv cases J. Bruce Fields
2014-05-22 19:32 ` [PATCH 44/52] nfsd4: read encoding cleanup J. Bruce Fields
2014-05-22 19:32 ` [PATCH 45/52] nfsd4: more " J. Bruce Fields
2014-05-22 19:32 ` [PATCH 46/52] nfsd4: allow exotic read compounds J. Bruce Fields
2014-05-22 19:32 ` [PATCH 47/52] nfsd4: really fix nfs4err_resource in 4.1 case J. Bruce Fields
2014-05-22 19:32 ` [PATCH 48/52] nfsd4: kill WRITE32 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 49/52] nfsd4: kill WRITE64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 50/52] nfsd4: kill WRITEMEM J. Bruce Fields
2014-05-22 19:32 ` [PATCH 51/52] nfsd4: kill write32, write64 J. Bruce Fields
2014-05-22 19:32 ` [PATCH 52/52] nfsd4: better reservation of head space for krb5 J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1400787148-25941-27-git-send-email-bfields@redhat.com \
--to=bfields@redhat.com \
--cc=hch@infradead.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).