From: Ian Campbell <ian.campbell@citrix.com>
To: netdev@vger.kernel.org
Cc: David Miller <davem@davemloft.net>,
Eric Dumazet <eric.dumazet@gmail.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Ian Campbell <ian.campbell@citrix.com>,
Neil Brown <neilb@suse.de>,
"J. Bruce Fields" <bfields@fieldses.org>,
linux-nfs@vger.kernel.org
Subject: [PATCH 9/9] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack.
Date: Thu, 3 May 2012 15:56:11 +0100 [thread overview]
Message-ID: <1336056971-7839-9-git-send-email-ian.campbell@citrix.com> (raw)
In-Reply-To: <1336056915.20716.96.camel@zakaz.uk.xensource.com>
This prevents an issue where an ACK is delayed, a retransmit is queued (either
at the RPC or TCP level) and the ACK arrives before the retransmission hits the
wire. If this happens to an NFS WRITE RPC then the write() system call
completes and the userspace process can continue, potentially modifying data
referenced by the retransmission before the retransmission occurs.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Neil Brown <neilb@suse.de>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: linux-nfs@vger.kernel.org
Cc: netdev@vger.kernel.org
---
include/linux/sunrpc/xdr.h | 2 ++
include/linux/sunrpc/xprt.h | 5 ++++-
net/sunrpc/clnt.c | 27 ++++++++++++++++++++++-----
net/sunrpc/svcsock.c | 3 ++-
net/sunrpc/xprt.c | 12 ++++++++++++
net/sunrpc/xprtsock.c | 3 ++-
6 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index af70af3..ff1b121 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -16,6 +16,7 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
/*
* Buffer adjustment
@@ -57,6 +58,7 @@ struct xdr_buf {
tail[1]; /* Appended after page data */
struct page ** pages; /* Array of contiguous pages */
+ struct skb_frag_destructor *destructor;
unsigned int page_base, /* Start of page data */
page_len, /* Length of page data */
flags; /* Flags for data disposition */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 77d278d..e8d3f18 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -92,7 +92,10 @@ struct rpc_rqst {
/* A cookie used to track the
state of the transport
connection */
-
+ struct skb_frag_destructor destructor; /* SKB paged fragment
+ * destructor for
+ * transmitted pages*/
+
/*
* Partial send handling
*/
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6797246..351bf3d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -61,6 +61,7 @@ static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
+static void call_complete(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
@@ -1416,6 +1417,8 @@ rpc_xdr_encode(struct rpc_task *task)
(char *)req->rq_buffer + req->rq_callsize,
req->rq_rcvsize);
+ req->rq_snd_buf.destructor = &req->destructor;
+
p = rpc_encode_header(task);
if (p == NULL) {
printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
@@ -1581,6 +1584,7 @@ call_connect_status(struct rpc_task *task)
static void
call_transmit(struct rpc_task *task)
{
+ struct rpc_rqst *req = task->tk_rqstp;
dprint_status(task);
task->tk_action = call_status;
@@ -1614,8 +1618,8 @@ call_transmit(struct rpc_task *task)
call_transmit_status(task);
if (rpc_reply_expected(task))
return;
- task->tk_action = rpc_exit_task;
- rpc_wake_up_queued_task(&task->tk_xprt->pending, task);
+ task->tk_action = call_complete;
+ skb_frag_destructor_unref(&req->destructor);
}
/*
@@ -1688,7 +1692,8 @@ call_bc_transmit(struct rpc_task *task)
return;
}
- task->tk_action = rpc_exit_task;
+ task->tk_action = call_complete;
+ skb_frag_destructor_unref(&req->destructor);
if (task->tk_status < 0) {
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
@@ -1728,7 +1733,6 @@ call_bc_transmit(struct rpc_task *task)
"error: %d\n", task->tk_status);
break;
}
- rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -1906,12 +1910,14 @@ call_decode(struct rpc_task *task)
return;
}
- task->tk_action = rpc_exit_task;
+ task->tk_action = call_complete;
if (decode) {
task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
task->tk_msg.rpc_resp);
}
+ rpc_sleep_on(&req->rq_xprt->pending, task, NULL);
+ skb_frag_destructor_unref(&req->destructor);
dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
@@ -1926,6 +1932,17 @@ out_retry:
}
}
+/*
+ * 8. Wait for pages to be released by the network stack.
+ */
+static void
+call_complete(struct rpc_task *task)
+{
+ dprintk("RPC: %5u call_complete result %d\n",
+ task->tk_pid, task->tk_status);
+ task->tk_action = rpc_exit_task;
+}
+
static __be32 *
rpc_encode_header(struct rpc_task *task)
{
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f6d8c73..1145929 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -198,7 +198,8 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
while (pglen > 0) {
if (slen == size)
flags = 0;
- result = kernel_sendpage(sock, *ppage, NULL, base, size, flags);
+ result = kernel_sendpage(sock, *ppage, xdr->destructor,
+ base, size, flags);
if (result > 0)
len += result;
if (result != size)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6fe2dce..f8418a0 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1108,6 +1108,16 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
xprt->xid = net_random();
}
+static int xprt_complete_skb_pages(struct skb_frag_destructor *destroy)
+{
+ struct rpc_rqst *req =
+ container_of(destroy, struct rpc_rqst, destructor);
+
+ dprintk("RPC: %5u completing skb pages\n", req->rq_task->tk_pid);
+ rpc_wake_up_queued_task(&req->rq_xprt->pending, req->rq_task);
+ return 0;
+}
+
static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
{
struct rpc_rqst *req = task->tk_rqstp;
@@ -1120,6 +1130,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_xid = xprt_alloc_xid(xprt);
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
+ atomic_set(&req->destructor.ref, 1);
+ req->destructor.destroy = &xprt_complete_skb_pages;
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
req, ntohl(req->rq_xid));
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f1995dc..44e07f3 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -408,7 +408,8 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
remainder -= len;
if (remainder != 0 || more)
flags |= MSG_MORE;
- err = sock->ops->sendpage(sock, *ppage, NULL, base, len, flags);
+ err = sock->ops->sendpage(sock, *ppage, xdr->destructor,
+ base, len, flags);
if (remainder == 0 || err != len)
break;
sent += err;
--
1.7.2.5
next prev parent reply other threads:[~2012-05-03 14:59 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-03 14:55 [PATCH v5 0/9] skb paged fragment destructors Ian Campbell
2012-05-03 14:56 ` [PATCH 1/9] net: add and use SKB_ALLOCSIZE Ian Campbell
2012-05-03 14:56 ` [PATCH 2/9] net: Use SKB_WITH_OVERHEAD in build_skb Ian Campbell
2012-05-03 14:56 ` [PATCH 3/9] chelsio: use SKB_WITH_OVERHEAD Ian Campbell
2012-05-03 14:56 ` [PATCH 4/9] skb: add skb_shinfo_init and use for both alloc_skb, build_skb and skb_recycle Ian Campbell
2012-05-03 14:56 ` [PATCH 5/9] net: pad skb data and shinfo as a whole rather than individually Ian Campbell
2012-05-03 14:56 ` [PATCH 6/9] net: add support for per-paged-fragment destructors Ian Campbell
2012-05-03 14:56 ` [PATCH 7/9] net: add skb_orphan_frags to copy aside frags with destructors Ian Campbell
2012-05-03 15:41 ` Michael S. Tsirkin
2012-05-03 17:55 ` David Miller
2012-05-03 21:10 ` Michael S. Tsirkin
2012-05-04 6:54 ` David Miller
2012-05-04 10:03 ` Michael S. Tsirkin
2012-05-04 10:51 ` Ian Campbell
2012-05-06 17:01 ` Michael S. Tsirkin
2012-05-06 13:54 ` Michael S. Tsirkin
2012-05-07 10:24 ` Michael S. Tsirkin
2012-05-09 9:36 ` Ian Campbell
2012-05-03 14:56 ` [PATCH 8/9] net: add paged frag destructor support to kernel_sendpage Ian Campbell
2012-05-10 11:48 ` Michael S. Tsirkin
2012-05-03 14:56 ` Ian Campbell [this message]
[not found] ` <1336056971-7839-9-git-send-email-ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
2012-05-10 11:19 ` [PATCH 9/9] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack Michael S. Tsirkin
[not found] ` <20120510111948.GA9609-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2012-05-10 13:26 ` Ian Campbell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1336056971-7839-9-git-send-email-ian.campbell@citrix.com \
--to=ian.campbell@citrix.com \
--cc=bfields@fieldses.org \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=linux-nfs@vger.kernel.org \
--cc=mst@redhat.com \
--cc=neilb@suse.de \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).