From: "Michael S. Tsirkin" <mst-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
To: Ian Campbell <ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
"David S. Miller" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>,
Neil Brown <neilb-l3A5Bk7waGM@public.gmane.org>,
"J. Bruce Fields"
<bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>,
linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH 4/4] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack.
Date: Fri, 11 Nov 2011 14:38:24 +0200 [thread overview]
Message-ID: <20111111123824.GA23902@redhat.com> (raw)
In-Reply-To: <1320850927-30240-4-git-send-email-ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
On Wed, Nov 09, 2011 at 03:02:07PM +0000, Ian Campbell wrote:
> This prevents an issue where an ACK is delayed, a retransmit is queued (either
> at the RPC or TCP level) and the ACK arrives before the retransmission hits the
> wire. If this happens to an NFS WRITE RPC then the write() system call
> completes and the userspace process can continue, potentially modifying data
> referenced by the retransmission before the retransmission occurs.
>
> Signed-off-by: Ian Campbell <ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
> Acked-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
> Cc: "David S. Miller" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
> Cc: Neil Brown <neilb-l3A5Bk7waGM@public.gmane.org>
> Cc: "J. Bruce Fields" <bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
> Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
So this blocks the system call until all page references
are gone, right?
But, there's no upper limit on how long the
page is referenced, correct? consider a bridged setup
with an skb queued at a tap device - this cause one process
to block another one by virtue of not consuming a cloned skb?
> ---
> include/linux/sunrpc/xdr.h | 2 ++
> include/linux/sunrpc/xprt.h | 5 ++++-
> net/sunrpc/clnt.c | 27 ++++++++++++++++++++++-----
> net/sunrpc/svcsock.c | 3 ++-
> net/sunrpc/xprt.c | 13 +++++++++++++
> net/sunrpc/xprtsock.c | 3 ++-
> 6 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
> index a20970e..172f81e 100644
> --- a/include/linux/sunrpc/xdr.h
> +++ b/include/linux/sunrpc/xdr.h
> @@ -16,6 +16,7 @@
> #include <asm/byteorder.h>
> #include <asm/unaligned.h>
> #include <linux/scatterlist.h>
> +#include <linux/skbuff.h>
>
> /*
> * Buffer adjustment
> @@ -57,6 +58,7 @@ struct xdr_buf {
> tail[1]; /* Appended after page data */
>
> struct page ** pages; /* Array of contiguous pages */
> + struct skb_frag_destructor *destructor;
> unsigned int page_base, /* Start of page data */
> page_len, /* Length of page data */
> flags; /* Flags for data disposition */
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index 15518a1..75131eb 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -92,7 +92,10 @@ struct rpc_rqst {
> /* A cookie used to track the
> state of the transport
> connection */
> -
> + struct skb_frag_destructor destructor; /* SKB paged fragment
> + * destructor for
> + * transmitted pages*/
> +
> /*
> * Partial send handling
> */
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index c5347d2..919538d 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -61,6 +61,7 @@ static void call_reserve(struct rpc_task *task);
> static void call_reserveresult(struct rpc_task *task);
> static void call_allocate(struct rpc_task *task);
> static void call_decode(struct rpc_task *task);
> +static void call_complete(struct rpc_task *task);
> static void call_bind(struct rpc_task *task);
> static void call_bind_status(struct rpc_task *task);
> static void call_transmit(struct rpc_task *task);
> @@ -1113,6 +1114,8 @@ rpc_xdr_encode(struct rpc_task *task)
> (char *)req->rq_buffer + req->rq_callsize,
> req->rq_rcvsize);
>
> + req->rq_snd_buf.destructor = &req->destructor;
> +
> p = rpc_encode_header(task);
> if (p == NULL) {
> printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
> @@ -1276,6 +1279,7 @@ call_connect_status(struct rpc_task *task)
> static void
> call_transmit(struct rpc_task *task)
> {
> + struct rpc_rqst *req = task->tk_rqstp;
> dprint_status(task);
>
> task->tk_action = call_status;
> @@ -1309,8 +1313,8 @@ call_transmit(struct rpc_task *task)
> call_transmit_status(task);
> if (rpc_reply_expected(task))
> return;
> - task->tk_action = rpc_exit_task;
> - rpc_wake_up_queued_task(&task->tk_xprt->pending, task);
> + task->tk_action = call_complete;
> + skb_frag_destructor_unref(&req->destructor);
> }
>
> /*
> @@ -1383,7 +1387,8 @@ call_bc_transmit(struct rpc_task *task)
> return;
> }
>
> - task->tk_action = rpc_exit_task;
> + task->tk_action = call_complete;
> + skb_frag_destructor_unref(&req->destructor);
> if (task->tk_status < 0) {
> printk(KERN_NOTICE "RPC: Could not send backchannel reply "
> "error: %d\n", task->tk_status);
> @@ -1423,7 +1428,6 @@ call_bc_transmit(struct rpc_task *task)
> "error: %d\n", task->tk_status);
> break;
> }
> - rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
> }
> #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>
> @@ -1589,12 +1593,14 @@ call_decode(struct rpc_task *task)
> return;
> }
>
> - task->tk_action = rpc_exit_task;
> + task->tk_action = call_complete;
>
> if (decode) {
> task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
> task->tk_msg.rpc_resp);
> }
> + rpc_sleep_on(&req->rq_xprt->pending, task, NULL);
> + skb_frag_destructor_unref(&req->destructor);
> dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
> task->tk_status);
> return;
> @@ -1609,6 +1615,17 @@ out_retry:
> }
> }
>
> +/*
> + * 8. Wait for pages to be released by the network stack.
> + */
> +static void
> +call_complete(struct rpc_task *task)
> +{
> + dprintk("RPC: %5u call_complete result %d\n",
> + task->tk_pid, task->tk_status);
> + task->tk_action = rpc_exit_task;
> +}
> +
> static __be32 *
> rpc_encode_header(struct rpc_task *task)
> {
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 852a258..3685cad 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -196,7 +196,8 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
> while (pglen > 0) {
> if (slen == size)
> flags = 0;
> - result = kernel_sendpage(sock, *ppage, NULL, base, size, flags);
> + result = kernel_sendpage(sock, *ppage, xdr->destructor,
> + base, size, flags);
> if (result > 0)
> len += result;
> if (result != size)
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index f4385e4..925aa0c 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -1103,6 +1103,16 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
> xprt->xid = net_random();
> }
>
> +static int xprt_complete_skb_pages(void *calldata)
> +{
> + struct rpc_task *task = calldata;
> + struct rpc_rqst *req = task->tk_rqstp;
> +
> + dprintk("RPC: %5u completing skb pages\n", task->tk_pid);
> + rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
> + return 0;
> +}
> +
> static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
> {
> struct rpc_rqst *req = task->tk_rqstp;
> @@ -1115,6 +1125,9 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
> req->rq_xid = xprt_alloc_xid(xprt);
> req->rq_release_snd_buf = NULL;
> xprt_reset_majortimeo(req);
> + atomic_set(&req->destructor.ref, 1);
> + req->destructor.destroy = &xprt_complete_skb_pages;
> + req->destructor.data = task;
> dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
> req, ntohl(req->rq_xid));
> }
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index f79e40e9..af3a106 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -408,7 +408,8 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
> remainder -= len;
> if (remainder != 0 || more)
> flags |= MSG_MORE;
> - err = sock->ops->sendpage(sock, *ppage, NULL, base, len, flags);
> + err = sock->ops->sendpage(sock, *ppage, xdr->destructor,
> + base, len, flags);
> if (remainder == 0 || err != len)
> break;
> sent += err;
> --
> 1.7.2.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2011-11-11 12:38 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-09 15:01 [PATCH 0/4] skb paged fragment destructors Ian Campbell
2011-11-09 15:02 ` [PATCH 1/4] net: add support for per-paged-fragment destructors Ian Campbell
2011-11-09 15:33 ` Michał Mirosław
2011-11-09 16:25 ` Ian Campbell
2011-11-09 17:24 ` Michał Mirosław
2011-11-09 17:28 ` Ian Campbell
2011-11-09 15:02 ` [PATCH 2/4] net: only allow paged fragments with the same destructor to be coalesced Ian Campbell
[not found] ` <1320850895.955.172.camel-o4Be2W7LfRlXesXXhkcM7miJhflN2719@public.gmane.org>
2011-11-09 15:02 ` [PATCH 3/4] net: add paged frag destructor support to kernel_sendpage Ian Campbell
2011-11-09 18:02 ` Michał Mirosław
2011-11-09 15:02 ` [PATCH 4/4] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack Ian Campbell
[not found] ` <1320850927-30240-4-git-send-email-ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
2011-11-11 12:38 ` Michael S. Tsirkin [this message]
[not found] ` <20111111123824.GA23902-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2011-11-11 13:20 ` Ian Campbell
[not found] ` <1321017627.955.254.camel-o4Be2W7LfRlXesXXhkcM7miJhflN2719@public.gmane.org>
2011-11-11 20:00 ` J. Bruce Fields
2011-11-13 10:17 ` Michael S. Tsirkin
[not found] ` <20111113101713.GB15322-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2011-11-14 13:07 ` Ian Campbell
2011-11-14 13:25 ` David Laight
2011-11-09 17:49 ` [PATCH 0/4] skb paged fragment destructors Eric Dumazet
2011-11-10 10:39 ` Ian Campbell
2011-11-17 14:45 ` Ian Campbell
2011-11-17 14:51 ` Eric Dumazet
2011-11-17 20:22 ` Michał Mirosław
2011-12-06 11:57 ` Ian Campbell
2011-12-06 13:24 ` Eric Dumazet
2011-12-07 13:35 ` Ian Campbell
2011-12-09 13:47 ` Ian Campbell
2011-12-09 18:34 ` David Miller
2011-12-21 11:03 ` Ian Campbell
2011-12-21 11:08 ` Eric Dumazet
2011-12-21 11:18 ` Ian Campbell
2011-12-21 12:30 ` Eric Dumazet
2011-12-21 13:48 ` Ian Campbell
2011-12-21 14:02 ` Eric Dumazet
2011-12-21 19:28 ` David Miller
2011-12-22 10:33 ` Ian Campbell
2011-12-22 18:20 ` David Miller
2011-12-23 9:35 ` Ian Campbell
2012-01-03 9:36 ` David Laight
[not found] ` <45B8991A987A4149B40F1A061BF49097B96C9EFF05@LONPMAILBOX01.citrite.net>
2011-12-23 9:39 ` Ian Campbell
2011-12-23 21:52 ` David Miller
2011-12-22 18:34 ` Michał Mirosław
2011-12-22 18:43 ` David Miller
2011-12-22 19:29 ` Michał Mirosław
2011-12-22 19:34 ` David Miller
2011-12-23 18:10 ` Michał Mirosław
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111111123824.GA23902@redhat.com \
--to=mst-h+wxahxf7alqt0dzr+alfa@public.gmane.org \
--cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \
--cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org \
--cc=ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org \
--cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=neilb-l3A5Bk7waGM@public.gmane.org \
--cc=netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).