netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
To: Ian Campbell <ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	"David S. Miller" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>,
	Neil Brown <neilb-l3A5Bk7waGM@public.gmane.org>,
	"J. Bruce Fields"
	<bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH 4/4] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack.
Date: Fri, 11 Nov 2011 14:38:24 +0200	[thread overview]
Message-ID: <20111111123824.GA23902@redhat.com> (raw)
In-Reply-To: <1320850927-30240-4-git-send-email-ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>

On Wed, Nov 09, 2011 at 03:02:07PM +0000, Ian Campbell wrote:
> This prevents an issue where an ACK is delayed, a retransmit is queued (either
> at the RPC or TCP level) and the ACK arrives before the retransmission hits the
> wire. If this happens to an NFS WRITE RPC then the write() system call
> completes and the userspace process can continue, potentially modifying data
> referenced by the retransmission before the retransmission occurs.
> 
> Signed-off-by: Ian Campbell <ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
> Acked-by: Trond Myklebust <Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA@public.gmane.org>
> Cc: "David S. Miller" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
> Cc: Neil Brown <neilb-l3A5Bk7waGM@public.gmane.org>
> Cc: "J. Bruce Fields" <bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
> Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

So this blocks the system call until all page references
are gone, right?
But there's no upper limit on how long the
page is referenced, correct? Consider a bridged setup
with an skb queued at a tap device - couldn't this cause one process
to block another one by virtue of not consuming a cloned skb?

> ---
>  include/linux/sunrpc/xdr.h  |    2 ++
>  include/linux/sunrpc/xprt.h |    5 ++++-
>  net/sunrpc/clnt.c           |   27 ++++++++++++++++++++++-----
>  net/sunrpc/svcsock.c        |    3 ++-
>  net/sunrpc/xprt.c           |   13 +++++++++++++
>  net/sunrpc/xprtsock.c       |    3 ++-
>  6 files changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
> index a20970e..172f81e 100644
> --- a/include/linux/sunrpc/xdr.h
> +++ b/include/linux/sunrpc/xdr.h
> @@ -16,6 +16,7 @@
>  #include <asm/byteorder.h>
>  #include <asm/unaligned.h>
>  #include <linux/scatterlist.h>
> +#include <linux/skbuff.h>
>  
>  /*
>   * Buffer adjustment
> @@ -57,6 +58,7 @@ struct xdr_buf {
>  			tail[1];	/* Appended after page data */
>  
>  	struct page **	pages;		/* Array of contiguous pages */
> +	struct skb_frag_destructor *destructor;
>  	unsigned int	page_base,	/* Start of page data */
>  			page_len,	/* Length of page data */
>  			flags;		/* Flags for data disposition */
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index 15518a1..75131eb 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -92,7 +92,10 @@ struct rpc_rqst {
>  						/* A cookie used to track the
>  						   state of the transport
>  						   connection */
> -	
> +	struct skb_frag_destructor destructor;	/* SKB paged fragment
> +						 * destructor for
> +						 * transmitted pages*/
> +
>  	/*
>  	 * Partial send handling
>  	 */
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index c5347d2..919538d 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -61,6 +61,7 @@ static void	call_reserve(struct rpc_task *task);
>  static void	call_reserveresult(struct rpc_task *task);
>  static void	call_allocate(struct rpc_task *task);
>  static void	call_decode(struct rpc_task *task);
> +static void	call_complete(struct rpc_task *task);
>  static void	call_bind(struct rpc_task *task);
>  static void	call_bind_status(struct rpc_task *task);
>  static void	call_transmit(struct rpc_task *task);
> @@ -1113,6 +1114,8 @@ rpc_xdr_encode(struct rpc_task *task)
>  			 (char *)req->rq_buffer + req->rq_callsize,
>  			 req->rq_rcvsize);
>  
> +	req->rq_snd_buf.destructor = &req->destructor;
> +
>  	p = rpc_encode_header(task);
>  	if (p == NULL) {
>  		printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
> @@ -1276,6 +1279,7 @@ call_connect_status(struct rpc_task *task)
>  static void
>  call_transmit(struct rpc_task *task)
>  {
> +	struct rpc_rqst *req = task->tk_rqstp;
>  	dprint_status(task);
>  
>  	task->tk_action = call_status;
> @@ -1309,8 +1313,8 @@ call_transmit(struct rpc_task *task)
>  	call_transmit_status(task);
>  	if (rpc_reply_expected(task))
>  		return;
> -	task->tk_action = rpc_exit_task;
> -	rpc_wake_up_queued_task(&task->tk_xprt->pending, task);
> +	task->tk_action = call_complete;
> +	skb_frag_destructor_unref(&req->destructor);
>  }
>  
>  /*
> @@ -1383,7 +1387,8 @@ call_bc_transmit(struct rpc_task *task)
>  		return;
>  	}
>  
> -	task->tk_action = rpc_exit_task;
> +	task->tk_action = call_complete;
> +	skb_frag_destructor_unref(&req->destructor);
>  	if (task->tk_status < 0) {
>  		printk(KERN_NOTICE "RPC: Could not send backchannel reply "
>  			"error: %d\n", task->tk_status);
> @@ -1423,7 +1428,6 @@ call_bc_transmit(struct rpc_task *task)
>  			"error: %d\n", task->tk_status);
>  		break;
>  	}
> -	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
>  }
>  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>  
> @@ -1589,12 +1593,14 @@ call_decode(struct rpc_task *task)
>  		return;
>  	}
>  
> -	task->tk_action = rpc_exit_task;
> +	task->tk_action = call_complete;
>  
>  	if (decode) {
>  		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
>  						      task->tk_msg.rpc_resp);
>  	}
> +	rpc_sleep_on(&req->rq_xprt->pending, task, NULL);
> +	skb_frag_destructor_unref(&req->destructor);
>  	dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
>  			task->tk_status);
>  	return;
> @@ -1609,6 +1615,17 @@ out_retry:
>  	}
>  }
>  
> +/*
> + * 8.	Wait for pages to be released by the network stack.
> + */
> +static void
> +call_complete(struct rpc_task *task)
> +{
> +	dprintk("RPC: %5u call_complete result %d\n",
> +		task->tk_pid, task->tk_status);
> +	task->tk_action = rpc_exit_task;
> +}
> +
>  static __be32 *
>  rpc_encode_header(struct rpc_task *task)
>  {
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 852a258..3685cad 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -196,7 +196,8 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
>  	while (pglen > 0) {
>  		if (slen == size)
>  			flags = 0;
> -		result = kernel_sendpage(sock, *ppage, NULL, base, size, flags);
> +		result = kernel_sendpage(sock, *ppage, xdr->destructor,
> +					 base, size, flags);
>  		if (result > 0)
>  			len += result;
>  		if (result != size)
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index f4385e4..925aa0c 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -1103,6 +1103,16 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
>  	xprt->xid = net_random();
>  }
>  
> +static int xprt_complete_skb_pages(void *calldata)
> +{
> +	struct rpc_task *task = calldata;
> +	struct rpc_rqst	*req = task->tk_rqstp;
> +
> +	dprintk("RPC: %5u completing skb pages\n", task->tk_pid);
> +	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
> +	return 0;
> +}
> +
>  static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
>  {
>  	struct rpc_rqst	*req = task->tk_rqstp;
> @@ -1115,6 +1125,9 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
>  	req->rq_xid     = xprt_alloc_xid(xprt);
>  	req->rq_release_snd_buf = NULL;
>  	xprt_reset_majortimeo(req);
> +	atomic_set(&req->destructor.ref, 1);
> +	req->destructor.destroy = &xprt_complete_skb_pages;
> +	req->destructor.data = task;
>  	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
>  			req, ntohl(req->rq_xid));
>  }
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index f79e40e9..af3a106 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -408,7 +408,8 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
>  		remainder -= len;
>  		if (remainder != 0 || more)
>  			flags |= MSG_MORE;
> -		err = sock->ops->sendpage(sock, *ppage, NULL, base, len, flags);
> +		err = sock->ops->sendpage(sock, *ppage, xdr->destructor,
> +					  base, len, flags);
>  		if (remainder == 0 || err != len)
>  			break;
>  		sent += err;
> -- 
> 1.7.2.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2011-11-11 12:38 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-09 15:01 [PATCH 0/4] skb paged fragment destructors Ian Campbell
2011-11-09 15:02 ` [PATCH 1/4] net: add support for per-paged-fragment destructors Ian Campbell
2011-11-09 15:33   ` Michał Mirosław
2011-11-09 16:25     ` Ian Campbell
2011-11-09 17:24       ` Michał Mirosław
2011-11-09 17:28         ` Ian Campbell
2011-11-09 15:02 ` [PATCH 2/4] net: only allow paged fragments with the same destructor to be coalesced Ian Campbell
     [not found] ` <1320850895.955.172.camel-o4Be2W7LfRlXesXXhkcM7miJhflN2719@public.gmane.org>
2011-11-09 15:02   ` [PATCH 3/4] net: add paged frag destructor support to kernel_sendpage Ian Campbell
2011-11-09 18:02     ` Michał Mirosław
2011-11-09 15:02   ` [PATCH 4/4] sunrpc: use SKB fragment destructors to delay completion until page is released by network stack Ian Campbell
     [not found]     ` <1320850927-30240-4-git-send-email-ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
2011-11-11 12:38       ` Michael S. Tsirkin [this message]
     [not found]         ` <20111111123824.GA23902-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2011-11-11 13:20           ` Ian Campbell
     [not found]             ` <1321017627.955.254.camel-o4Be2W7LfRlXesXXhkcM7miJhflN2719@public.gmane.org>
2011-11-11 20:00               ` J. Bruce Fields
2011-11-13 10:17             ` Michael S. Tsirkin
     [not found]               ` <20111113101713.GB15322-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2011-11-14 13:07                 ` Ian Campbell
2011-11-14 13:25                   ` David Laight
2011-11-09 17:49 ` [PATCH 0/4] skb paged fragment destructors Eric Dumazet
2011-11-10 10:39   ` Ian Campbell
2011-11-17 14:45   ` Ian Campbell
2011-11-17 14:51     ` Eric Dumazet
2011-11-17 20:22     ` Michał Mirosław
2011-12-06 11:57 ` Ian Campbell
2011-12-06 13:24   ` Eric Dumazet
2011-12-07 13:35     ` Ian Campbell
2011-12-09 13:47       ` Ian Campbell
2011-12-09 18:34         ` David Miller
2011-12-21 11:03           ` Ian Campbell
2011-12-21 11:08             ` Eric Dumazet
2011-12-21 11:18               ` Ian Campbell
2011-12-21 12:30                 ` Eric Dumazet
2011-12-21 13:48                   ` Ian Campbell
2011-12-21 14:02                     ` Eric Dumazet
2011-12-21 19:28                       ` David Miller
2011-12-22 10:33                         ` Ian Campbell
2011-12-22 18:20                           ` David Miller
2011-12-23  9:35                             ` Ian Campbell
2012-01-03  9:36                               ` David Laight
     [not found]                             ` <45B8991A987A4149B40F1A061BF49097B96C9EFF05@LONPMAILBOX01.citrite.net>
2011-12-23  9:39                               ` Ian Campbell
2011-12-23 21:52                                 ` David Miller
2011-12-22 18:34                           ` Michał Mirosław
2011-12-22 18:43                             ` David Miller
2011-12-22 19:29                               ` Michał Mirosław
2011-12-22 19:34                                 ` David Miller
2011-12-23 18:10                                   ` Michał Mirosław

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111111123824.GA23902@redhat.com \
    --to=mst-h+wxahxf7alqt0dzr+alfa@public.gmane.org \
    --cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \
    --cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org \
    --cc=ian.campbell-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=neilb-l3A5Bk7waGM@public.gmane.org \
    --cc=netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).