netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Lamparter <chunkeey@googlemail.com>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, eric.dumazet@gmail.com, rlwinm@sdf.org,
	alexmcwhirter@triadic.us
Subject: Re: [PATCH][CFT] Saner error handling in skb_copy_datagram_iter() et.al.
Date: Sun, 19 Feb 2017 20:19:22 +0100	[thread overview]
Message-ID: <4687112.CyKDoQHoLC@debian64> (raw)
In-Reply-To: <20170218000214.GA19777@ZenIV.linux.org.uk>

On Saturday, February 18, 2017 12:02:14 AM CET Al Viro wrote:
> On Fri, Feb 17, 2017 at 05:03:15PM +0000, Al Viro wrote:
> > On Fri, Feb 17, 2017 at 10:54:20AM -0500, David Miller wrote:
> > > From: Al Viro <viro@ZenIV.linux.org.uk>
> > > Date: Tue, 14 Feb 2017 01:33:06 +0000
> > > 
> > > > OK...  Remaining interesting question is whether it adds a noticable
> > > > overhead.  Could somebody try it on assorted benchmarks and see if
> > > > it slows the things down?  The patch in question follows:
> > > 
> > > That's about a 40 byte copy onto the stack for each invocation of this
> > > thing.  You can benchmark all you want, but it's clear that this is
> > > non-trivial amount of work and will take some operations from fitting
> > > in the cache to not doing so for sure.
> > 
> > In principle, that could be reduced a bit (32 bytes - ->type is never
> > changed, so we don't need to restore it), but that's not much of improvement...
> 
> Actually, I've a better solution.  Namely, analogue of iov_iter_advance()
> for going backwards.  The restriction is that you should never unroll
> further than where you've initially started *or* have the iovec, etc.
> array modified under you.  For iovec/kvec/bio_vec it's trivial, for pipe -
> a bit more convoluted, but still doable.  Then net/core/datagram.c stuff
> could simply use iov_iter_unroll() in case of error - all we need to keep
> track of is how much had we copied and that's easy to do.
> 
> The patch below is completely untested, but if it works it should avoid
> buggering the fast paths at all, still giving the same semantics re
> reverting ->msg_iter both on EINVAL and EFAULT.  Comments?
I've tested it. It also fixes the corruption issue I can reproduce
with my setup.:
# tc qdisc add dev eth0 root netem corrupt 0.1%
(and the dlbug code)

So, for what's worth:
Tested-by: Christian Lamparter <chunkeey@googlemail.com>

> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 804e34c6f981..237965348bc2 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -39,7 +39,10 @@ struct iov_iter {
>  	};
>  	union {
>  		unsigned long nr_segs;
> -		int idx;
> +		struct {
> +			int idx;
> +			int start_idx;
> +		};
>  	};
>  };
>  
> @@ -81,6 +84,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
>  size_t iov_iter_copy_from_user_atomic(struct page *page,
>  		struct iov_iter *i, unsigned long offset, size_t bytes);
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
> +void iov_iter_unroll(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index e68604ae3ced..afdaca37f5c9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -786,6 +786,68 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
>  }
>  EXPORT_SYMBOL(iov_iter_advance);
>  
> +void iov_iter_unroll(struct iov_iter *i, size_t unroll)
> +{
> +	if (!unroll)
> +		return;
> +	i->count += unroll;
> +	if (unlikely(i->type & ITER_PIPE)) {
> +		struct pipe_inode_info *pipe = i->pipe;
> +		int idx = i->idx;
> +		size_t off = i->iov_offset;
> +		while (1) {
> +			size_t n = off - pipe->bufs[idx].offset;
> +			if (unroll < n) {
> +				off -= (n - unroll);
> +				break;
> +			}
> +			unroll -= n;
> +			if (!unroll && idx == i->start_idx) {
> +				off = 0;
> +				break;
> +			}
> +			if (!idx--)
> +				idx = pipe->buffers - 1;
> +			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
> +		}
> +		i->iov_offset = off;
> +		i->idx = idx;
> +		pipe_truncate(i);
> +		return;
> +	}
> +	if (unroll <= i->iov_offset) {
> +		i->iov_offset -= unroll;
> +		return;
> +	}
> +	unroll -= i->iov_offset;
> +	if (i->type & ITER_BVEC) {
> +		const struct bio_vec *bvec = i->bvec;
> +		while (1) {
> +			size_t n = (--bvec)->bv_len;
> +			i->nr_segs++;
> +			if (unroll <= n) {
> +				i->bvec = bvec;
> +				i->iov_offset = n - unroll;
> +				return;
> +			}
> +			unroll -= n;
> +		}
> +	} else { /* same logics for iovec and kvec */
> +		const struct iovec *iov = i->iov;
> +		while (1) {
> +			size_t n = (--iov)->iov_len;
> +			i->nr_segs++;
> +			if (unroll <= n) {
> +				i->iov = iov;
> +				i->iov_offset = n - unroll;
> +				return;
> +			}
> +			unroll -= n;
> +		}
> +	}
> +}
> +EXPORT_SYMBOL(iov_iter_unroll);
> +
>  /*
>   * Return the count of just the current iov_iter segment.
>   */
> @@ -839,6 +901,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
>  	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
>  	i->iov_offset = 0;
>  	i->count = count;
> +	i->start_idx = i->idx;
>  }
>  EXPORT_SYMBOL(iov_iter_pipe);
>  
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index 662bea587165..63c7353a6ba2 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -394,7 +394,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
>  			   struct iov_iter *to, int len)
>  {
>  	int start = skb_headlen(skb);
> -	int i, copy = start - offset;
> +	int i, copy = start - offset, start_off = offset, n;
>  	struct sk_buff *frag_iter;
>  
>  	trace_skb_copy_datagram_iovec(skb, len);
> @@ -403,11 +403,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
>  	if (copy > 0) {
>  		if (copy > len)
>  			copy = len;
> -		if (copy_to_iter(skb->data + offset, copy, to) != copy)
> +		n = copy_to_iter(skb->data + offset, copy, to);
> +		offset += n;
> +		if (n != copy)
>  			goto short_copy;
>  		if ((len -= copy) == 0)
>  			return 0;
> -		offset += copy;
>  	}
>  
>  	/* Copy paged appendix. Hmm... why does this look so complicated? */
> @@ -421,13 +422,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
>  		if ((copy = end - offset) > 0) {
>  			if (copy > len)
>  				copy = len;
> -			if (copy_page_to_iter(skb_frag_page(frag),
> +			n = copy_page_to_iter(skb_frag_page(frag),
>  					      frag->page_offset + offset -
> -					      start, copy, to) != copy)
> +					      start, copy, to);
> +			offset += n;
> +			if (n != copy)
>  				goto short_copy;
>  			if (!(len -= copy))
>  				return 0;
> -			offset += copy;
>  		}
>  		start = end;
>  	}
> @@ -459,6 +461,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
>  	 */
>  
>  fault:
> +	iov_iter_unroll(to, offset - start_off);
>  	return -EFAULT;
>  
>  short_copy:
> @@ -609,7 +612,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
>  				      __wsum *csump)
>  {
>  	int start = skb_headlen(skb);
> -	int i, copy = start - offset;
> +	int i, copy = start - offset, start_off = offset;
>  	struct sk_buff *frag_iter;
>  	int pos = 0;
>  	int n;
> @@ -619,11 +622,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
>  		if (copy > len)
>  			copy = len;
>  		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
> +		offset += n;
>  		if (n != copy)
>  			goto fault;
>  		if ((len -= copy) == 0)
>  			return 0;
> -		offset += copy;
>  		pos = copy;
>  	}
>  
> @@ -645,12 +648,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
>  						  offset - start, copy,
>  						  &csum2, to);
>  			kunmap(page);
> +			offset += n;
>  			if (n != copy)
>  				goto fault;
>  			*csump = csum_block_add(*csump, csum2, pos);
>  			if (!(len -= copy))
>  				return 0;
> -			offset += copy;
>  			pos += copy;
>  		}
>  		start = end;
> @@ -683,6 +686,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
>  		return 0;
>  
>  fault:
> +	iov_iter_unroll(to, offset - start_off);
>  	return -EFAULT;
>  }
>  
> @@ -767,6 +771,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
>  	}
>  	return 0;
>  csum_error:
> +	iov_iter_unroll(&msg->msg_iter, chunk);
>  	return -EINVAL;
>  fault:
>  	return -EFAULT;
>

  parent reply	other threads:[~2017-02-19 19:19 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-24  3:35 PROBLEM: network data corruption (bisected to e5a4b0bb803b) Alan Curry
     [not found] ` <201607240335.u6O3ZE81014171-WF+c3Tt1nJM@public.gmane.org>
2016-07-24 17:45   ` Christian Lamparter
2016-07-24 19:02     ` Al Viro
2016-07-26  4:57       ` Alan Curry
2016-07-26 13:59         ` Christian Lamparter
2016-07-26 18:15           ` alexmcwhirter
2016-07-27  6:39             ` Kalle Valo
2016-07-27  1:14           ` Alan Curry
2016-07-27 10:32       ` Alan Curry
2016-07-27 18:04         ` alexmcwhirter
2016-07-27 23:02           ` alexmcwhirter
2016-07-27 23:45             ` David Miller
2016-07-28  0:31               ` Al Viro
2016-07-28  0:26                 ` alexmcwhirter
     [not found]                   ` <8b3126f66186015956e0f8090fb70532-O8/uFoRGvHWcqzYg7KEe8g@public.gmane.org>
2016-07-28  1:22                     ` Al Viro
2016-08-03  3:49                       ` Alan Curry
2016-08-03 12:43                         ` Christian Lamparter
2016-08-03 23:25                           ` Alan Curry
     [not found]                         ` <20160803054118.GG2356@ZenIV.linux.org.uk>
     [not found]                           ` <2363167.YiBS7sFNO2@debian64>
     [not found]                             ` <20160809145836.GQ2356@ZenIV.linux.org.uk>
     [not found]                               ` <20170210081126.GA14157@ZenIV.linux.org.uk>
2017-02-10 21:45                                 ` Al Viro
2017-02-11 19:37                                   ` Christian Lamparter
2017-02-12  5:42                                     ` Al Viro
2017-02-13 21:56                                       ` Christian Lamparter
2017-02-14  1:33                                         ` [PATCH][CFT] Saner error handling in skb_copy_datagram_iter() et.al. (was Re: PROBLEM: network data corruption (bisected to e5a4b0bb803b)) Al Viro
2017-02-17 15:54                                           ` [PATCH][CFT] Saner error handling in skb_copy_datagram_iter() et.al David Miller
2017-02-17 17:03                                             ` Al Viro
2017-02-18  0:02                                               ` Al Viro
2017-02-18  2:24                                                 ` Al Viro
2017-02-19 19:19                                                 ` Christian Lamparter [this message]
2017-02-20 15:14                                                 ` David Miller
2017-02-21 13:25                                                 ` David Laight
2016-07-26  4:32     ` PROBLEM: network data corruption (bisected to e5a4b0bb803b) Alan Curry
2016-07-26  4:38     ` alexmcwhirter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4687112.CyKDoQHoLC@debian64 \
    --to=chunkeey@googlemail.com \
    --cc=alexmcwhirter@triadic.us \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=rlwinm@sdf.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).