netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Discard tcp out-of-order queue if system limit is reached
@ 2008-04-15 14:54 Vitaliy Gusev
  2008-04-15 14:58 ` Andi Kleen
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Vitaliy Gusev @ 2008-04-15 14:54 UTC (permalink / raw)
  To: David Miller; +Cc: Andi Kleen, Alexey Kuznetsov, netdev

Hello!

tcp_prune_queue() doesn't prune an out-of-order queue if socket
is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
reached then skb cannot be queued. It can lead to deadlock situation as any skb that
fills sequence hole is dropped.
So discard out-of-order queue if system-wide limit is reached.

Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>

---
 net/ipv4/tcp_input.c |   78 +++++++++++++++++++++++++++++++++----------------
 1 files changed, 52 insertions(+), 26 deletions(-)


diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5119856..bbb7d88 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3841,8 +3841,28 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }
 
+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, size)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_rmem_schedule(sk, size)) {
+			if (!tcp_prune_ofo_queue(sk))
+				return -1;
+
+			if (!sk_rmem_schedule(sk, size))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3892,12 +3912,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		if (eaten <= 0) {
 queue_and_out:
 			if (eaten < 0 &&
-			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-			     !sk_rmem_schedule(sk, skb->truesize))) {
-				if (tcp_prune_queue(sk) < 0 ||
-				    !sk_rmem_schedule(sk, skb->truesize))
-					goto drop;
-			}
+			    tcp_try_rmem_schedule(sk, skb->truesize))
+				goto drop;
+
 			skb_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
@@ -3966,12 +3983,8 @@ drop:
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb->truesize))
+		goto drop;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4198,6 +4211,32 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 }
 
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int res = 0;
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_mem_reclaim(sk);
+		res = 1;
+	}
+	return res;
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4231,20 +4270,7 @@ static int tcp_prune_queue(struct sock *sk)
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */
 
-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tcp_is_sack(tp))
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Discard tcp out-of-order queue if system limit is reached
  2008-04-15 14:54 [PATCH] Discard tcp out-of-order queue if system limit is reached Vitaliy Gusev
@ 2008-04-15 14:58 ` Andi Kleen
  2008-04-15 15:22 ` Stephen Hemminger
  2008-04-16  3:27 ` David Miller
  2 siblings, 0 replies; 6+ messages in thread
From: Andi Kleen @ 2008-04-15 14:58 UTC (permalink / raw)
  To: Vitaliy Gusev; +Cc: David Miller, Alexey Kuznetsov, netdev

Vitaliy Gusev wrote:
> Hello!
> 
> tcp_prune_queue() doesn't prune an out-of-order queue if socket
> is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
> reached then skb cannot be queued. It can lead to deadlock situation as any skb that
> fills sequence hole is dropped.
> So discard out-of-order queue if system-wide limit is reached.

Thanks. At least the description makes much more sense than before now.

Acked-by: Andi Kleen <andi@firstfloor.org>

-Andi


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Discard tcp out-of-order queue if system limit is reached
  2008-04-15 14:54 [PATCH] Discard tcp out-of-order queue if system limit is reached Vitaliy Gusev
  2008-04-15 14:58 ` Andi Kleen
@ 2008-04-15 15:22 ` Stephen Hemminger
  2008-04-16  3:27 ` David Miller
  2 siblings, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2008-04-15 15:22 UTC (permalink / raw)
  To: Vitaliy Gusev; +Cc: David Miller, Andi Kleen, Alexey Kuznetsov, netdev

On Tue, 15 Apr 2008 18:54:44 +0400
Vitaliy Gusev <vgusev@openvz.org> wrote:

> Hello!
> 
> tcp_prune_queue() doesn't prune an out-of-order queue if socket
> is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
> reached then skb cannot be queued. It can lead to deadlock situation as any skb that
> fills sequence hole is dropped.
> So discard out-of-order queue if system-wide limit is reached.
> 
> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
> 
> ---
>  net/ipv4/tcp_input.c |   78 +++++++++++++++++++++++++++++++++----------------
>  1 files changed, 52 insertions(+), 26 deletions(-)
> 
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 5119856..bbb7d88 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3841,8 +3841,28 @@ static void tcp_ofo_queue(struct sock *sk)
>  	}
>  }
>  
> +static int tcp_prune_ofo_queue(struct sock *sk);
>  static int tcp_prune_queue(struct sock *sk);
>  
> +static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)

minor nit, current preferred style is to not use inline for used once functions,
the compiler will do it anyway.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Discard tcp out-of-order queue if system limit is reached
  2008-04-15 14:54 [PATCH] Discard tcp out-of-order queue if system limit is reached Vitaliy Gusev
  2008-04-15 14:58 ` Andi Kleen
  2008-04-15 15:22 ` Stephen Hemminger
@ 2008-04-16  3:27 ` David Miller
  2008-04-16  7:01   ` Andi Kleen
  2 siblings, 1 reply; 6+ messages in thread
From: David Miller @ 2008-04-16  3:27 UTC (permalink / raw)
  To: vgusev; +Cc: andi, kuznet, netdev

From: Vitaliy Gusev <vgusev@openvz.org>
Date: Tue, 15 Apr 2008 18:54:44 +0400

> tcp_prune_queue() doesn't prune an out-of-order queue if socket
> is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
> reached then skb cannot be queued. It can lead to deadlock situation as any skb that
> fills sequence hole is dropped.
> So discard out-of-order queue if system-wide limit is reached.
> 
> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>

I applied your original patch already, so I added the following
relative patch.

Please provide relative fixup patches when I say that I've applied
your patch already.

Thank you.

commit 56f367bbfd5a7439961499ca6a2f0822d2074d83
Author: Vitaliy Gusev <vgusev@openvz.org>
Date:   Tue Apr 15 20:26:34 2008 -0700

    [TCP]: Add return value indication to tcp_prune_ofo_queue().
    
    Returns non-zero if tp->out_of_order_queue was seen non-empty.
    This allows tcp_try_rmem_schedule() to return early.
    
    Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 61db7b1..bbb7d88 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3841,7 +3841,7 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }
 
-static void tcp_prune_ofo_queue(struct sock *sk);
+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
 static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
@@ -3853,7 +3853,9 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 			return -1;
 
 		if (!sk_rmem_schedule(sk, size)) {
-			tcp_prune_ofo_queue(sk);
+			if (!tcp_prune_ofo_queue(sk))
+				return -1;
+
 			if (!sk_rmem_schedule(sk, size))
 				return -1;
 		}
@@ -4211,10 +4213,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 
 /*
  * Purge the out-of-order queue.
+ * Return true if queue was pruned.
  */
-static void tcp_prune_ofo_queue(struct sock *sk)
+static int tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int res = 0;
 
 	if (!skb_queue_empty(&tp->out_of_order_queue)) {
 		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
@@ -4228,7 +4232,9 @@ static void tcp_prune_ofo_queue(struct sock *sk)
 		if (tp->rx_opt.sack_ok)
 			tcp_sack_reset(&tp->rx_opt);
 		sk_mem_reclaim(sk);
+		res = 1;
 	}
+	return res;
 }
 
 /* Reduce allocated memory if we can, trying to get

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Discard tcp out-of-order queue if system limit is reached
  2008-04-16  3:27 ` David Miller
@ 2008-04-16  7:01   ` Andi Kleen
  2008-04-16  7:08     ` David Miller
  0 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2008-04-16  7:01 UTC (permalink / raw)
  To: David Miller; +Cc: vgusev, kuznet, netdev

David Miller wrote:
> From: Vitaliy Gusev <vgusev@openvz.org>
> Date: Tue, 15 Apr 2008 18:54:44 +0400
> 
>> tcp_prune_queue() doesn't prune an out-of-order queue if socket
>> is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
>> reached then skb cannot be queued. It can lead to deadlock situation as any skb that
>> fills sequence hole is dropped.
>> So discard out-of-order queue if system-wide limit is reached.
>>
>> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
> 
> I applied your original patch already, so I added the following
> relative patch.

Could you please at least update the original description to the
one from the new patch? That one was somewhat misleading imho.

-Andi


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Discard tcp out-of-order queue if system limit is reached
  2008-04-16  7:01   ` Andi Kleen
@ 2008-04-16  7:08     ` David Miller
  0 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2008-04-16  7:08 UTC (permalink / raw)
  To: andi; +Cc: vgusev, kuznet, netdev

From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Apr 2008 09:01:45 +0200

> David Miller wrote:
> > From: Vitaliy Gusev <vgusev@openvz.org>
> > Date: Tue, 15 Apr 2008 18:54:44 +0400
> > 
> >> tcp_prune_queue() doesn't prune an out-of-order queue if socket
> >> is under rcvbuf. However even if socket is under rcvbuf but system-wide limit is
> >> reached then skb cannot be queued. It can lead to deadlock situation as any skb that
> >> fills sequence hole is dropped.
> >> So discard out-of-order queue if system-wide limit is reached.
> >>
> >> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
> > 
> > I applied your original patch already, so I added the following
> > relative patch.
> 
> Could you please at least update the original description to the
> one from the new patch? That one was somewhat misleading imho.

It's already a few changesets deep in my tree and it's already
pushed to kernel.org  I really don't want to have to respin my
tree and break it for all of my downstream tree developers just
to make this commit message fixup.

Sorry.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-04-16  7:08 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-15 14:54 [PATCH] Discard tcp out-of-order queue if system limit is reached Vitaliy Gusev
2008-04-15 14:58 ` Andi Kleen
2008-04-15 15:22 ` Stephen Hemminger
2008-04-16  3:27 ` David Miller
2008-04-16  7:01   ` Andi Kleen
2008-04-16  7:08     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).