public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Vitaliy Gusev <vgusev@openvz.org>
To: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: linux-kernel@vger.kernel.org
Subject: [RFC][PATCH][NET] Fix never pruned tcp out-of-order queue
Date: Mon, 14 Apr 2008 21:21:53 +0400	[thread overview]
Message-ID: <200804142121.53860.vgusev@openvz.org> (raw)

Hello!

tcp_prune_queue() doesn't prune an out-of-order queue at all.
Therefore sk_rmem_schedule() can fail while the out-of-order queue
is never pruned. This can lead to a TCP deadlock if the
following two conditions both hold:

1. There is a sequence hole between the last segment received in
   order and the segments enqueued to the out-of-order queue.

2. The total size of all segments in the out-of-order queue exceeds tcp_mem[2].


Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>

---
 tcp_input.c |   72 ++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 26 deletions(-)


diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5119856..fb5f522 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }
 
+static void tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, size)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_rmem_schedule(sk, size)) {
+			tcp_prune_ofo_queue(sk);
+			if (!sk_rmem_schedule(sk, size))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		if (eaten <= 0) {
 queue_and_out:
 			if (eaten < 0 &&
-			    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-			     !sk_rmem_schedule(sk, skb->truesize))) {
-				if (tcp_prune_queue(sk) < 0 ||
-				    !sk_rmem_schedule(sk, skb->truesize))
-					goto drop;
-			}
+			    tcp_try_rmem_schedule(sk, skb->truesize))
+				goto drop;
+
 			skb_set_owner_r(skb, sk);
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
@@ -3966,12 +3981,8 @@ drop:
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb->truesize))
+		goto drop;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 }
 
+/*
+ * Purge the out-of-order queue.
+ */
+static void tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_mem_reclaim(sk);
+	}
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk)
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */
 
-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tcp_is_sack(tp))
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;


             reply	other threads:[~2008-04-14 17:20 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-14 17:21 Vitaliy Gusev [this message]
2008-04-15  7:34 ` [RFC][PATCH][NET] Fix never pruned tcp out-of-order queue David Miller
2008-04-15  7:59 ` Andi Kleen
2008-04-15  8:01   ` David Miller
2008-04-15  8:14     ` Andi Kleen
2008-04-15  8:18       ` David Miller
2008-04-15  8:26         ` Vitaliy Gusev
2008-04-15  8:30           ` Andi Kleen
2008-04-15  9:33             ` Vitaliy Gusev
2008-04-15 11:59             ` Alexey Kuznetsov
2008-04-15 13:47               ` Vitaliy Gusev
2008-04-15 13:54               ` Vitaliy Gusev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200804142121.53860.vgusev@openvz.org \
    --to=vgusev@openvz.org \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox