All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: "Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Cc: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	"David S. Miller" <davem@davemloft.net>,
	lkml <linux-kernel@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"Fehrmann, Henning" <henning.fehrmann@aei.mpg.de>,
	Carsten Aulbert <carsten.aulbert@aei.mpg.de>
Subject: Re: oops in tcp_xmit_retransmit_queue() w/ v2.6.32.15
Date: Mon, 19 Jul 2010 16:57:36 +0200	[thread overview]
Message-ID: <4C4467E0.9080907@kernel.org> (raw)
In-Reply-To: <alpine.DEB.2.00.1007161448330.13946@melkinpaasi.cs.helsinki.fi>

Hello,

On 07/16/2010 02:02 PM, Ilpo Järvinen wrote:
> Besides, Tejun has also found that it's hint->next ptr which is NULL in 
> his case so this won't solve his case anyway. Tejun, can you confirm 
> whether it was retransmit_skb_hint->next being NULL on _entry time_ to 
> tcp_xmit_retransmit_queue() or later on in the loop after the updates done 
> by the loop itself to the hint (or that your testing didn't conclude 
> either)?

Sorry about the delay.  I was traveling last week.  Unfortunately, I
don't know whether ->next was NULL on entry or not.  I hacked up the
following ugly patch for the next test run.  It should have everything
which has come up till now + list and hint sanity checking before
starting processing them.  I'm planning on deploying it w/ crashdump
enabled in several days.  If I've missed something, please let me
know.

Thanks.

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957..1c8b1e0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2190,6 +2190,53 @@ static int tcp_can_forward_retransmit(struct sock *sk)
 	return 1;
 }

+static void print_queue(struct sock *sk, struct sk_buff *old, struct sk_buff *hole)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb, *prev;
+	bool do_panic = false;
+
+	skb = tcp_write_queue_head(sk);
+	prev = (struct sk_buff *)(&sk->sk_write_queue);
+
+	if (skb == NULL) {
+		printk("XXX NULL head, pkts %u\n", tp->packets_out);
+		do_panic = true;
+	}
+
+	printk("XXX head %p tail %p sendhead %p oldhint %p now %p hole %p high %u\n",
+	       tcp_write_queue_head(sk), tcp_write_queue_tail(sk),
+	       tcp_send_head(sk), old, tp->retransmit_skb_hint, hole,
+	       tp->retransmit_high);
+
+	while (skb) {
+		printk("XXX skb %p (%u-%u) next %p prev %p sacked %u\n",
+		       skb, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+		       skb->next, skb->prev, TCP_SKB_CB(skb)->sacked);
+		if (prev != skb->prev) {
+			printk("XXX Inconsistent prev\n");
+			do_panic = true;
+		}
+
+		if (skb == tcp_write_queue_tail(sk)) {
+			if (skb->next != (struct sk_buff *)(&sk->sk_write_queue)) {
+				printk("XXX Improper next at tail\n");
+				do_panic = true;
+			}
+			break;
+		}
+
+		prev = skb;
+		skb = skb->next;
+	}
+	if (!skb) {
+		printk("XXX Encountered unexpected NULL\n");
+		do_panic = true;
+	}
+	if (do_panic)
+		panic("XXX panicking");
+}
+
 /* This gets called after a retransmit timeout, and the initially
  * retransmitted data is acknowledged.  It tries to continue
  * resending the rest of the retransmit queue, until either
@@ -2198,19 +2245,53 @@ static int tcp_can_forward_retransmit(struct sock *sk)
  * based retransmit packet might feed us FACK information again.
  * If so, we use it to avoid unnecessarily retransmissions.
  */
+static unsigned int caught_it;
+
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
+	struct sk_buff *skb, *prev;
 	struct sk_buff *hole = NULL;
+	struct sk_buff *old = tp->retransmit_skb_hint;
 	u32 last_lost;
 	int mib_idx;
 	int fwd_rexmitting = 0;
+	bool saw_hint = false;
+
+	if (!tp->packets_out) {
+		if (net_ratelimit())
+			printk("XXX !tp->packets_out, retransmit_skb_hint=%p, write_queue_head=%p\n",
+			       tp->retransmit_skb_hint, tcp_write_queue_head(sk));
+		return;
+	}

 	if (!tp->lost_out)
 		tp->retransmit_high = tp->snd_una;

+	for (skb = tcp_write_queue_head(sk),
+	     prev = (struct sk_buff *)&sk->sk_write_queue;
+	     skb != (struct sk_buff *)&sk->sk_write_queue;
+	     prev = skb, skb = skb->next) {
+		if (prev != skb->prev) {
+			printk("XXX sanity check: prev corrupt\n");
+			print_queue(sk, old, hole);
+		}
+		if (skb == tp->retransmit_skb_hint)
+			saw_hint = true;
+		if (skb == tcp_write_queue_tail(sk) &&
+		    skb->next != (struct sk_buff *)(&sk->sk_write_queue)) {
+			printk("XXX sanity check: end corrupt\n");
+			print_queue(sk, old, hole);
+		}
+	}
+	if (tp->retransmit_skb_hint && !saw_hint) {
+		printk("XXX sanity check: retransmit_skb_hint=%p is not on list, claring hint\n",
+		       tp->retransmit_skb_hint);
+		print_queue(sk, old, hole);
+		tp->retransmit_skb_hint = NULL;
+	}
+
 	if (tp->retransmit_skb_hint) {
 		skb = tp->retransmit_skb_hint;
 		last_lost = TCP_SKB_CB(skb)->end_seq;
@@ -2218,7 +2299,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			last_lost = tp->retransmit_high;
 	} else {
 		skb = tcp_write_queue_head(sk);
-		last_lost = tp->snd_una;
+		if (skb)
+			last_lost = tp->snd_una;
+	}
+
+checknull:
+	if (skb == NULL) {
+		print_queue(sk, old, hole);
+		caught_it++;
+		if (net_ratelimit())
+			printk("XXX Errors caught so far %u\n", caught_it);
+		return;
 	}

 	tcp_for_write_queue_from(skb, sk) {
@@ -2261,7 +2352,7 @@ begin_fwd:
 		} else if (!(sacked & TCPCB_LOST)) {
 			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
 				hole = skb;
-			continue;
+			goto cont;

 		} else {
 			last_lost = TCP_SKB_CB(skb)->end_seq;
@@ -2272,7 +2363,7 @@ begin_fwd:
 		}

 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
-			continue;
+			goto cont;

 		if (tcp_retransmit_skb(sk, skb))
 			return;
@@ -2282,6 +2373,9 @@ begin_fwd:
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
+cont:
+		skb = skb->next;
+		goto checknull;
 	}
 }

-- 
tejun

  parent reply	other threads:[~2010-07-19 14:58 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-08  8:22 oops in tcp_xmit_retransmit_queue() w/ v2.6.32.15 Tejun Heo
2010-07-11  2:36 ` David Miller
2010-07-11 16:09 ` Ilpo Järvinen
2010-07-11 17:06   ` Eric Dumazet
2010-07-11 17:46     ` Eric Dumazet
2010-07-11 18:29       ` Eric Dumazet
2010-07-11 19:22         ` Ilpo Järvinen
2010-07-11 19:25           ` Ilpo Järvinen
2010-07-11 19:44             ` Ilpo Järvinen
2010-07-15 11:58   ` Lennart Schulte
2010-07-15 12:05     ` Eric Dumazet
2010-07-15 12:55       ` Lennart Schulte
2010-07-16 12:02         ` Ilpo Järvinen
2010-07-16 12:25           ` Lennart Schulte
2010-07-16 13:19             ` Ilpo Järvinen
2010-07-19  8:06               ` Lennart Schulte
2010-07-19 11:16                 ` [PATCHv2] tcp: fix crash in tcp_xmit_retransmit_queue Ilpo Järvinen
2010-07-19 14:09                   ` Eric Dumazet
2010-07-19 17:25                     ` Ilpo Järvinen
2010-07-19 17:39                       ` Eric Dumazet
2010-07-19 19:55                         ` David Miller
2010-07-20  8:33                           ` Ilpo Järvinen
2010-07-19 14:57           ` Tejun Heo [this message]
2010-07-20  8:41             ` oops in tcp_xmit_retransmit_queue() w/ v2.6.32.15 Ilpo Järvinen
2010-09-08  9:32             ` Ilpo Järvinen
2010-09-08 10:25               ` Tejun Heo
2010-09-08 10:34                 ` Ilpo Järvinen
2010-09-09 10:27                   ` Tejun Heo
2010-09-09 10:45                     ` Ilpo Järvinen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C4467E0.9080907@kernel.org \
    --to=tj@kernel.org \
    --cc=carsten.aulbert@aei.mpg.de \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=henning.fehrmann@aei.mpg.de \
    --cc=ilpo.jarvinen@helsinki.fi \
    --cc=lennart.schulte@nets.rwth-aachen.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.