All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH NF_CONNTRACK 1/9]: Fix multiple problems with TCP window tracking
@ 2005-05-23  6:17 Yasuyuki KOZAKAI
  2005-05-23 13:46 ` Michal Rokos
  0 siblings, 1 reply; 20+ messages in thread
From: Yasuyuki KOZAKAI @ 2005-05-23  6:17 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel

[-- Attachment #1: Type: Text/Plain, Size: 1331 bytes --]


This patch follows the recent changes to ip_conntrack.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>

===========

    [NETFILTER]: Fix multiple problems with TCP window tracking

    The first attached patch addresses several problems in the current TCP
    connection tracking in the 2.6 tree. Some of the problems was reported,
    others was discovered by nfsim tests:

    - tcp_sack function was not safe against nonlinear skbs
    - practically arbitrary RST segments (addresses, ports assumed to be
      known) could cause connection teardown in conntrack (thanks to Tim
      Burress for the bugreport and patch)
    - article on which the code was based falsely assumed that packets
      must fit completely into the window: packets must at least overlap
      (thanks to Phil Oester for the bugreport and patch)
    - state table slightly changed to handle ACK packets sent by server to
      late resent SYNs
    - tracking reopening connections reworked
    - cosmetic change: when window tracking is ignored by setting
      ip_conntrack_tcp_be_liberal to nonzero, it's ignored completely from
      now on

    Signed-off-by: Patrick McHardy <kaber@trash.net>

-----------------------------------------------------------------
Yasuyuki Kozakai @ USAGI Project <yasuyuki.kozakai@toshiba.co.jp>

[-- Attachment #2: 01-tcp-window.patch --]
[-- Type: Text/Plain, Size: 10962 bytes --]

diff -Nur linux-2.6.12-rc4-nfct/net/netfilter/nf_conntrack_proto_tcp.c linux-2.6.12-rc4-nfct-1-tcp-window/net/netfilter/nf_conntrack_proto_tcp.c
--- linux-2.6.12-rc4-nfct/net/netfilter/nf_conntrack_proto_tcp.c	2005-05-11 15:21:47.000000000 +0900
+++ linux-2.6.12-rc4-nfct-1-tcp-window/net/netfilter/nf_conntrack_proto_tcp.c	2005-05-19 13:52:56.000000000 +0900
@@ -261,7 +261,7 @@
  *	sSS -> sSR	Standard open.
  *	sSR -> sSR	Retransmitted SYN/ACK.
  *	sES -> sIG	Late retransmitted SYN/ACK?
- *	sFW -> sIG
+ *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
  *	sCW -> sIG
  *	sLA -> sIG
  *	sTW -> sIG
@@ -280,10 +280,10 @@
  *	sCL -> sCL
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
- *	sSS -> sIG	Might be a half-open connection.
- *	sSR -> sIV	Simultaneous open.
+ *	sSS -> sIV	Might be a half-open connection.
+ *	sSR -> sSR	Might answer late resent SYN.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
  *	sCW -> sCW
@@ -359,14 +359,19 @@
    http://www.nluug.nl/events/sane2000/papers.html
    http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
    
-   The boundaries and the conditions are slightly changed:
-   
+   The boundaries and the conditions are changed according to RFC793:
+   the packet must intersect the window (i.e. segments may be
+   after the right or before the left edge) and thus receivers may ACK
+   segments after the right edge of the window.
+
    	td_maxend = max(sack + max(win,1)) seen in reply packets
 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+	td_maxwin += seq + len - sender.td_maxend
+			if seq + len > sender.td_maxend
 	td_end    = max(seq + len) seen in sent packets
    
-   I. 	Upper bound for valid data:	seq + len <= sender.td_maxend
-   II. 	Lower bound for valid data:	seq >= sender.td_end - receiver.td_maxwin
+   I.   Upper bound for valid data:	seq <= sender.td_maxend
+   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
    III.	Upper bound for valid ack:      sack <= receiver.td_end
    IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
 
@@ -455,10 +460,13 @@
 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 		     struct tcphdr *tcph, __u32 *sack)
 {
-	__u32 tmp;
-	unsigned char *ptr;
         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
+	unsigned char *ptr;
 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
+	__u32 tmp;
+
+	if (!length)
+		return;
 
 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
 				 length, buff);
@@ -515,7 +523,7 @@
 
 static int tcp_in_window(struct ip_ct_tcp *state, 
                          enum ip_conntrack_dir dir,
-                         unsigned int *index,
+                         unsigned int index,
                          const struct sk_buff *skb,
 			 unsigned int dataoff,
                          struct tcphdr *tcph,
@@ -614,20 +622,23 @@
 		ack = sack = receiver->td_end;
 	}
 
-	if (seq == end)
+	if (seq == end
+	    && (!tcph->rst
+		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
 		/*
 		 * Packets contains no data: we assume it is valid
 		 * and check the ack value only.
+		 * However RST segments are always validated by their
+		 * SEQ number, except when seq == 0 (reset sent answering
+		 * SYN.
 		 */
 		seq = end = sender->td_end;
 
 	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n",
+	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
 		NIPQUAD(iph->saddr), ntohs(tcph->source),
 		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end, 
-		after(end, sender->td_maxend) && before(seq, sender->td_maxend)
-		? sender->td_maxend : end);
+		seq, ack, sack, win, end);
 	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
 	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
 		sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -635,24 +646,15 @@
 		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		receiver->td_scale);
 
-	/* Ignore data over the right edge of the receiver's window. */
-	if (after(end, sender->td_maxend) &&
-	    before(seq, sender->td_maxend)) {
-		end = sender->td_maxend;
-		if (*index == TCP_FIN_SET)
-			*index = TCP_ACK_SET;
-	}
 	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-		before(end, sender->td_maxend + 1) 
-		    || before(seq, sender->td_maxend + 1),
-	    	after(seq, sender->td_end - receiver->td_maxwin - 1) 
-	    	    || after(end, sender->td_end - receiver->td_maxwin - 1),
+		before(seq, sender->td_maxend + 1),
+		after(end, sender->td_end - receiver->td_maxwin - 1),
 	    	before(sack, receiver->td_end + 1),
 	    	after(ack, receiver->td_end - MAXACKWINDOW(sender)));
 
 	if (sender->loose || receiver->loose ||
-	    (before(end, sender->td_maxend + 1) &&
-	     after(seq, sender->td_end - receiver->td_maxwin - 1) &&
+	    (before(seq, sender->td_maxend + 1) &&
+	     after(end, sender->td_end - receiver->td_maxwin - 1) &&
 	     before(sack, receiver->td_end + 1) &&
 	     after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
 	    	/*
@@ -669,6 +671,11 @@
 			sender->td_maxwin = swin;
 		if (after(end, sender->td_end))
 			sender->td_end = end;
+		/*
+		 * Update receiver data.
+		 */
+		if (after(end, sender->td_maxend))
+			receiver->td_maxwin += end - sender->td_maxend;
 		if (after(sack + win, receiver->td_maxend - 1)) {
 			receiver->td_maxend = sack + win;
 			if (win == 0)
@@ -678,7 +685,7 @@
 		/* 
 		 * Check retransmissions.
 		 */
-		if (*index == TCP_ACK_SET) {
+		if (index == TCP_ACK_SET) {
 			if (state->last_dir == dir
 			    && state->last_seq == seq
 			    && state->last_ack == ack
@@ -703,16 +710,16 @@
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL,
 			"nf_ct_tcp: %s ",
-			before(end, sender->td_maxend + 1) ?
-			after(seq, sender->td_end - receiver->td_maxwin - 1) ?
+			before(seq, sender->td_maxend + 1) ?
+			after(end, sender->td_end - receiver->td_maxwin - 1) ?
 			before(sack, receiver->td_end + 1) ?
 			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
-			: "ACK is under the lower bound (possibly overly delayed ACK)"
-			: "ACK is over the upper bound (ACKed data has never seen yet)"
-			: "SEQ is under the lower bound (retransmitted already ACKed data)"
+			: "ACK is under the lower bound (possible overly delayed ACK)"
+			: "ACK is over the upper bound (ACKed data not seen yet)"
+			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
 
-		res = nf_ct_tcp_be_liberal && !tcph->rst;
+		res = nf_ct_tcp_be_liberal;
   	}
   
 	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
@@ -903,13 +910,12 @@
 	switch (new_state) {
 	case TCP_CONNTRACK_IGNORE:
 		/* Either SYN in ORIGINAL
-		 * or SYN/ACK in REPLY
-		 * or ACK in REPLY direction (half-open connection). */
+		 * or SYN/ACK in REPLY. */
 		if (index == TCP_SYNACK_SET
 		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
 		    && conntrack->proto.tcp.last_dir != dir
-		    && after(ntohl(th->ack_seq),
-		    	     conntrack->proto.tcp.last_seq)) {
+		    && ntohl(th->ack_seq) ==
+		    	     conntrack->proto.tcp.last_end) {
 			/* This SYN/ACK acknowledges a SYN that we earlier 
 			 * ignored as invalid. This means that the client and
 			 * the server are both in sync, while the firewall is
@@ -929,6 +935,8 @@
 		conntrack->proto.tcp.last_index = index;
 		conntrack->proto.tcp.last_dir = dir;
 		conntrack->proto.tcp.last_seq = ntohl(th->seq);
+		conntrack->proto.tcp.last_end =
+		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
 
 		WRITE_UNLOCK(&tcp_lock);
 		if (LOG_INVALID(IPPROTO_TCP))
@@ -946,7 +954,12 @@
 				  "nf_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_SYN_SENT:
-		if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
+		if (old_state < TCP_CONNTRACK_TIME_WAIT)
+			break;
+		if ((conntrack->proto.tcp.seen[dir].flags &
+			IP_CT_TCP_FLAG_CLOSE_INIT)
+		    || after(ntohl(th->seq),
+			     conntrack->proto.tcp.seen[dir].td_end)) {
 		    	/* Attempt to reopen a closed connection.
 		    	* Delete this connection and look up again. */
 		    	WRITE_UNLOCK(&tcp_lock);
@@ -955,22 +968,16 @@
 		    					    conntrack);
 		    	return -NF_REPEAT;
 		}
-		break;
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
-		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-		         && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
-			|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-			 && conntrack->proto.tcp.last_index == TCP_ACK_SET))
-		    && after(ntohl(th->ack_seq),
-		    	     conntrack->proto.tcp.last_seq)) {
-			/* Ignore RST closing down invalid SYN or ACK
-			   we had let trough. */ 
-		    	WRITE_UNLOCK(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
-				nf_log_packet(pf, 0, skb, NULL, NULL, 
-					  "nf_ct_tcp: invalid RST (ignored) ");
-			return NF_ACCEPT;
+		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
+			/* RST sent to invalid SYN we had let trough
+			 * SYN was in window then, tear down connection.
+			 * We skip window checking, because packet might ACK
+			 * segments we ignored in the SYN. */
+			goto in_window;
 		}
 		/* Just fall trough */
 	default:
@@ -978,16 +985,14 @@
 		break;
 	}
 
-	if (!tcp_in_window(&conntrack->proto.tcp, dir, &index, 
+	if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
 			   skb, dataoff, th, pf)) {
 		WRITE_UNLOCK(&tcp_lock);
 		return -NF_ACCEPT;
 	}
+     in_window:
 	/* From now on we have got in-window packets */
-
-	/* If FIN was trimmed off, we don't change state. */
 	conntrack->proto.tcp.last_index = index;
-	new_state = tcp_conntracks[dir][index][old_state];
 
 	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
 	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
@@ -998,6 +1003,10 @@
 		old_state, new_state);
 
 	conntrack->proto.tcp.state = new_state;
+	if (old_state != new_state
+	    && (new_state == TCP_CONNTRACK_FIN_WAIT
+		|| new_state == TCP_CONNTRACK_CLOSE))
+		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 	timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
 		  && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
 		  ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
@@ -1028,7 +1037,7 @@
 	return NF_ACCEPT;
 }
  
-  /* Called when a new connection for this protocol found. */
+/* Called when a new connection for this protocol found. */
 static int tcp_new(struct nf_conn *conntrack,
 		   const struct sk_buff *skb,
 		   unsigned int dataoff)

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2005-06-11 15:49 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-23  6:17 [PATCH NF_CONNTRACK 1/9]: Fix multiple problems with TCP window tracking Yasuyuki KOZAKAI
2005-05-23 13:46 ` Michal Rokos
2005-03-16  0:35   ` nf_conntrack tree Patrick McHardy
2005-03-16  6:48     ` Yasuyuki KOZAKAI
2005-03-16  9:53       ` Pablo Neira
2005-03-16 10:13         ` Jozsef Kadlecsik
2005-03-16 13:14           ` Yasuyuki KOZAKAI
2005-03-16 14:36           ` Patrick McHardy
2005-03-16 14:31       ` Patrick McHardy
     [not found]     ` <200503160648.j2G6mXVV014699@toshiba.co.jp>
2005-03-16 19:22       ` Yasuyuki KOZAKAI
2005-03-16 19:32         ` Patrick McHardy
2005-03-16 19:54           ` Yasuyuki KOZAKAI
2005-03-16 20:04             ` Patrick McHardy
2005-03-17  8:31               ` Yasuyuki KOZAKAI
2005-03-20 16:42                 ` Patrick McHardy
2005-03-22 16:31                   ` Yasuyuki KOZAKAI
2005-05-24  1:53     ` [PATCH NF_CONNTRACK 1/9]: Fix multiple problems with TCP window tracking Yasuyuki KOZAKAI
2005-06-11 15:42       ` Patrick McHardy
     [not found]   ` <200505240153.j4O1r35h028029@toshiba.co.jp>
2005-05-24 11:23     ` Michal Rokos
2005-06-11 15:49       ` Patrick McHardy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.