All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Netfilter Development Mailinglist <netfilter-devel@lists.netfilter.org>
Cc: Harald Welte <laforge@netfilter.org>, Patrick McHardy <kaber@trash.net>
Subject: [PATCH 2/3][CONNTRACK] Introduce the pickup facilities to take over TCP connections
Date: Mon, 21 Aug 2006 10:46:25 +0200	[thread overview]
Message-ID: <44E972E1.4080500@netfilter.org> (raw)

[-- Attachment #1: Type: text/plain, Size: 564 bytes --]

This patch introduces a new flag called IPS_PICKUP that forces the 
protocol handler to pick up the window of valid TCP packets. Moreover, 
four new attributes to inject the window scale factor and enable SACK 
are introduced.

These new facilities provide the appropriate mechanisms to take over TCP 
connections in failover settings with TCP tracking enabled.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

-- 
The dawn of the fourth age of Linux firewalling is coming; a time of 
great struggle and heroic deeds -- J.Kadlecsik got inspired by J.Morris

[-- Attachment #2: 05pickup.patch --]
[-- Type: text/plain, Size: 13508 bytes --]

[CONNTRACK] Introduce the pickup facilities to take over TCP connections

This patch introduces a new flag called IPS_PICKUP that forces the protocol
handler to pick up the window of valid TCP packets. Moreover, four new
attributes to inject the window scale factor and enable SACK are introduced.
These new facilities provide the appropriate mechanisms to take over
TCP connections in failover settings with TCP tracking enabled.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Index: net-2.6/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
===================================================================
--- net-2.6.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-08-16 22:35:52.000000000 +0200
+++ net-2.6/net/ipv4/netfilter/ip_conntrack_proto_tcp.c	2006-08-17 13:56:10.000000000 +0200
@@ -341,11 +341,24 @@ static int tcp_to_nfattr(struct sk_buff 
 			 const struct ip_conntrack *ct)
 {
 	struct nfattr *nest_parms;
-	
+	u_int8_t sack;
+
 	read_lock_bh(&tcp_lock);
 	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
+	/* window scale factor: original direction (SYN) */
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[0].td_scale);
+	/* window scale factor: reply direction (SYN+ACK) */
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[1].td_scale);
+	/* SACK: original direction */
+	sack = ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_SACK_PERM;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_ORIGINAL, sizeof(u_int8_t), &sack);
+	/* SACK: reply direction */
+	sack = ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_SACK_PERM;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_REPLY, sizeof(u_int8_t), &sack);
 	read_unlock_bh(&tcp_lock);
 
 	NFA_NEST_END(skb, nest_parms);
@@ -358,7 +371,11 @@ nfattr_failure:
 }
 
 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_STATE-1]		= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_SACK_REPLY-1]	= sizeof(u_int8_t)
 };
 
 static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
@@ -382,6 +399,40 @@ static int nfattr_to_tcp(struct nfattr *
 	write_lock_bh(&tcp_lock);
 	ct->proto.tcp.state = 
 		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+	/* window scale factor: original direction (SYN) */
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]) {
+		ct->proto.tcp.seen[0].td_scale = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+	}
+	/* window scale factor: reply direction (SYN+ACK) */
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]) {
+		ct->proto.tcp.seen[1].td_scale = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+	}
+	/* enable/disable SACK: original direction */
+	if (tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]) {
+		u_int8_t enable = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]);
+		if (enable)
+			ct->proto.tcp.seen[0].flags |= 
+				IP_CT_TCP_FLAG_SACK_PERM;
+		else
+			ct->proto.tcp.seen[0].flags &= 
+				~IP_CT_TCP_FLAG_SACK_PERM;
+	}
+        /* enable/disable SACK: reply direction */
+	if (tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]) {
+		u_int8_t enable = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]);
+		if (enable)
+			ct->proto.tcp.seen[1].flags |=
+				IP_CT_TCP_FLAG_SACK_PERM;
+		else
+			ct->proto.tcp.seen[1].flags &=
+				~IP_CT_TCP_FLAG_SACK_PERM;
+	}
 	write_unlock_bh(&tcp_lock);
 
 	return 0;
@@ -425,10 +476,10 @@ static unsigned int get_conntrack_index(
    we doesn't have to deal with fragments. 
 */
 
-static inline __u32 segment_seq_plus_len(__u32 seq,
-					 size_t len,
-					 struct iphdr *iph,
-					 struct tcphdr *tcph)
+static inline __u32 segment_seq_plus_len(const __u32 seq,
+					 const size_t len,
+					 const struct iphdr *iph,
+					 const struct tcphdr *tcph)
 {
 	return (seq + len - (iph->ihl + tcph->doff)*4
 		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
@@ -890,6 +941,22 @@ static int tcp_error(struct sk_buff *skb
 	return NF_ACCEPT;
 }
 
+/* Seed seen[0] (td_end, td_maxwin, td_maxend, td_scale) from this packet. */
+static void tcp_pickup_window(struct ip_conntrack *conntrack,
+			      const struct sk_buff *skb,
+			      const struct iphdr *iph, const struct tcphdr *th)
+{
+	conntrack->proto.tcp.seen[0].td_end =
+		segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
+	conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+	if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+		conntrack->proto.tcp.seen[0].td_maxwin = 1;
+	conntrack->proto.tcp.seen[0].td_maxend =
+		conntrack->proto.tcp.seen[0].td_end +
+		conntrack->proto.tcp.seen[0].td_maxwin;
+	conntrack->proto.tcp.seen[0].td_scale = 0;
+}
+
 /* Returns verdict for packet, or -1 for invalid. */
 static int tcp_packet(struct ip_conntrack *conntrack,
 		      const struct sk_buff *skb,
@@ -907,6 +974,14 @@ static int tcp_packet(struct ip_conntrac
 	BUG_ON(th == NULL);
 	
 	write_lock_bh(&tcp_lock);
+
+	/* This conntrack was added via ctnetlink or ct_sync and must take over
+	 * sequence tracking.  test_and_clear_bit() wants the bit number.
+	 * NOTE(review): pickup also zeroes seen[0].td_scale set via ctnetlink.
+	 */
+	if (test_and_clear_bit(IPS_PICKUP_BIT, &conntrack->status))
+		tcp_pickup_window(conntrack, skb, iph, th);
+
 	old_state = conntrack->proto.tcp.state;
 	dir = CTINFO2DIR(ctinfo);
 	index = get_conntrack_index(th);
@@ -1116,16 +1191,7 @@ static int tcp_new(struct ip_conntrack *
 		 * its history is lost for us.
 		 * Let's try to use the data from the packet.
 		 */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     iph, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end + 
-			conntrack->proto.tcp.seen[0].td_maxwin;
-		conntrack->proto.tcp.seen[0].td_scale = 0;
+		tcp_pickup_window(conntrack, skb, iph, th);
 
 		/* We assume SACK. Should we assume window scaling too? */
 		conntrack->proto.tcp.seen[0].flags =
Index: net-2.6/include/linux/netfilter/nf_conntrack_common.h
===================================================================
--- net-2.6.orig/include/linux/netfilter/nf_conntrack_common.h	2006-08-17 11:51:40.000000000 +0200
+++ net-2.6/include/linux/netfilter/nf_conntrack_common.h	2006-08-17 11:53:57.000000000 +0200
@@ -73,6 +73,10 @@ enum ip_conntrack_status {
 	/* Connection has fixed timeout. */
 	IPS_FIXED_TIMEOUT_BIT = 10,
 	IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
+
+	/* Pick up connection information if required */
+	IPS_PICKUP_BIT = 11,
+	IPS_PICKUP = (1 << IPS_PICKUP_BIT),
 };
 
 /* Connection tracking event bits */
Index: net-2.6/net/netfilter/nf_conntrack_proto_tcp.c
===================================================================
--- net-2.6.orig/net/netfilter/nf_conntrack_proto_tcp.c	2006-08-16 22:35:52.000000000 +0200
+++ net-2.6/net/netfilter/nf_conntrack_proto_tcp.c	2006-08-17 13:55:13.000000000 +0200
@@ -381,10 +381,10 @@ static unsigned int get_conntrack_index(
    we doesn't have to deal with fragments. 
 */
 
-static inline __u32 segment_seq_plus_len(__u32 seq,
-					 size_t len,
-					 unsigned int dataoff,
-					 struct tcphdr *tcph)
+static inline __u32 segment_seq_plus_len(const __u32 seq,
+					 const size_t len,
+					 const unsigned int dataoff,
+					 const struct tcphdr *tcph)
 {
 	/* XXX Should I use payload length field in IP/IPv6 header ?
 	 * - YK */
@@ -850,6 +850,22 @@ static int tcp_error(struct sk_buff *skb
 	return NF_ACCEPT;
 }
 
+/* Seed seen[0] (td_end, td_maxwin, td_maxend, td_scale) from this packet. */
+static void tcp_pickup_window(struct nf_conn *conntrack,
+			      const struct sk_buff *skb,
+			      unsigned int dataoff, const struct tcphdr *th)
+{
+	conntrack->proto.tcp.seen[0].td_end =
+		segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
+	conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+	if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+		conntrack->proto.tcp.seen[0].td_maxwin = 1;
+	conntrack->proto.tcp.seen[0].td_maxend =
+		conntrack->proto.tcp.seen[0].td_end +
+		conntrack->proto.tcp.seen[0].td_maxwin;
+	conntrack->proto.tcp.seen[0].td_scale = 0;
+}
+
 /* Returns verdict for packet, or -1 for invalid. */
 static int tcp_packet(struct nf_conn *conntrack,
 		      const struct sk_buff *skb,
@@ -868,6 +884,14 @@ static int tcp_packet(struct nf_conn *co
 	BUG_ON(th == NULL);
 
 	write_lock_bh(&tcp_lock);
+
+	/* This conntrack was added via ctnetlink or ct_sync and must take over
+	 * sequence tracking.  test_and_clear_bit() wants the bit number.
+	 * NOTE(review): pickup also zeroes seen[0].td_scale set via ctnetlink.
+	 */
+	if (test_and_clear_bit(IPS_PICKUP_BIT, &conntrack->status))
+		tcp_pickup_window(conntrack, skb, dataoff, th);
+
 	old_state = conntrack->proto.tcp.state;
 	dir = CTINFO2DIR(ctinfo);
 	index = get_conntrack_index(th);
@@ -1075,16 +1099,7 @@ static int tcp_new(struct nf_conn *connt
 		 * its history is lost for us.
 		 * Let's try to use the data from the packet.
 		 */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     dataoff, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end + 
-			conntrack->proto.tcp.seen[0].td_maxwin;
-		conntrack->proto.tcp.seen[0].td_scale = 0;
+		tcp_pickup_window(conntrack, skb, dataoff, th);
 
 		/* We assume SACK. Should we assume window scaling too? */
 		conntrack->proto.tcp.seen[0].flags =
@@ -1121,11 +1136,24 @@ static int tcp_to_nfattr(struct sk_buff 
 			 const struct nf_conn *ct)
 {
 	struct nfattr *nest_parms;
-	
+	u_int8_t sack;
+
 	read_lock_bh(&tcp_lock);
 	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
+	/* window scale factor: original direction (SYN) */
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[0].td_scale);
+	/* window scale factor: reply direction (SYN+ACK) */
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[1].td_scale);
+	/* SACK: original direction */
+	sack = ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_SACK_PERM;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_ORIGINAL, sizeof(u_int8_t), &sack);
+	/* SACK: reply direction */
+	sack = ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_SACK_PERM;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_REPLY, sizeof(u_int8_t), &sack);
 	read_unlock_bh(&tcp_lock);
 
 	NFA_NEST_END(skb, nest_parms);
@@ -1138,7 +1166,11 @@ nfattr_failure:
 }
 
 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_STATE-1]		= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] 	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_SACK_REPLY-1]	= sizeof(u_int8_t)
 };
 
 static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1162,6 +1194,40 @@ static int nfattr_to_tcp(struct nfattr *
 	write_lock_bh(&tcp_lock);
 	ct->proto.tcp.state = 
 		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+	/* window scale factor: original direction (SYN) */
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]) {
+		ct->proto.tcp.seen[0].td_scale = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+	}
+	/* window scale factor: reply direction (SYN+ACK) */
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]) {
+		ct->proto.tcp.seen[1].td_scale = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+	}
+	/* enable/disable SACK: original direction */
+	if (tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]) {
+		u_int8_t enable = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]);
+		if (enable)
+			ct->proto.tcp.seen[0].flags |= 
+				IP_CT_TCP_FLAG_SACK_PERM;
+		else
+			ct->proto.tcp.seen[0].flags &= 
+				~IP_CT_TCP_FLAG_SACK_PERM;
+	}
+        /* enable/disable SACK: reply direction */
+	if (tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]) {
+		u_int8_t enable = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]);
+		if (enable)
+			ct->proto.tcp.seen[1].flags |=
+				IP_CT_TCP_FLAG_SACK_PERM;
+		else
+			ct->proto.tcp.seen[1].flags &=
+				~IP_CT_TCP_FLAG_SACK_PERM;
+	}
 	write_unlock_bh(&tcp_lock);
 
 	return 0;
Index: net-2.6/include/linux/netfilter/nfnetlink_conntrack.h
===================================================================
--- net-2.6.orig/include/linux/netfilter/nfnetlink_conntrack.h	2006-08-16 22:35:52.000000000 +0200
+++ net-2.6/include/linux/netfilter/nfnetlink_conntrack.h	2006-08-17 13:24:13.000000000 +0200
@@ -83,6 +83,10 @@ enum ctattr_protoinfo {
 enum ctattr_protoinfo_tcp {
 	CTA_PROTOINFO_TCP_UNSPEC,
 	CTA_PROTOINFO_TCP_STATE,
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+	CTA_PROTOINFO_TCP_WSCALE_REPLY,
+	CTA_PROTOINFO_TCP_SACK_ORIGINAL,
+	CTA_PROTOINFO_TCP_SACK_REPLY,
 	__CTA_PROTOINFO_TCP_MAX
 };
 #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)

             reply	other threads:[~2006-08-21  8:46 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-21  8:46 Pablo Neira Ayuso [this message]
2006-08-21 10:18 ` [PATCH 2/3][CONNTRACK] Introduce the pickup facilities to take over TCP connections Krzysztof Oledzki
2006-08-21 20:04   ` Pablo Neira Ayuso
2006-08-21 22:15     ` Krzysztof Oledzki
2006-08-21 23:12       ` Patrick McHardy
2006-09-24  3:46         ` Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44E972E1.4080500@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=kaber@trash.net \
    --cc=laforge@netfilter.org \
    --cc=netfilter-devel@lists.netfilter.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.