From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Netfilter Development Mailinglist <netfilter-devel@lists.netfilter.org>
Cc: Patrick McHardy <kaber@trash.net>,
Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Subject: [PATCH 1/4][CONNTRACK] Introduce flag facilities to take over TCP connections
Date: Fri, 10 Nov 2006 02:16:34 +0100 [thread overview]
Message-ID: <4553D2F2.1020107@netfilter.org> (raw)
[-- Attachment #1: Type: text/plain, Size: 608 bytes --]
This patch introduces two new flags: IPS_PICKUP, which forces the
protocol handler to pick up the window from valid TCP packets, and
IPS_IN_WINDOW, which bypasses window checking.
Moreover, four new attributes are introduced to inject the window scale
factor and enable SACK. These new facilities provide the appropriate
mechanisms to take over TCP connections in failover settings with TCP
tracking enabled.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
--
The dawn of the fourth age of Linux firewalling is coming; a time of
great struggle and heroic deeds -- J.Kadlecsik got inspired by J.Morris
[-- Attachment #2: 01pickup.patch --]
[-- Type: text/plain, Size: 14073 bytes --]
[CONNTRACK] Introduce flag facilities to take over TCP connections
This patch introduces two new flags: IPS_PICKUP, which forces the protocol
handler to pick up the window from valid TCP packets, and IPS_IN_WINDOW,
which bypasses window checking.
Moreover, four new attributes are introduced to inject the window scale
factor and enable SACK. These new facilities provide the appropriate
mechanisms to take over TCP connections in failover settings with TCP
tracking enabled.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Index: linux-2.6.git/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
===================================================================
--- linux-2.6.git.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-11-08 23:14:06.000000000 +0100
+++ linux-2.6.git/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-11-08 23:51:33.000000000 +0100
@@ -340,13 +340,32 @@ static int tcp_to_nfattr(struct sk_buff
const struct ip_conntrack *ct)
{
struct nfattr *nest_parms;
-
+ u_int8_t sack;
+
read_lock_bh(&tcp_lock);
nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
- read_unlock_bh(&tcp_lock);
+ /* do not dump sack information if in-window checkings are by-passed */
+ if (test_bit(IPS_IN_WINDOW, &ct->status))
+ goto out;
+
+ /* window scale factor: original direction (SYN) */
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[0].td_scale);
+ /* window scale factor: reply direction (SYN+ACK) */
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[1].td_scale);
+ /* SACK: original direction */
+ sack = ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_SACK_PERM;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_ORIGINAL, sizeof(u_int8_t), &sack);
+ /* SACK: reply direction */
+ sack = ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_SACK_PERM;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_REPLY, sizeof(u_int8_t), &sack);
+
+out:
+ read_unlock_bh(&tcp_lock);
NFA_NEST_END(skb, nest_parms);
return 0;
@@ -357,7 +376,11 @@ nfattr_failure:
}
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_SACK_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_SACK_REPLY-1] = sizeof(u_int8_t)
};
static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
@@ -381,6 +404,40 @@ static int nfattr_to_tcp(struct nfattr *
write_lock_bh(&tcp_lock);
ct->proto.tcp.state =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+ /* window scale factor: original direction (SYN) */
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]) {
+ ct->proto.tcp.seen[0].td_scale =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ /* window scale factor: reply direction (SYN+ACK) */
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]) {
+ ct->proto.tcp.seen[1].td_scale =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ /* enable/disable SACK: original direction */
+ if (tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]) {
+ u_int8_t enable =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]);
+ if (enable)
+ ct->proto.tcp.seen[0].flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ else
+ ct->proto.tcp.seen[0].flags &=
+ ~IP_CT_TCP_FLAG_SACK_PERM;
+ }
+ /* enable/disable SACK: reply direction */
+ if (tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]) {
+ u_int8_t enable =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]);
+ if (enable)
+ ct->proto.tcp.seen[1].flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ else
+ ct->proto.tcp.seen[1].flags &=
+ ~IP_CT_TCP_FLAG_SACK_PERM;
+ }
write_unlock_bh(&tcp_lock);
return 0;
@@ -424,10 +481,10 @@ static unsigned int get_conntrack_index(
we doesn't have to deal with fragments.
*/
-static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- struct iphdr *iph,
- struct tcphdr *tcph)
+static inline __u32 segment_seq_plus_len(const __u32 seq,
+ const size_t len,
+ const struct iphdr *iph,
+ const struct tcphdr *tcph)
{
return (seq + len - (iph->ihl + tcph->doff)*4
+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
@@ -890,6 +947,22 @@ static int tcp_error(struct sk_buff *skb
return NF_ACCEPT;
}
+static inline void tcp_pickup_window(struct ip_conntrack *conntrack,
+ const struct sk_buff *skb,
+ const struct iphdr *iph,
+ const struct tcphdr *th)
+{
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end +
+ conntrack->proto.tcp.seen[0].td_maxwin;
+ conntrack->proto.tcp.seen[0].td_scale = 0;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct ip_conntrack *conntrack,
const struct sk_buff *skb,
@@ -912,6 +985,12 @@ static int tcp_packet(struct ip_conntrac
index = get_conntrack_index(th);
new_state = tcp_conntracks[dir][index][old_state];
+ /* pick up or by-pass window tracking */
+ if (test_bit(IPS_IN_WINDOW, &conntrack->status))
+ goto in_window;
+ else if (test_and_clear_bit(IPS_PICKUP, &conntrack->status))
+ tcp_pickup_window(conntrack, skb, iph, th);
+
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
/* Ignored packets:
@@ -1116,16 +1195,7 @@ static int tcp_new(struct ip_conntrack *
* its history is lost for us.
* Let's try to use the data from the packet.
*/
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end +
- conntrack->proto.tcp.seen[0].td_maxwin;
- conntrack->proto.tcp.seen[0].td_scale = 0;
+ tcp_pickup_window(conntrack, skb, iph, th);
/* We assume SACK. Should we assume window scaling too? */
conntrack->proto.tcp.seen[0].flags =
Index: linux-2.6.git/include/linux/netfilter/nf_conntrack_common.h
===================================================================
--- linux-2.6.git.orig/include/linux/netfilter/nf_conntrack_common.h 2006-11-08 23:14:06.000000000 +0100
+++ linux-2.6.git/include/linux/netfilter/nf_conntrack_common.h 2006-11-08 23:37:40.000000000 +0100
@@ -73,6 +73,14 @@ enum ip_conntrack_status {
/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
+
+ /* Pick up connection information. */
+ IPS_PICKUP_BIT = 11,
+ IPS_PICKUP = (1 << IPS_PICKUP_BIT),
+
+ /* Ignore in-window checkings. */
+ IPS_IN_WINDOW_BIT = 12,
+ IPS_IN_WINDOW = (1 << IPS_IN_WINDOW_BIT),
};
/* Connection tracking event bits */
Index: linux-2.6.git/net/netfilter/nf_conntrack_proto_tcp.c
===================================================================
--- linux-2.6.git.orig/net/netfilter/nf_conntrack_proto_tcp.c 2006-11-08 23:14:06.000000000 +0100
+++ linux-2.6.git/net/netfilter/nf_conntrack_proto_tcp.c 2006-11-08 23:50:52.000000000 +0100
@@ -380,10 +380,10 @@ static unsigned int get_conntrack_index(
we doesn't have to deal with fragments.
*/
-static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- unsigned int dataoff,
- struct tcphdr *tcph)
+static inline __u32 segment_seq_plus_len(const __u32 seq,
+ const size_t len,
+ const unsigned int dataoff,
+ const struct tcphdr *tcph)
{
/* XXX Should I use payload length field in IP/IPv6 header ?
* - YK */
@@ -850,6 +850,22 @@ static int tcp_error(struct sk_buff *skb
return NF_ACCEPT;
}
+static inline void tcp_pickup_window(struct nf_conn *conntrack,
+ const struct sk_buff *skb,
+ const unsigned int dataoff,
+ const struct tcphdr *th)
+{
+ conntrack->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
+ conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
+ conntrack->proto.tcp.seen[0].td_maxwin = 1;
+ conntrack->proto.tcp.seen[0].td_maxend =
+ conntrack->proto.tcp.seen[0].td_end +
+ conntrack->proto.tcp.seen[0].td_maxwin;
+ conntrack->proto.tcp.seen[0].td_scale = 0;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *conntrack,
const struct sk_buff *skb,
@@ -873,6 +889,12 @@ static int tcp_packet(struct nf_conn *co
index = get_conntrack_index(th);
new_state = tcp_conntracks[dir][index][old_state];
+ /* pick up or by-pass window tracking */
+ if (test_bit(IPS_IN_WINDOW, &conntrack->status))
+ goto in_window;
+ else if (test_and_clear_bit(IPS_PICKUP, &conntrack->status))
+ tcp_pickup_window(conntrack, skb, dataoff, th);
+
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
/* Ignored packets:
@@ -1075,16 +1097,7 @@ static int tcp_new(struct nf_conn *connt
* its history is lost for us.
* Let's try to use the data from the packet.
*/
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- dataoff, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end +
- conntrack->proto.tcp.seen[0].td_maxwin;
- conntrack->proto.tcp.seen[0].td_scale = 0;
+ tcp_pickup_window(conntrack, skb, dataoff, th);
/* We assume SACK. Should we assume window scaling too? */
conntrack->proto.tcp.seen[0].flags =
@@ -1121,13 +1134,32 @@ static int tcp_to_nfattr(struct sk_buff
const struct nf_conn *ct)
{
struct nfattr *nest_parms;
-
+ u_int8_t sack;
+
read_lock_bh(&tcp_lock);
nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
- read_unlock_bh(&tcp_lock);
+ /* do not dump sack information if in-window checkings are by-passed */
+ if (test_bit(IPS_IN_WINDOW, &ct->status))
+ goto out;
+
+ /* window scale factor: original direction (SYN) */
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[0].td_scale);
+ /* window scale factor: reply direction (SYN+ACK) */
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+ &ct->proto.tcp.seen[1].td_scale);
+ /* SACK: original direction */
+ sack = ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_SACK_PERM;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_ORIGINAL, sizeof(u_int8_t), &sack);
+ /* SACK: reply direction */
+ sack = ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_SACK_PERM;
+ NFA_PUT(skb, CTA_PROTOINFO_TCP_SACK_REPLY, sizeof(u_int8_t), &sack);
+
+out:
+ read_unlock_bh(&tcp_lock);
NFA_NEST_END(skb, nest_parms);
return 0;
@@ -1138,7 +1170,11 @@ nfattr_failure:
}
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_WSCALE_REPLY-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_SACK_ORIGINAL-1] = sizeof(u_int8_t),
+ [CTA_PROTOINFO_TCP_SACK_REPLY-1] = sizeof(u_int8_t)
};
static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1162,6 +1198,40 @@ static int nfattr_to_tcp(struct nfattr *
write_lock_bh(&tcp_lock);
ct->proto.tcp.state =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+ /* window scale factor: original direction (SYN) */
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]) {
+ ct->proto.tcp.seen[0].td_scale =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ /* window scale factor: reply direction (SYN+ACK) */
+ if (tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]) {
+ ct->proto.tcp.seen[1].td_scale =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_WINDOW_SCALE;
+ }
+ /* enable/disable SACK: original direction */
+ if (tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]) {
+ u_int8_t enable =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_ORIGINAL-1]);
+ if (enable)
+ ct->proto.tcp.seen[0].flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ else
+ ct->proto.tcp.seen[0].flags &=
+ ~IP_CT_TCP_FLAG_SACK_PERM;
+ }
+ /* enable/disable SACK: reply direction */
+ if (tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]) {
+ u_int8_t enable =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_SACK_REPLY-1]);
+ if (enable)
+ ct->proto.tcp.seen[1].flags |=
+ IP_CT_TCP_FLAG_SACK_PERM;
+ else
+ ct->proto.tcp.seen[1].flags &=
+ ~IP_CT_TCP_FLAG_SACK_PERM;
+ }
write_unlock_bh(&tcp_lock);
return 0;
Index: linux-2.6.git/include/linux/netfilter/nfnetlink_conntrack.h
===================================================================
--- linux-2.6.git.orig/include/linux/netfilter/nfnetlink_conntrack.h 2006-11-08 23:14:06.000000000 +0100
+++ linux-2.6.git/include/linux/netfilter/nfnetlink_conntrack.h 2006-11-08 23:14:44.000000000 +0100
@@ -83,6 +83,10 @@ enum ctattr_protoinfo {
enum ctattr_protoinfo_tcp {
CTA_PROTOINFO_TCP_UNSPEC,
CTA_PROTOINFO_TCP_STATE,
+ CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+ CTA_PROTOINFO_TCP_WSCALE_REPLY,
+ CTA_PROTOINFO_TCP_SACK_ORIGINAL,
+ CTA_PROTOINFO_TCP_SACK_REPLY,
__CTA_PROTOINFO_TCP_MAX
};
#define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
next reply other threads:[~2006-11-10 1:16 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-10 1:16 Pablo Neira Ayuso [this message]
2006-11-10 20:49 ` [PATCH 1/4][CONNTRACK] Introduce flag facilities to take over TCP connections Jozsef Kadlecsik
2006-11-12 19:03 ` Pablo Neira Ayuso
2006-11-23 13:28 ` Patrick McHardy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4553D2F2.1020107@netfilter.org \
--to=pablo@netfilter.org \
--cc=kaber@trash.net \
--cc=kadlec@blackhole.kfki.hu \
--cc=netfilter-devel@lists.netfilter.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.