* [PATCH] nf_conntrack port of conntrack-event-api
@ 2005-08-03 21:38 Harald Welte
2005-08-03 21:50 ` Patrick McHardy
0 siblings, 1 reply; 4+ messages in thread
From: Harald Welte @ 2005-08-03 21:38 UTC (permalink / raw)
To: Netfilter Development Mailinglist; +Cc: Patrick McHardy, Yasuyuki KOZAKAI
[-- Attachment #1: Type: text/plain, Size: 21243 bytes --]
Hi! I've just committed the following patch to the nf_conntrack branch
of my netfilter-2.6.14 tree:
[NETFILTER] port conntrack-events from ip_conntrack to nf_conntrack
This adds a notifier chain based event mechanism for nf_conntrack state
changes. As opposed to the previous implementations in patch-o-matic, we
do no longer need a field in the skb to achieve this.
Thanks to the valuable input from Patrick McHardy and Rusty on the idea
of a per_cpu implementation.
Signed-off-by: Harald Welte <laforge@netfilter.org>
---
commit c911fa4cf2d2ba954eeeb81c486a3c856afd22ba
tree 6a430a735a417cf3223fc30b2493357680824ca9
parent feece338f857e94078aac5ada027ecc35d96d4d3
author Harald Welte <laforge@netfilter.org> Mi, 03 Aug 2005 22:40:10 +0200
committer Harald Welte <laforge@netfilter.org> Mi, 03 Aug 2005 22:40:10 +0200
include/linux/netfilter/nf_conntrack.h | 88 +++++++++++++++++
include/linux/netfilter/nf_conntrack_core.h | 17 +++
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 1
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 +
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1
net/netfilter/Kconfig | 10 ++
net/netfilter/nf_conntrack_core.c | 123 +++++++++++++++++++++++-
net/netfilter/nf_conntrack_ftp.c | 12 ++
net/netfilter/nf_conntrack_proto_sctp.c | 2
net/netfilter/nf_conntrack_proto_tcp.c | 4 +
net/netfilter/nf_conntrack_proto_udp.c | 3 -
net/netfilter/nf_conntrack_standalone.c | 9 ++
13 files changed, 261 insertions(+), 15 deletions(-)
diff --git a/include/linux/netfilter/nf_conntrack.h b/include/linux/netfilter/nf_conntrack.h
--- a/include/linux/netfilter/nf_conntrack.h
+++ b/include/linux/netfilter/nf_conntrack.h
@@ -168,7 +168,7 @@ nf_conntrack_tuple_taken(const struct nf
/* Return conntrack_info and tuple hash for given skb. */
static inline struct nf_conn *
-nf_ct_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
*ctinfo = skb->nfctinfo;
return (struct nf_conn *)skb->nfct;
@@ -214,10 +214,96 @@ static inline int nf_ct_is_confirmed(str
return test_bit(IPS_CONFIRMED_BIT, &ct->status);
}
+static inline int nf_ct_is_dying(struct nf_conn *ct)
+{
+ return test_bit(IPS_DYING_BIT, &ct->status);
+}
+
extern unsigned int nf_conntrack_htable_size;
#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#include <linux/notifier.h>
+
+struct nf_conntrack_ecache {
+ struct nf_conn *ct;
+ unsigned int events;
+};
+DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+
+#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x)
+
+extern struct notifier_block *nf_conntrack_chain;
+extern struct notifier_block *nf_conntrack_expect_chain;
+
+static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_register(&nf_conntrack_chain, nb);
+}
+
+static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+
+static inline int
+nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+
+static inline int
+nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+ return notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+
+static inline void
+nf_conntrack_event_cache(enum ip_conntrack_events event,
+ const struct sk_buff *skb)
+{
+ struct nf_conntrack_ecache *ecache =
+ &__get_cpu_var(nf_conntrack_ecache);
+
+ if (unlikely((struct nf_conn *) skb->nfct != ecache->ct)) {
+ if (net_ratelimit()) {
+ printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n");
+ dump_stack();
+ }
+ }
+ ecache->events |= event;
+}
+
+extern void
+nf_conntrack_deliver_cached_events_for(const struct nf_conn *ct);
+extern void nf_conntrack_event_cache_init(const struct sk_buff *skb);
+
+static inline void nf_conntrack_event(enum ip_conntrack_events event,
+ struct nf_conn *ct)
+{
+ if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
+ notifier_call_chain(&nf_conntrack_chain, event, ct);
+}
+
+static inline void
+nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
+ struct nf_conntrack_expect *exp)
+{
+ notifier_call_chain(&nf_conntrack_expect_chain, event, exp);
+}
+#else /* CONFIG_NF_CONNTRACK_EVENTS */
+static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
+ const struct sk_buff *skb) {}
+static inline void nf_conntrack_event(enum ip_conntrack_events event,
+ struct nf_conn *ct) {}
+static inline void nf_conntrack_deliver_cached_events_for(struct nf_conn *ct) {}
+static inline void nf_conntrack_event_cache_init(const struct sk_buff *skb) {}
+static inline void
+nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
+ struct nf_conntrack_expect *exp) {}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
/* eg. PROVIDES_CONNTRACK(ftp); */
#define PROVIDES_CONNTRACK(name) \
int needs_nf_conntrack_##name; \
diff --git a/include/linux/netfilter/nf_conntrack_core.h b/include/linux/netfilter/nf_conntrack_core.h
--- a/include/linux/netfilter/nf_conntrack_core.h
+++ b/include/linux/netfilter/nf_conntrack_core.h
@@ -57,12 +57,21 @@ extern int __nf_conntrack_confirm(struct
/* Confirm a connection: returns NF_DROP if packet must be dropped. */
static inline int nf_conntrack_confirm(struct sk_buff **pskb)
{
- if ((*pskb)->nfct
- && !nf_ct_is_confirmed((struct nf_conn *)(*pskb)->nfct))
- return __nf_conntrack_confirm(pskb);
- return NF_ACCEPT;
+ struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
+ int ret = NF_ACCEPT;
+
+ if (ct && !nf_ct_is_confirmed(ct))
+ ret = __nf_conntrack_confirm(pskb);
+ nf_conntrack_deliver_cached_events_for(ct);
+
+ return ret;
}
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+struct nf_conntrack_ecache;
+extern void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ec);
+#endif
+
extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb);
extern struct list_head *nf_conntrack_hash;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -129,6 +129,7 @@ static unsigned int ipv4_confirm(unsigne
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ nf_conntrack_event_cache_init(*pskb);
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(pskb);
}
@@ -146,6 +147,7 @@ static unsigned int ipv4_conntrack_help(
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
unsigned int ret;
+ nf_conntrack_event_cache_init(*pskb);
ret = ct->helper->help(pskb,
(*pskb)->nh.raw - (*pskb)->data
+ (*pskb)->nh.iph->ihl*4,
@@ -506,6 +508,7 @@ static int init_or_cleanup(int init)
return ret;
cleanup:
+ synchronize_net();
ip_ct_attach = NULL;
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -109,6 +109,7 @@ static int icmp_packet(struct nf_conn *c
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
}
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -182,6 +182,8 @@ static unsigned int ipv6_confirm(unsigne
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
+ nf_conntrack_event_cache_init(*pskb);
+
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
@@ -499,6 +501,7 @@ static int init_or_cleanup(int init)
return ret;
cleanup:
+ synchronize_net();
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
cleanup_localinops:
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -113,6 +113,7 @@ static int icmpv6_packet(struct nf_conn
ct->timeout.function((unsigned long)ct);
} else {
atomic_inc(&ct->proto.icmp.count);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -57,6 +57,16 @@ config NF_CONNTRACK_MARK
of packets, but this mark value is kept in the conntrack session
instead of the individual packets.
+config NF_CONNTRACK_EVENTS
+ bool "Connection tracking events"
+ depends on NF_CONNTRACK
+ help
+ If this option is enabled, the connection tracking code will
+ provide a notifier chain that can be used by other kernel code
+ to get notified about changes in the connection tracking state.
+
+ If unsure, say `N'.
+
config NF_CT_PROTO_SCTP
tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL && NF_CONNTRACK
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -38,6 +38,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
+#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
@@ -54,7 +55,7 @@
#include <linux/netfilter/nf_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
-#define NF_CONNTRACK_VERSION "0.3.1"
+#define NF_CONNTRACK_VERSION "0.4.1"
#if 0
#define DEBUGP printk
@@ -81,6 +82,83 @@ unsigned int nf_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+struct notifier_block *nf_conntrack_chain;
+struct notifier_block *nf_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
+
+static inline void __deliver_cached_events(struct nf_conntrack_ecache *ecache)
+{
+ if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
+ && ecache->events)
+ notifier_call_chain(&nf_conntrack_chain, ecache->events,
+ ecache->ct);
+
+ ecache->events = 0;
+}
+
+void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+{
+ __deliver_cached_events(ecache);
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling for freeing the skb */
+void
+nf_conntrack_deliver_cached_events_for(const struct nf_conn *ct)
+{
+ struct nf_conntrack_ecache *ecache =
+ &__get_cpu_var(nf_conntrack_ecache);
+
+ if (!ct)
+ return;
+
+ if (ecache->ct == ct) {
+ DEBUGP("ecache: delivering event for %p\n", ct);
+ __deliver_cached_events(ecache);
+ } else {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ecache: want to deliver for %p, "
+ "but cache has %p\n", ct, ecache->ct);
+ }
+
+ /* signalize that events have already been delviered */
+ ecache->ct = NULL;
+}
+
+/* Deliver cached events for old pending events, if current conntrack != old */
+void nf_conntrack_event_cache_init(const struct sk_buff *skb)
+{
+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
+ struct nf_conntrack_ecache *ecache =
+ &__get_cpu_var(nf_conntrack_ecache);
+
+ /* take care of delivering potentially old events */
+ if (ecache->ct != ct) {
+ enum ip_conntrack_info ctinfo;
+ /* we have to check, since at startup the cache is NULL */
+ if (likely(ecache->ct)) {
+ DEBUGP("ecache: entered for different conntrack: "
+ "ecache->ct=%p, skb->nfct=%p. delivering "
+ "events\n", ecache->ct, ct);
+ __deliver_cached_events(ecache);
+ nf_ct_put(ecache->ct);
+ } else {
+ DEBUGP("ecache: entered for conntrack %p, "
+ "cache was clean before\n", ct);
+ }
+
+ /* initialize for this conntrack/packet */
+ ecache->ct = nf_ct_get(skb, &ctinfo);
+ /* ecache->events cleared by __deliver_cached_events() */
+ } else {
+ DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
+ }
+}
+
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
@@ -457,6 +535,8 @@ destroy_conntrack(struct nf_conntrack *n
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
+ set_bit(IPS_DYING_BIT, &ct->status);
+
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
@@ -500,6 +580,7 @@ static void death_by_timeout(unsigned lo
{
struct nf_conn *ct = (void *)ul_conntrack;
+ nf_conntrack_event(IPCT_DESTROY, ct);
write_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
@@ -613,6 +694,15 @@ __nf_conntrack_confirm(struct sk_buff **
set_bit(IPS_CONFIRMED_BIT, &ct->status);
NF_CT_STAT_INC(insert);
write_unlock_bh(&nf_conntrack_lock);
+ if (ct->helper)
+ nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_NF_NAT_NEEDED
+ if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+ test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+ nf_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, *pskb);
return NF_ACCEPT;
}
@@ -855,7 +945,7 @@ nf_conntrack_in(int pf, unsigned int hoo
struct nf_conntrack_protocol *proto;
unsigned int dataoff;
u_int8_t protonum;
- int set_reply;
+ int set_reply = 0;
int ret;
/* Previously seen (loopback or untracked)? Ignore. */
@@ -898,6 +988,8 @@ nf_conntrack_in(int pf, unsigned int hoo
NF_CT_ASSERT((*pskb)->nfct);
+ nf_conntrack_event_cache_init(*pskb);
+
ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
@@ -909,8 +1001,8 @@ nf_conntrack_in(int pf, unsigned int hoo
return -ret;
}
- if (set_reply)
- set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+ if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_STATUS, *pskb);
return ret;
}
@@ -1067,6 +1159,7 @@ int nf_conntrack_expect_related(struct n
evict_oldest_expect(expect->master);
nf_conntrack_expect_insert(expect);
+ nf_conntrack_expect_event(IPEXP_NEW, expect);
ret = 0;
out:
write_unlock_bh(&nf_conntrack_lock);
@@ -1115,8 +1208,10 @@ int nf_conntrack_helper_register(struct
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
- if (nf_ct_tuplehash_to_ctrack(i)->helper == me)
+ if (nf_ct_tuplehash_to_ctrack(i)->helper == me) {
+ nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i));
nf_ct_tuplehash_to_ctrack(i)->helper = NULL;
+ }
return 0;
}
@@ -1181,6 +1276,7 @@ void nf_ct_refresh_acct(struct nf_conn *
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
+ nf_conntrack_event_cache(IPCT_REFRESH, skb);
}
ct_add_counters(ct, ctinfo, skb);
write_unlock_bh(&nf_conntrack_lock);
@@ -1253,6 +1349,23 @@ nf_ct_iterate_cleanup(int (*iter)(struct
nf_ct_put(ct);
}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ {
+ /* we need to deliver all cached events in order to drop
+ * the reference counts */
+ int cpu;
+ for_each_cpu(cpu) {
+ struct nf_conntrack_ecache *ecache =
+ &per_cpu(nf_conntrack_ecache, cpu);
+ if (ecache->ct) {
+ __nf_ct_deliver_cached_events(ecache);
+ nf_ct_put(ecache->ct);
+ ecache->ct = NULL;
+ }
+ }
+ }
+#endif
}
static int kill_all(struct nf_conn *i, void *data)
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -404,7 +404,8 @@ static int find_nl_seq(u32 seq, const st
}
/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+ struct sk_buff *skb)
{
unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
@@ -418,10 +419,13 @@ static void update_nl_seq(u32 nl_seq, st
oldest = i;
}
- if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+ if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- else if (oldest != NUM_SEQ_TO_REMEMBER)
+ nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ } else if (oldest != NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][oldest] = nl_seq;
+ nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+ }
}
static int help(struct sk_buff **pskb,
@@ -609,7 +613,7 @@ out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info,dir);
+ update_nl_seq(seq, ct_ftp_info, dir, *pskb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -412,6 +412,8 @@ static int sctp_packet(struct nf_conn *c
}
conntrack->proto.sctp.state = newconntrack;
+ if (oldsctpstate != newconntrack)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
write_unlock_bh(&sctp_lock);
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1013,6 +1013,10 @@ static int tcp_packet(struct nf_conn *co
? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
write_unlock_bh(&tcp_lock);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ if (new_state != old_state)
+ nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
/* If only reply is a RST, we can consider ourselves not to
have an established connection: this is a fairly common
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -83,7 +83,8 @@ static int udp_packet(struct nf_conn *co
nf_ct_refresh_acct(conntrack, ctinfo, skb,
nf_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+ nf_conntrack_event_cache(IPCT_STATUS, skb);
} else
nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -825,6 +825,15 @@ void need_nf_conntrack(void)
{
}
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cache_init);
+EXPORT_SYMBOL_GPL(nf_conntrack_deliver_cached_events_for);
+#endif
EXPORT_SYMBOL(nf_conntrack_l3proto_register);
EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
EXPORT_SYMBOL(nf_conntrack_protocol_register);
--
- Harald Welte <laforge@netfilter.org> http://netfilter.org/
============================================================================
"Fragmentation is like classful addressing -- an interesting early
architectural error that shows how much experimentation was going
on while IP was being designed." -- Paul Vixie
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] nf_conntrack port of conntrack-event-api
2005-08-03 21:38 [PATCH] nf_conntrack port of conntrack-event-api Harald Welte
@ 2005-08-03 21:50 ` Patrick McHardy
2005-08-04 21:43 ` Harald Welte
2005-08-05 16:48 ` Harald Welte
0 siblings, 2 replies; 4+ messages in thread
From: Patrick McHardy @ 2005-08-03 21:50 UTC (permalink / raw)
To: Harald Welte; +Cc: Netfilter Development Mailinglist, Yasuyuki KOZAKAI
Harald Welte wrote:
> Hi! I've just committed the following patch to the nf_conntrack branch
> of my netfilter-2.6.14 tree:
>
> [NETFILTER] port conntrack-events from ip_conntrack to nf_conntrack
>
> This adds a notifier chain based event mechanism for nf_conntrack state
> changes. As opposed to the previous implementations in patch-o-matic, we
> do no longer need a field in the skb to achieve this.
The event cache also needs the fixes I sent in the "atomic counter
underflow" thread two hours ago.
Regards
Patrick
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] nf_conntrack port of conntrack-event-api
2005-08-03 21:50 ` Patrick McHardy
@ 2005-08-04 21:43 ` Harald Welte
2005-08-05 16:48 ` Harald Welte
1 sibling, 0 replies; 4+ messages in thread
From: Harald Welte @ 2005-08-04 21:43 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Netfilter Development Mailinglist, Yasuyuki KOZAKAI
[-- Attachment #1: Type: text/plain, Size: 1281 bytes --]
On Wed, Aug 03, 2005 at 11:50:01PM +0200, Patrick McHardy wrote:
> Harald Welte wrote:
> > Hi! I've just committed the following patch to the nf_conntrack branch
> > of my netfilter-2.6.14 tree:
> >
> > [NETFILTER] port conntrack-events from ip_conntrack to nf_conntrack
> >
> > This adds a notifier chain based event mechanism for nf_conntrack state
> > changes. As opposed to the previous implementations in patch-o-matic, we
> > do no longer need a field in the skb to achieve this.
>
> The event cache also needs the fixes I sent in the "atomic counter
> underflow" thread two hours ago.
Yes, I'm aware of that. While I fully trust you on your changes, I
first want to understand them, then port them to nf_conntrack. Since
I'm still officially on holidays visiting my parents, my time is quite
limited, but I expect it to be done by tomorrow (Friday) evening.
cheers,
Harald
--
- Harald Welte <laforge@netfilter.org> http://netfilter.org/
============================================================================
"Fragmentation is like classful addressing -- an interesting early
architectural error that shows how much experimentation was going
on while IP was being designed." -- Paul Vixie
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] nf_conntrack port of conntrack-event-api
2005-08-03 21:50 ` Patrick McHardy
2005-08-04 21:43 ` Harald Welte
@ 2005-08-05 16:48 ` Harald Welte
1 sibling, 0 replies; 4+ messages in thread
From: Harald Welte @ 2005-08-05 16:48 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Netfilter Development Mailinglist, Yasuyuki KOZAKAI
[-- Attachment #1.1: Type: text/plain, Size: 1058 bytes --]
On Wed, Aug 03, 2005 at 11:50:01PM +0200, Patrick McHardy wrote:
> Harald Welte wrote:
> > Hi! I've just committed the following patch to the nf_conntrack branch
> > of my netfilter-2.6.14 tree:
> >
> > [NETFILTER] port conntrack-events from ip_conntrack to nf_conntrack
> >
> > This adds a notifier chain based event mechanism for nf_conntrack state
> > changes. As opposed to the previous implementations in patch-o-matic, we
> > do no longer need a field in the skb to achieve this.
>
> The event cache also needs the fixes I sent in the "atomic counter
> underflow" thread two hours ago.
I've just committed the attached patch to netfilter-2.6.14#nf_conntrack.
--
- Harald Welte <laforge@netfilter.org> http://netfilter.org/
============================================================================
"Fragmentation is like classful addressing -- an interesting early
architectural error that shows how much experimentation was going
on while IP was being designed." -- Paul Vixie
[-- Attachment #1.2: nfct-underflow.patch --]
[-- Type: text/plain, Size: 11938 bytes --]
[NETFILTER]: Fix multiple problems with the conntrack event cache
refcnt underflow: the reference count is decremented when a conntrack
entry is removed from the hash but it is not incremented when entering
new entries.
missing protection of process context against softirq context: all
cache operations need to locally disable softirqs to avoid races.
Additionally the event cache can't be initialized when a packet enters
the conntrack code but needs to be initialized whenever we cache an event
and the stored conntrack entry doesn't match the current one.
incorrect flushing of the event cache in nf_ct_iterate_cleanup: without
real locking we can't flush the cache for different CPUs without incurring
races. The cache for different CPUs can only be flushed when no packets
are going through the code. nf_ct_iterate_cleanup doesn't need to drop
all references, so flushing is moved to the cleanup path.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Harald Welte <laforge@netfilter.org>
---
commit 69d90870b705ad24bfab685fc1bb56e2e5d649be
tree 1989bcc9e353b8364155158ec02a743266ae58a8
parent de3cf64608b638e5af3f232383a6eb99f80ceed4
author Harald Welte <laforge@netfilter.org> Fr, 05 Aug 2005 18:46:16 +0200
committer Harald Welte <laforge@netfilter.org> Fr, 05 Aug 2005 18:46:16 +0200
include/linux/netfilter/nf_conntrack.h | 26 +++---
include/linux/netfilter/nf_conntrack_core.h | 14 +--
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2
net/netfilter/nf_conntrack_core.c | 106 +++++++++---------------
net/netfilter/nf_conntrack_standalone.c | 4 -
6 files changed, 58 insertions(+), 96 deletions(-)
diff --git a/include/linux/netfilter/nf_conntrack.h b/include/linux/netfilter/nf_conntrack.h
--- a/include/linux/netfilter/nf_conntrack.h
+++ b/include/linux/netfilter/nf_conntrack.h
@@ -228,6 +228,7 @@ extern unsigned int nf_conntrack_htable_
#ifdef CONFIG_NF_CONNTRACK_EVENTS
#include <linux/notifier.h>
+#include <linux/interrupt.h>
struct nf_conntrack_ecache {
struct nf_conn *ct;
@@ -262,26 +263,24 @@ nf_conntrack_expect_unregister_notifier(
return notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
}
+extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
+extern void __nf_ct_event_cache_init(struct nf_conn *ct);
+
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event,
const struct sk_buff *skb)
{
- struct nf_conntrack_ecache *ecache =
- &__get_cpu_var(nf_conntrack_ecache);
+ struct nf_conn *ct = (struct nf_conn *)skb->nfct;
+ struct nf_conntrack_ecache *ecache;
- if (unlikely((struct nf_conn *) skb->nfct != ecache->ct)) {
- if (net_ratelimit()) {
- printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n");
- dump_stack();
- }
- }
+ local_bh_disable();
+ ecache = &__get_cpu_var(nf_conntrack_ecache);
+ if (ct != ecache->ct)
+ __nf_ct_event_cache_init(ct);
ecache->events |= event;
+ local_bh_enable();
}
-extern void
-nf_conntrack_deliver_cached_events_for(const struct nf_conn *ct);
-extern void nf_conntrack_event_cache_init(const struct sk_buff *skb);
-
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct)
{
@@ -300,8 +299,7 @@ static inline void nf_conntrack_event_ca
const struct sk_buff *skb) {}
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct) {}
-static inline void nf_conntrack_deliver_cached_events_for(struct nf_conn *ct) {}
-static inline void nf_conntrack_event_cache_init(const struct sk_buff *skb) {}
+static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
static inline void
nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp) {}
diff --git a/include/linux/netfilter/nf_conntrack_core.h b/include/linux/netfilter/nf_conntrack_core.h
--- a/include/linux/netfilter/nf_conntrack_core.h
+++ b/include/linux/netfilter/nf_conntrack_core.h
@@ -60,18 +60,14 @@ static inline int nf_conntrack_confirm(s
struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct;
int ret = NF_ACCEPT;
- if (ct && !nf_ct_is_confirmed(ct))
- ret = __nf_conntrack_confirm(pskb);
- nf_conntrack_deliver_cached_events_for(ct);
-
+ if (ct) {
+ if (!nf_ct_is_confirmed(ct))
+ ret = __nf_conntrack_confirm(pskb);
+ nf_ct_deliver_cached_events(ct);
+ }
return ret;
}
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-struct nf_conntrack_ecache;
-extern void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ec);
-#endif
-
extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb);
extern struct list_head *nf_conntrack_hash;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -129,7 +129,6 @@ static unsigned int ipv4_confirm(unsigne
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- nf_conntrack_event_cache_init(*pskb);
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(pskb);
}
@@ -147,7 +146,6 @@ static unsigned int ipv4_conntrack_help(
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
unsigned int ret;
- nf_conntrack_event_cache_init(*pskb);
ret = ct->helper->help(pskb,
(*pskb)->nh.raw - (*pskb)->data
+ (*pskb)->nh.iph->ihl*4,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -182,8 +182,6 @@ static unsigned int ipv6_confirm(unsigne
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- nf_conntrack_event_cache_init(*pskb);
-
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(*pskb, &ctinfo);
if (ct && ct->helper) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -88,75 +88,65 @@ struct notifier_block *nf_conntrack_expe
DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-static inline void __deliver_cached_events(struct nf_conntrack_ecache *ecache)
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+static inline void
+__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
+ DEBUGP("ecache: delivering events for %p\n", ecache->ct);
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
&& ecache->events)
notifier_call_chain(&nf_conntrack_chain, ecache->events,
ecache->ct);
ecache->events = 0;
-}
-
-void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
-{
- __deliver_cached_events(ecache);
+ nf_ct_put(ecache->ct);
+ ecache->ct = NULL;
}
/* Deliver all cached events for a particular conntrack. This is called
* by code prior to async packet handling for freeing the skb */
-void
-nf_conntrack_deliver_cached_events_for(const struct nf_conn *ct)
+void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
- struct nf_conntrack_ecache *ecache =
- &__get_cpu_var(nf_conntrack_ecache);
-
- if (!ct)
- return;
-
- if (ecache->ct == ct) {
- DEBUGP("ecache: delivering event for %p\n", ct);
- __deliver_cached_events(ecache);
- } else {
- if (net_ratelimit())
- printk(KERN_WARNING "ecache: want to deliver for %p, "
- "but cache has %p\n", ct, ecache->ct);
- }
+ struct nf_conntrack_ecache *ecache;
- /* signalize that events have already been delviered */
- ecache->ct = NULL;
+ local_bh_disable();
+ ecache = &__get_cpu_var(nf_conntrack_ecache);
+ if (ecache->ct == ct)
+ __nf_ct_deliver_cached_events(ecache);
+ local_bh_enable();
}
/* Deliver cached events for old pending events, if current conntrack != old */
-void nf_conntrack_event_cache_init(const struct sk_buff *skb)
+void __nf_ct_event_cache_init(struct nf_conn *ct)
{
- struct nf_conn *ct = (struct nf_conn *) skb->nfct;
- struct nf_conntrack_ecache *ecache =
- &__get_cpu_var(nf_conntrack_ecache);
-
+ struct nf_conntrack_ecache *ecache;
+
/* take care of delivering potentially old events */
- if (ecache->ct != ct) {
- enum ip_conntrack_info ctinfo;
- /* we have to check, since at startup the cache is NULL */
- if (likely(ecache->ct)) {
- DEBUGP("ecache: entered for different conntrack: "
- "ecache->ct=%p, skb->nfct=%p. delivering "
- "events\n", ecache->ct, ct);
- __deliver_cached_events(ecache);
+ ecache = &__get_cpu_var(nf_conntrack_ecache);
+ BUG_ON(ecache->ct == ct);
+ if (ecache->ct)
+ __nf_ct_deliver_cached_events(ecache);
+ /* initialize for this conntrack/packet */
+ ecache->ct = ct;
+ nf_conntrack_get(&ct->ct_general);
+}
+
+/* flush the event cache - touches other CPU's data and must not be called
+ * while packets are still passing through the code */
+static void nf_ct_event_cache_flush(void)
+{
+ struct nf_conntrack_ecache *ecache;
+ int cpu;
+
+ for_each_cpu(cpu) {
+ ecache = &per_cpu(nf_conntrack_ecache, cpu);
+ if (ecache->ct)
nf_ct_put(ecache->ct);
- } else {
- DEBUGP("ecache: entered for conntrack %p, "
- "cache was clean before\n", ct);
- }
-
- /* initialize for this conntrack/packet */
- ecache->ct = nf_ct_get(skb, &ctinfo);
- /* ecache->events cleared by __deliver_cached_events() */
- } else {
- DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
}
}
-
+#else
+static inline void nf_ct_event_cache_flush(void) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
@@ -981,8 +971,6 @@ nf_conntrack_in(int pf, unsigned int hoo
NF_CT_ASSERT((*pskb)->nfct);
- nf_conntrack_event_cache_init(*pskb);
-
ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
@@ -1345,23 +1333,6 @@ nf_ct_iterate_cleanup(int (*iter)(struct
nf_ct_put(ct);
}
-
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- {
- /* we need to deliver all cached events in order to drop
- * the reference counts */
- int cpu;
- for_each_cpu(cpu) {
- struct nf_conntrack_ecache *ecache =
- &per_cpu(nf_conntrack_ecache, cpu);
- if (ecache->ct) {
- __nf_ct_deliver_cached_events(ecache);
- nf_ct_put(ecache->ct);
- ecache->ct = NULL;
- }
- }
- }
-#endif
}
static int kill_all(struct nf_conn *i, void *data)
@@ -1390,6 +1361,7 @@ void nf_conntrack_cleanup(void)
delete... */
synchronize_net();
+ nf_ct_event_cache_flush();
i_see_dead_people:
nf_ct_iterate_cleanup(kill_all, NULL);
if (atomic_read(&nf_conntrack_count) != 0) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -830,9 +830,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain);
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache);
-EXPORT_SYMBOL_GPL(nf_conntrack_event_cache_init);
-EXPORT_SYMBOL_GPL(nf_conntrack_deliver_cached_events_for);
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
#endif
EXPORT_SYMBOL(nf_conntrack_l3proto_register);
EXPORT_SYMBOL(nf_conntrack_l3proto_unregister);
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-08-05 16:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-08-03 21:38 [PATCH] nf_conntrack port of conntrack-event-api Harald Welte
2005-08-03 21:50 ` Patrick McHardy
2005-08-04 21:43 ` Harald Welte
2005-08-05 16:48 ` Harald Welte
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.