* [PATCH 0/3] [RFC] conntrack event caching changes
@ 2009-03-30 13:27 Pablo Neira Ayuso
2009-03-30 13:27 ` [PATCH 1/3] netfilter: ecache: replace notify chain by function pointer Pablo Neira Ayuso
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-30 13:27 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
Hi Patrick,
This is another round of the event caching rework. I'm not including
patches 1/5 to 3/5 from my previous submission; they are required by
these patches, but I don't think they are relevant to this RFC.
Basically, this rework contains:
a) The replacement of the notify chain with a single function pointer.
b) The replacement of the per-cpu event cache with a per-conntrack
event cache.
c) The addition of optional reliable conntrack event delivery,
incorporating the recent comments from one of our discussions.
Thanks!
---
Pablo Neira Ayuso (3):
ctnetlink: optional reliable conntrack event delivery
conntrack: ecache: move event cache to conntrack extension infrastructure
netfilter: ecache: replace notify chain by function pointer
include/net/netfilter/nf_conntrack.h | 2
include/net/netfilter/nf_conntrack_core.h | 6 -
include/net/netfilter/nf_conntrack_ecache.h | 163 +++++++++++----
include/net/netfilter/nf_conntrack_extend.h | 2
include/net/netfilter/nf_conntrack_helper.h | 2
include/net/netns/conntrack.h | 7 -
net/netfilter/nf_conntrack_core.c | 124 ++++++++----
net/netfilter/nf_conntrack_ecache.c | 288 ++++++++++++++++++++-------
net/netfilter/nf_conntrack_ftp.c | 4
net/netfilter/nf_conntrack_helper.c | 15 +
net/netfilter/nf_conntrack_netlink.c | 103 +++++-----
net/netfilter/nf_conntrack_proto_sctp.c | 2
net/netfilter/nf_conntrack_proto_tcp.c | 7 -
13 files changed, 513 insertions(+), 212 deletions(-)
--
3.3.3
* [PATCH 1/3] netfilter: ecache: replace notify chain by function pointer
2009-03-30 13:27 [PATCH 0/3] [RFC] conntrack event caching changes Pablo Neira Ayuso
@ 2009-03-30 13:27 ` Pablo Neira Ayuso
2009-03-30 13:28 ` [PATCH 2/3] conntrack: ecache: move event cache to conntrack extension infrastructure Pablo Neira Ayuso
2009-03-30 13:28 ` [PATCH 3/3] ctnetlink: optional reliable conntrack event delivery Pablo Neira Ayuso
2 siblings, 0 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-30 13:27 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch removes the notify chain from the per-cpu event cache
infrastructure and replaces it with a simple function pointer. The
notify chain adds too much overhead for something that is only used
by ctnetlink.
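For illustration only (this snippet is not part of the patch; the
module boilerplate and the callback body are made up), a subscriber
such as ctnetlink now hooks into conntrack events like this:

#include <linux/module.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>

/* hypothetical subscriber, shown only to illustrate the new API */
static int example_ct_event(unsigned int events, struct nf_ct_event *item)
{
        /* item->ct, item->pid and item->report describe the event */
        if (events & IPCT_DESTROY)
                printk(KERN_DEBUG "conntrack entry destroyed\n");
        return 0;
}

static struct nf_ct_event_notifier example_notifier = {
        .fcn    = example_ct_event,
};

static int __init example_init(void)
{
        /* returns -EBUSY if someone else already registered, since a
         * single function pointer now replaces the notify chain */
        return nf_conntrack_register_notifier(&example_notifier);
}

static void __exit example_exit(void)
{
        nf_conntrack_unregister_notifier(&example_notifier);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");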
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_ecache.h | 70 +++++++++++++------
net/netfilter/nf_conntrack_ecache.c | 101 +++++++++++++++++++++++----
net/netfilter/nf_conntrack_netlink.c | 41 +++++------
3 files changed, 154 insertions(+), 58 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 892b8cd..13ce023 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_ECACHE_H
#include <net/netfilter/nf_conntrack.h>
-#include <linux/notifier.h>
#include <linux/interrupt.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -93,9 +92,13 @@ struct nf_ct_event {
int report;
};
-extern struct atomic_notifier_head nf_conntrack_chain;
-extern int nf_conntrack_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
+struct nf_ct_event_notifier {
+ int (*fcn)(unsigned int events, struct nf_ct_event *item);
+};
+
+extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
+extern int nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -121,13 +124,24 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
u32 pid,
int report)
{
- struct nf_ct_event item = {
- .ct = ct,
- .pid = pid,
- .report = report
- };
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
- atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
+ struct nf_ct_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
+ if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ struct nf_ct_event item = {
+ .ct = ct,
+ .pid = pid,
+ .report = report
+ };
+
+ notify->fcn(event, &item);
+ }
+out_unlock:
+ rcu_read_unlock();
}
static inline void
@@ -142,9 +156,13 @@ struct nf_exp_event {
int report;
};
-extern struct atomic_notifier_head nf_ct_expect_chain;
-extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
-extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
+struct nf_exp_event_notifier {
+ int (*fcn)(unsigned int events, struct nf_exp_event *item);
+};
+
+extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
+extern int nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
static inline void
nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -152,12 +170,24 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
u32 pid,
int report)
{
- struct nf_exp_event item = {
- .exp = exp,
- .pid = pid,
- .report = report
- };
- atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+ struct nf_exp_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
+ {
+ struct nf_exp_event item = {
+ .exp = exp,
+ .pid = pid,
+ .report = report
+ };
+
+ notify->fcn(event, &item);
+ }
+out_unlock:
+ rcu_read_unlock();
}
static inline void
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190..780278b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,24 +16,32 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
-#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
+ struct nf_ct_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
&& ecache->events) {
struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
.report = 0
};
- atomic_notifier_call_chain(&nf_conntrack_chain,
- ecache->events,
- &item);
+ notify->fcn(ecache->events, &item);
}
ecache->events = 0;
nf_ct_put(ecache->ct);
ecache->ct = NULL;
+
+out_unlock:
+ rcu_read_unlock();
}
/* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,86 @@ void nf_conntrack_ecache_fini(struct net *net)
free_percpu(net->ct.ecache);
}
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+ int ret = 0;
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_conntrack_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+int nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+ int ret = 0;
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify != new) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+ int ret = 0;
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_expect_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+int nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+ int ret = 0;
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify != new) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_expect_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc6cc81..3164291 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -26,7 +26,6 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
-#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -404,13 +403,12 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_ct_event *item = (struct nf_ct_event *)ptr;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
@@ -419,7 +417,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
- return NOTIFY_DONE;
+ return 0;
if (events & IPCT_DESTROY) {
type = IPCTNL_MSG_CT_DELETE;
@@ -432,14 +430,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report && !nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
+ return 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
- return NOTIFY_DONE;
+ return 0;
b = skb->tail;
@@ -513,14 +511,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, item->pid, group, item->report);
- return NOTIFY_DONE;
+ return 0;
nla_put_failure:
rcu_read_unlock();
nlmsg_failure:
nfnetlink_set_err(0, group, -ENOBUFS);
kfree_skb(skb);
- return NOTIFY_DONE;
+ return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -1466,12 +1464,11 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_exp_event *item = (struct nf_exp_event *)ptr;
struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
@@ -1482,15 +1479,15 @@ static int ctnetlink_expect_event(struct notifier_block *this,
type = IPCTNL_MSG_EXP_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report &&
!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
+ return 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb)
- return NOTIFY_DONE;
+ return 0;
b = skb->tail;
@@ -1510,14 +1507,14 @@ static int ctnetlink_expect_event(struct notifier_block *this,
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
- return NOTIFY_DONE;
+ return 0;
nla_put_failure:
rcu_read_unlock();
nlmsg_failure:
nfnetlink_set_err(0, 0, -ENOBUFS);
kfree_skb(skb);
- return NOTIFY_DONE;
+ return 0;
}
#endif
static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1823,12 +1820,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+ .fcn = ctnetlink_conntrack_event,
};
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+ .fcn = ctnetlink_expect_event,
};
#endif
* [PATCH 2/3] conntrack: ecache: move event cache to conntrack extension infrastructure
2009-03-30 13:27 [PATCH 0/3] [RFC] conntrack event caching changes Pablo Neira Ayuso
2009-03-30 13:27 ` [PATCH 1/3] netfilter: ecache: replace notify chain by function pointer Pablo Neira Ayuso
@ 2009-03-30 13:28 ` Pablo Neira Ayuso
2009-03-30 13:28 ` [PATCH 3/3] ctnetlink: optional reliable conntrack event delivery Pablo Neira Ayuso
2 siblings, 0 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-30 13:28 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch reworks the event caching infrastructure to use the
conntrack extension infrastructure. As a result, you can enable and
disable event delivery via /proc/sys/net/netfilter/nf_conntrack_events
at runtime instead of at compile time. The main drawback is that
we consume more memory per conntrack if event delivery is enabled.
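As a rough sketch of the resulting flow (the helper below is
hypothetical and only groups the calls added or reworked by this
patch; locking and error handling are omitted):

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>

/* illustration only: the per-conntrack event cache life cycle */
static void example_ecache_flow(struct nf_conn *ct)
{
        /* at conntrack creation: attach the ecache extension; this
         * returns NULL when net->ct.sysctl_events is disabled */
        nf_ct_ecache_ext_add(ct, GFP_ATOMIC);

        /* during packet processing: record an event as a bit in the
         * per-conntrack extension, no notifier call happens here */
        nf_conntrack_event_cache(IPCT_PROTOINFO, ct);

        /* before the packet leaves conntrack: push the accumulated
         * bits to the registered notifier and clear the cache */
        nf_ct_deliver_cached_events(ct);
}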
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_ecache.h | 73 +++++++---
include/net/netfilter/nf_conntrack_extend.h | 2
include/net/netns/conntrack.h | 5 -
net/netfilter/nf_conntrack_core.c | 37 ++---
net/netfilter/nf_conntrack_ecache.c | 197 +++++++++++++++++----------
net/netfilter/nf_conntrack_ftp.c | 4 -
net/netfilter/nf_conntrack_netlink.c | 1
net/netfilter/nf_conntrack_proto_sctp.c | 2
net/netfilter/nf_conntrack_proto_tcp.c | 7 +
9 files changed, 207 insertions(+), 121 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 13ce023..eb5dadf 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,9 +6,11 @@
#define _NF_CONNTRACK_ECACHE_H
#include <net/netfilter/nf_conntrack.h>
-#include <linux/interrupt.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
/* Connection tracking event bits */
enum ip_conntrack_events
@@ -81,8 +83,24 @@ enum ip_conntrack_expect_events {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache {
- struct nf_conn *ct;
- unsigned int events;
+ unsigned int cache;
+};
+
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_find(const struct nf_conn *ct)
+{
+ return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+}
+
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+ struct net *net = nf_ct_net(ct);
+
+ if (!net->ct.sysctl_events)
+ return NULL;
+
+ return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
};
/* This structure is passed to event handler */
@@ -100,22 +118,33 @@ extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
extern int nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
-extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
-extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(struct net *net);
+extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
+
+static inline void
+__nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
+{
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event_notifier *notify =
+ rcu_dereference(nf_conntrack_event_cb);
+
+ if (notify == NULL)
+ return;
+
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ return;
+
+ e->cache |= event;
+}
+
+extern spinlock_t nf_conntrack_lock;
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
- if (ct != ecache->ct)
- __nf_ct_event_cache_init(ct);
- ecache->events |= event;
- local_bh_enable();
+ spin_lock_bh(&nf_conntrack_lock);
+ __nf_conntrack_event_cache(event, ct);
+ spin_unlock_bh(&nf_conntrack_lock);
}
static inline void
@@ -124,6 +153,7 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
u32 pid,
int report)
{
+ struct net *net = nf_ct_net(ct);
struct nf_ct_event_notifier *notify;
rcu_read_lock();
@@ -131,6 +161,9 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
if (notify == NULL)
goto out_unlock;
+ if (!net->ct.sysctl_events)
+ return;
+
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
struct nf_ct_event item = {
.ct = ct,
@@ -170,6 +203,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
u32 pid,
int report)
{
+ struct net *net = nf_ct_exp_net(exp);
struct nf_exp_event_notifier *notify;
rcu_read_lock();
@@ -177,6 +211,9 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
if (notify == NULL)
goto out_unlock;
+ if (!net->ct.sysctl_events)
+ return;
+
{
struct nf_exp_event item = {
.exp = exp,
@@ -217,12 +254,6 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
struct nf_conntrack_expect *exp,
u32 pid,
int report) {}
-static inline void nf_ct_event_cache_flush(struct net *net) {}
-
-static inline int nf_conntrack_ecache_init(struct net *net)
-{
- return 0;
-}
static inline void nf_conntrack_ecache_fini(struct net *net)
{
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index da8ee52..7f8fc5d 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -8,12 +8,14 @@ enum nf_ct_ext_id
NF_CT_EXT_HELPER,
NF_CT_EXT_NAT,
NF_CT_EXT_ACCT,
+ NF_CT_EXT_ECACHE,
NF_CT_EXT_NUM,
};
#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
+#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index f4498a6..69dd322 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -14,15 +14,14 @@ struct netns_ct {
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
struct ip_conntrack_stat *stat;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct nf_conntrack_ecache *ecache;
-#endif
+ int sysctl_events;
int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
+ struct ctl_table_header *event_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e0359d6..a4862f0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -371,17 +371,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
NF_CT_STAT_INC(net, insert);
- spin_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
- nf_conntrack_event_cache(IPCT_HELPER, ct);
+ __nf_conntrack_event_cache(IPCT_HELPER, ct);
#ifdef CONFIG_NF_NAT_NEEDED
if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, ct);
+ __nf_conntrack_event_cache(IPCT_NATINFO, ct);
#endif
- nf_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, ct);
+ __nf_conntrack_event_cache(master_ct(ct) ?
+ IPCT_RELATED : IPCT_NEW, ct);
+ spin_unlock_bh(&nf_conntrack_lock);
return NF_ACCEPT;
out:
@@ -564,6 +564,7 @@ init_conntrack(struct net *net,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
exp = nf_ct_find_expectation(net, tuple);
@@ -726,6 +727,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
NF_CT_ASSERT(skb->nfct);
+ /* We may have pending events, deliver them and clear the cache */
+ nf_ct_deliver_cached_events(ct);
+
ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
@@ -791,8 +795,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- int event = 0;
-
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
@@ -805,7 +807,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
/* If not in hash table, timer will not be active yet */
if (!nf_ct_is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
+ __nf_conntrack_event_cache(IPCT_REFRESH, ct);
} else {
unsigned long newtime = jiffies + extra_jiffies;
@@ -816,7 +818,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
&& del_timer(&ct->timeout)) {
ct->timeout.expires = newtime;
add_timer(&ct->timeout);
- event = IPCT_REFRESH;
+ __nf_conntrack_event_cache(IPCT_REFRESH, ct);
}
}
@@ -833,10 +835,6 @@ acct:
}
spin_unlock_bh(&nf_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- nf_conntrack_event_cache(event, ct);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
@@ -1022,8 +1020,6 @@ static void nf_conntrack_cleanup_init_net(void)
static void nf_conntrack_cleanup_net(struct net *net)
{
- nf_ct_event_cache_flush(net);
- nf_conntrack_ecache_fini(net);
i_see_dead_people:
nf_ct_iterate_cleanup(net, kill_all, NULL);
if (atomic_read(&net->ct.count) != 0) {
@@ -1036,6 +1032,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
+ nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
@@ -1209,9 +1206,6 @@ static int nf_conntrack_init_net(struct net *net)
ret = -ENOMEM;
goto err_stat;
}
- ret = nf_conntrack_ecache_init(net);
- if (ret < 0)
- goto err_ecache;
net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
&net->ct.hash_vmalloc);
if (!net->ct.hash) {
@@ -1225,6 +1219,9 @@ static int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto err_acct;
+ ret = nf_conntrack_ecache_init(net);
+ if (ret < 0)
+ goto err_ecache;
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
@@ -1237,14 +1234,14 @@ static int nf_conntrack_init_net(struct net *net)
return 0;
+err_ecache:
+ nf_conntrack_acct_fini(net);
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
err_hash:
- nf_conntrack_ecache_fini(net);
-err_ecache:
free_percpu(net->ct.stat);
err_stat:
return ret;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 780278b..77f9254 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -1,7 +1,7 @@
/* Event cache for netfilter. */
/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2009 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -21,6 +21,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
static DEFINE_MUTEX(nf_ct_ecache_mutex);
@@ -30,96 +31,38 @@ EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
struct nf_exp_event_notifier *nf_expect_event_cb;
EXPORT_SYMBOL_GPL(nf_expect_event_cb);
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling for freeing the skb */
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
- if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
- && ecache->events) {
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ return;
+
+ if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && e->cache) {
struct nf_ct_event item = {
- .ct = ecache->ct,
+ .ct = ct,
.pid = 0,
.report = 0
};
- notify->fcn(ecache->events, &item);
+ notify->fcn(e->cache, &item);
}
-
- ecache->events = 0;
- nf_ct_put(ecache->ct);
- ecache->ct = NULL;
+ xchg(&e->cache, 0);
out_unlock:
rcu_read_unlock();
}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
- if (ecache->ct == ct)
- __nf_ct_deliver_cached_events(ecache);
- local_bh_enable();
-}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __nf_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
-
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-void nf_ct_event_cache_flush(struct net *net)
-{
- struct nf_conntrack_ecache *ecache;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- ecache = per_cpu_ptr(net->ct.ecache, cpu);
- if (ecache->ct)
- nf_ct_put(ecache->ct);
- }
-}
-
-int nf_conntrack_ecache_init(struct net *net)
-{
- net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
- if (!net->ct.ecache)
- return -ENOMEM;
- return 0;
-}
-
-void nf_conntrack_ecache_fini(struct net *net)
-{
- free_percpu(net->ct.ecache);
-}
-
int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
int ret = 0;
@@ -203,3 +146,115 @@ out_unlock:
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+#define NF_CT_EVENTS_DEFAULT 1
+#else
+#define NF_CT_EVENTS_DEFAULT 0
+#endif
+
+static int nf_ct_events_switch __read_mostly = NF_CT_EVENTS_DEFAULT;
+
+module_param_named(event, nf_ct_events_switch, bool, 0644);
+MODULE_PARM_DESC(event, "Enable connection tracking event delivery");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table event_sysctl_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_events",
+ .data = &init_net.ct.sysctl_events,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type event_extend __read_mostly = {
+ .len = sizeof(struct nf_conntrack_ecache),
+ .align = __alignof__(struct nf_conntrack_ecache),
+ .id = NF_CT_EXT_ECACHE,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_events;
+
+ net->ct.event_sysctl_header =
+ register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.event_sysctl_header) {
+ printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+ goto out_register;
+ }
+ return 0;
+
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ct.event_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.event_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_ecache_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_events = nf_ct_events_switch;
+
+ if (net_eq(net, &init_net)) {
+ ret = nf_ct_extend_register(&event_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_ct_event: Unable to register "
+ "event extension.\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_event_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&event_extend);
+out_extend_register:
+ return ret;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+ nf_conntrack_event_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&event_extend);
+}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 00fecc3..fe2931d 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -338,11 +338,11 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
+ __nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
} else if (oldest != NUM_SEQ_TO_REMEMBER &&
after(nl_seq, info->seq_aft_nl[dir][oldest])) {
info->seq_aft_nl[dir][oldest] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
+ __nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
}
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3164291..6aece58 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1224,6 +1224,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 74e0379..e7ea25c 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct,
ct->proto.sctp.state = new_state;
if (old_state != new_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ __nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
}
write_unlock_bh(&sctp_lock);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 0aeb8b0..9ca3aa9 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -972,11 +972,12 @@ static int tcp_packet(struct nf_conn *ct,
timeout = nf_ct_tcp_timeout_unacknowledged;
else
timeout = tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
+ __nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
if (new_state != old_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ __nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+
+ write_unlock_bh(&tcp_lock);
if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
/* If only reply is a RST, we can consider ourselves not to
* [PATCH 3/3] ctnetlink: optional reliable conntrack event delivery
2009-03-30 13:27 [PATCH 0/3] [RFC] conntrack event caching changes Pablo Neira Ayuso
2009-03-30 13:27 ` [PATCH 1/3] netfilter: ecache: replace notify chain by function pointer Pablo Neira Ayuso
2009-03-30 13:28 ` [PATCH 2/3] conntrack: ecache: move event cache to conntrack extension infrastructure Pablo Neira Ayuso
@ 2009-03-30 13:28 ` Pablo Neira Ayuso
2009-03-30 13:32 ` Pablo Neira Ayuso
2 siblings, 1 reply; 5+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-30 13:28 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch improves ctnetlink event reliability if one broadcast
listener has set the NETLINK_BROADCAST_ERROR socket option.
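For reference, a listener opts in to this from userspace roughly as
follows (sketch only, error handling omitted; NETLINK_BROADCAST_ERROR
comes from <linux/netlink.h>, and depending on your libc you may also
need <linux/socket.h> for SOL_NETLINK):

#include <sys/socket.h>
#include <linux/netlink.h>

static int example_open_reliable_listener(void)
{
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);
        int on = 1;

        /* ask the kernel to report an error back to the sender
         * (ctnetlink) whenever this broadcast listener fails to
         * receive an event message */
        setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR,
                   &on, sizeof(on));

        /* then bind to the NFNLGRP_CONNTRACK_* groups as usual */
        return fd;
}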
The logic is the following: if the event delivery fails, ctnetlink
sets the IPCT_DELIVERY_FAILED event bit and keeps the undelivered
events in the conntrack event cache. Thus, once the next packet
arrives, we trigger another event delivery in nf_conntrack_in(). If
things don't go well on this second try, we accumulate the pending
events in the cache but we try to deliver the current state as soon
as possible. Therefore, we may lose state transitions, but the
userspace process gets back in sync at some point.
In the worst case, if no events were delivered to userspace, we make
sure that destroy events are successfully delivered. This happens
because, if ctnetlink fails to deliver the destroy event, we remove
the conntrack entry from the hashes and insert it into the dying
list, which contains inactive entries. Then, the conntrack timer
is re-added with an extra grace timeout of 15 seconds to trigger the
event again (this grace timeout is tunable via /proc).
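In code terms, the destroy path now behaves roughly like this
(simplified from the death_by_timeout()/nf_ct_setup_event_timer()
hunks below; locking and refcounting are elided):

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>

/* illustration only: simplified destroy-event retry logic */
static void example_destroy_path(struct nf_conn *ct)
{
        if (!test_bit(IPS_DYING_BIT, &ct->status) &&
            nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
                /* delivery failed: unhash the entry, park it on the
                 * per-netns dying list and rearm its timer so the
                 * destroy event is retried after the grace timeout */
                nf_ct_setup_event_timer(ct);
                return;
        }
        set_bit(IPS_DYING_BIT, &ct->status);
        /* normal teardown (helper destroy, unhash, nf_ct_put) follows */
}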
The maximum number of conntrack entries (active or inactive) is
still handled by nf_conntrack_max. Thus, we may start dropping
packets at some point if we accumulate a lot of inactive conntrack
entries waiting to deliver the destroy event to userspace.
For expectations, no changes are introduced in this patch.
Currently, event delivery is only done for new expectations (no
events for expectation removal or confirmation) and, apart from
the conntrack command line tool, I don't see any client that would
benefit from reliable expectation event delivery, at least until
destroy and confirm events are introduced.
This patch does not include the removal of the notify chain as
Patrick suggested yet, but I'll work on such change on top of this
patch if this is OK.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack.h | 2 +
include/net/netfilter/nf_conntrack_core.h | 6 +-
include/net/netfilter/nf_conntrack_ecache.h | 30 ++++++---
include/net/netfilter/nf_conntrack_helper.h | 2 +
include/net/netns/conntrack.h | 2 +
net/netfilter/nf_conntrack_core.c | 87 ++++++++++++++++++++++-----
net/netfilter/nf_conntrack_ecache.c | 26 +++++++-
net/netfilter/nf_conntrack_helper.c | 15 +++++
net/netfilter/nf_conntrack_netlink.c | 61 ++++++++++++-------
9 files changed, 175 insertions(+), 56 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 6340be3..d212fad 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -289,6 +289,8 @@ extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
extern unsigned int nf_conntrack_max;
+extern void nf_ct_setup_event_timer(struct nf_conn *ct);
+
#define NF_CT_STAT_INC(net, count) \
(per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
#define NF_CT_STAT_INC_ATOMIC(net, count) \
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 5a449b4..1be51ba 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -62,8 +62,10 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
if (ct && ct != &nf_conntrack_untracked) {
if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
ret = __nf_conntrack_confirm(skb);
- if (likely(ret == NF_ACCEPT))
- nf_ct_deliver_cached_events(ct);
+ if (unlikely(ret == NF_DROP))
+ return NF_DROP;
+ if (unlikely(nf_ct_deliver_cached_events(ct) < 0))
+ nf_conntrack_event_cache(IPCT_DELIVERY_FAILED, ct);
}
return ret;
}
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index eb5dadf..f72e9e8 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -74,6 +74,10 @@ enum ip_conntrack_events
/* Secmark is set */
IPCT_SECMARK_BIT = 14,
IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
+
+ /* An event delivery has failed */
+ IPCT_DELIVERY_FAILED_BIT = 31,
+ IPCT_DELIVERY_FAILED = (1 << IPCT_DELIVERY_FAILED_BIT),
};
enum ip_conntrack_expect_events {
@@ -118,7 +122,7 @@ extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
extern int nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
-extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
+extern int nf_ct_deliver_cached_events(struct nf_conn *ct);
static inline void
__nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
@@ -147,12 +151,13 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
spin_unlock_bh(&nf_conntrack_lock);
}
-static inline void
+static inline int
nf_conntrack_event_report(enum ip_conntrack_events event,
struct nf_conn *ct,
u32 pid,
int report)
{
+ int ret = 0;
struct net *net = nf_ct_net(ct);
struct nf_ct_event_notifier *notify;
@@ -162,7 +167,7 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
goto out_unlock;
if (!net->ct.sysctl_events)
- return;
+ goto out_unlock;
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
struct nf_ct_event item = {
@@ -171,16 +176,17 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
.report = report
};
- notify->fcn(event, &item);
+ ret = notify->fcn(event, &item);
}
out_unlock:
rcu_read_unlock();
+ return ret;
}
-static inline void
+static inline int
nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
{
- nf_conntrack_event_report(event, ct, 0, 0);
+ return nf_conntrack_event_report(event, ct, 0, 0);
}
struct nf_exp_event {
@@ -197,12 +203,13 @@ extern struct nf_exp_event_notifier *nf_expect_event_cb;
extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
extern int nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
-static inline void
+static inline int
nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 pid,
int report)
{
+ int ret = 0;
struct net *net = nf_ct_exp_net(exp);
struct nf_exp_event_notifier *notify;
@@ -212,7 +219,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
goto out_unlock;
if (!net->ct.sysctl_events)
- return;
+ goto out_unlock;
{
struct nf_exp_event item = {
@@ -221,17 +228,18 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
.report = report
};
- notify->fcn(event, &item);
+ ret = notify->fcn(event, &item);
}
out_unlock:
rcu_read_unlock();
+ return ret;
}
-static inline void
+static inline int
nf_ct_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp)
{
- nf_ct_expect_event_report(event, exp, 0, 0);
+ return nf_ct_expect_event_report(event, exp, 0, 0);
}
extern int nf_conntrack_ecache_init(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 66d65a7..0d4b5aa 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -48,6 +48,8 @@ extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
+extern void nf_ct_helper_destroy(struct nf_conn *ct);
+
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
{
return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 69dd322..0471cd5 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -13,8 +13,10 @@ struct netns_ct {
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+ struct hlist_head dying;
struct ip_conntrack_stat *stat;
int sysctl_events;
+ unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a4862f0..f9b17c0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -181,10 +181,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- if (!test_bit(IPS_DYING_BIT, &ct->status))
- nf_conntrack_event(IPCT_DESTROY, ct);
- set_bit(IPS_DYING_BIT, &ct->status);
-
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
@@ -218,20 +214,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
-static void death_by_timeout(unsigned long ul_conntrack)
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
- struct nf_conn *ct = (void *)ul_conntrack;
struct net *net = nf_ct_net(ct);
- struct nf_conn_help *help = nfct_help(ct);
- struct nf_conntrack_helper *helper;
-
- if (help) {
- rcu_read_lock();
- helper = rcu_dereference(help->helper);
- if (helper && helper->destroy)
- helper->destroy(ct);
- rcu_read_unlock();
- }
spin_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
@@ -239,6 +224,58 @@ static void death_by_timeout(unsigned long ul_conntrack)
NF_CT_STAT_INC(net, delete_list);
clean_from_lists(ct);
spin_unlock_bh(&nf_conntrack_lock);
+}
+
+static void death_by_event(unsigned long ul_conntrack)
+{
+ struct nf_conn *ct = (void *)ul_conntrack;
+ struct net *net = nf_ct_net(ct);
+
+ if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
+ /* bad luck, let's retry again */
+ ct->timeout.expires =
+ jiffies + net->ct.sysctl_events_retry_timeout;
+ add_timer(&ct->timeout);
+ }
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+ spin_unlock_bh(&nf_conntrack_lock);
+ nf_ct_helper_destroy(ct);
+ nf_ct_put(ct);
+}
+
+void nf_ct_setup_event_timer(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+
+ /* destroy event was not delivered */
+ set_bit(IPS_DYING_BIT, &ct->status);
+ nf_ct_delete_from_lists(ct);
+ /* add this conntrack to the dying list */
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+ &net->ct.dying);
+ /* set a new timer to retry event delivery */
+ setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
+ ct->timeout.expires =
+ jiffies + net->ct.sysctl_events_retry_timeout;
+ add_timer(&ct->timeout);
+ spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_setup_event_timer);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+ struct nf_conn *ct = (void *)ul_conntrack;
+
+ if (!test_bit(IPS_DYING_BIT, &ct->status) &&
+ unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+ nf_ct_setup_event_timer(ct);
+ return;
+ }
+ set_bit(IPS_DYING_BIT, &ct->status);
+ nf_ct_helper_destroy(ct);
+ nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
}
@@ -1011,6 +1048,22 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
+static void nf_ct_release_dying_list(void)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct hlist_node *n;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_for_each_entry(h, n, &init_net.ct.dying, hnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* never fails to remove them, no listeners at this point */
+ if (del_timer(&ct->timeout))
+ ct->timeout.function((unsigned long)ct);
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
+}
+
static void nf_conntrack_cleanup_init_net(void)
{
nf_conntrack_helper_fini();
@@ -1022,6 +1075,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
{
i_see_dead_people:
nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_release_dying_list();
if (atomic_read(&net->ct.count) != 0) {
schedule();
goto i_see_dead_people;
@@ -1201,6 +1255,7 @@ static int nf_conntrack_init_net(struct net *net)
atomic_set(&net->ct.count, 0);
INIT_HLIST_HEAD(&net->ct.unconfirmed);
+ INIT_HLIST_HEAD(&net->ct.dying);
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat) {
ret = -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 77f9254..29d432c 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -33,10 +33,11 @@ EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* Deliver all cached events for a particular conntrack. This is called
* by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(struct nf_conn *ct)
+int nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
+ int ret = 0, delivered = 0;
rcu_read_lock();
notify = rcu_dereference(nf_conntrack_event_cb);
@@ -45,7 +46,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
e = nf_ct_ecache_find(ct);
if (e == NULL)
- return;
+ goto out_unlock;
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && e->cache) {
struct nf_ct_event item = {
@@ -54,12 +55,16 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
.report = 0
};
- notify->fcn(e->cache, &item);
+ ret = notify->fcn(e->cache, &item);
+ if (ret == 0)
+ delivered = 1;
}
- xchg(&e->cache, 0);
+ if (delivered)
+ xchg(&e->cache, 0);
out_unlock:
rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
@@ -154,9 +159,12 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#endif
static int nf_ct_events_switch __read_mostly = NF_CT_EVENTS_DEFAULT;
+static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
module_param_named(event, nf_ct_events_switch, bool, 0644);
MODULE_PARM_DESC(event, "Enable connection tracking event delivery");
+module_param_named(retry_timeout, nf_ct_events_retry_timeout, bool, 0644);
+MODULE_PARM_DESC(retry_timeout, "Event delivery retry timeout");
#ifdef CONFIG_SYSCTL
static struct ctl_table event_sysctl_table[] = {
@@ -168,6 +176,14 @@ static struct ctl_table event_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_events_retry_timeout",
+ .data = &init_net.ct.sysctl_events_retry_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{}
};
#endif /* CONFIG_SYSCTL */
@@ -189,6 +205,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
goto out;
table[0].data = &net->ct.sysctl_events;
+ table[1].data = &net->ct.sysctl_events_retry_timeout;
net->ct.event_sysctl_header =
register_net_sysctl_table(net,
@@ -229,6 +246,7 @@ int nf_conntrack_ecache_init(struct net *net)
int ret;
net->ct.sysctl_events = nf_ct_events_switch;
+ net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
if (net_eq(net, &init_net)) {
ret = nf_ct_extend_register(&event_extend);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a51bdac..6af779d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -136,6 +136,21 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
return 0;
}
+void nf_ct_helper_destroy(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+
+ if (help) {
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (helper && helper->destroy)
+ helper->destroy(ct);
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_destroy);
+
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
unsigned int h = helper_hash(&me->tuple);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6aece58..fc000ae 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -414,6 +414,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
unsigned int type;
sk_buff_data_t b;
unsigned int flags = 0, group;
+ int err;
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
@@ -510,13 +511,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
rcu_read_unlock();
nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, item->pid, group, item->report);
+ err = nfnetlink_send(skb, item->pid, group, item->report);
+ if ((err == -ENOBUFS) || (err == -EAGAIN))
+ return -ENOBUFS;
+
return 0;
nla_put_failure:
rcu_read_unlock();
nlmsg_failure:
- nfnetlink_set_err(0, group, -ENOBUFS);
+ nfnetlink_set_err(item->pid, group, -ENOBUFS);
kfree_skb(skb);
return 0;
}
@@ -747,10 +751,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
- nf_conntrack_event_report(IPCT_DESTROY,
- ct,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh)) < 0) {
+ /* we failed to report the event, try later */
+ nf_ct_setup_event_timer(ct);
+ nf_ct_put(ct);
+ return 0;
+ }
/* death_by_timeout would report the event again */
set_bit(IPS_DYING_BIT, &ct->status);
@@ -1108,7 +1116,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
return 0;
}
-static inline void
+static inline int
ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
{
unsigned int events = 0;
@@ -1118,16 +1126,13 @@ ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
else
events |= IPCT_NEW;
- nf_conntrack_event_report(IPCT_STATUS |
- IPCT_HELPER |
- IPCT_REFRESH |
- IPCT_PROTOINFO |
- IPCT_NATSEQADJ |
- IPCT_MARK |
- events,
- ct,
- pid,
- report);
+ return nf_conntrack_event_report(IPCT_STATUS |
+ IPCT_HELPER |
+ IPCT_REFRESH |
+ IPCT_PROTOINFO |
+ IPCT_NATSEQADJ |
+ IPCT_MARK |
+ events, ct, pid, report);
}
static struct nf_conn *
@@ -1307,9 +1312,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = 0;
nf_conntrack_get(&ct->ct_general);
spin_unlock_bh(&nf_conntrack_lock);
- ctnetlink_event_report(ct,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ if (ctnetlink_event_report(ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh)) < 0) {
+ nf_conntrack_event_cache(IPCT_DELIVERY_FAILED,
+ ct);
+ nf_ct_put(ct);
+ return 0;
+ }
nf_ct_put(ct);
} else
spin_unlock_bh(&nf_conntrack_lock);
@@ -1328,9 +1338,14 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err == 0) {
nf_conntrack_get(&ct->ct_general);
spin_unlock_bh(&nf_conntrack_lock);
- ctnetlink_event_report(ct,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ if (ctnetlink_event_report(ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh)) < 0) {
+ nf_conntrack_event_cache(IPCT_DELIVERY_FAILED,
+ ct);
+ nf_ct_put(ct);
+ return 0;
+ }
nf_ct_put(ct);
} else
spin_unlock_bh(&nf_conntrack_lock);
* Re: [PATCH 3/3] ctnetlink: optional reliable conntrack event delivery
2009-03-30 13:28 ` [PATCH 3/3] ctnetlink: optional reliable conntrack event delivery Pablo Neira Ayuso
@ 2009-03-30 13:32 ` Pablo Neira Ayuso
0 siblings, 0 replies; 5+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-30 13:32 UTC (permalink / raw)
To: kaber; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch does not include the removal of the notify chain as
> Patrick suggested yet, but I'll work on such change on top of this
> patch if this is OK.
^^^
BTW, this comment is obsolete. I wrote it and then went back to add the
notify chain removal as the very first patch :).
--
"Los honestos son inadaptados sociales" -- Les Luthiers