From: Pablo Neira Ayuso <pablo@netfilter.org>
To: Patrick McHardy <kaber@trash.net>
Cc: netfilter-devel@vger.kernel.org
Subject: Re: [PATCH 9/9] netfilter: conntrack: replace notify chain by function pointer
Date: Wed, 03 Jun 2009 10:55:10 +0200 [thread overview]
Message-ID: <4A263A6E.1060104@netfilter.org> (raw)
In-Reply-To: <4A26396D.9000503@netfilter.org>
[-- Attachment #1: Type: text/plain, Size: 406 bytes --]
Pablo Neira Ayuso wrote:
> Please, keep it there :). I'm going to send you a new version of this
> patch to the mailing list.
New 9/9 patch attached.
As said, you can pull all the changes from:
git://1984.lsi.us.es/nf-next-2.6 master
people.netfilter.org is still broken, so I had to deploy the git
infrastructure here in one of my servers.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
[-- Attachment #2: ct-remove-notify-chain.patch --]
[-- Type: text/x-diff, Size: 13658 bytes --]
netfilter: conntrack: replace notify chain by function pointer
This patch removes the notify chain infrastructure and replace it
by a simple function pointer. This issue has been mentioned in the
mailing list several times: the use of the notify chain adds
too much overhead for something that is only used by ctnetlink.
This patch also changes nfnetlink_send(). It seems that gfp_any()
returns GFP_KERNEL for user-context request, like those via
ctnetlink, inside the RCU read-side section which is not valid.
Using GFP_KERNEL is also evil since netlink may schedule(),
this leads to "scheduling while atomic" bug reports.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/linux/netfilter/nfnetlink.h | 2 -
include/net/netfilter/nf_conntrack_ecache.h | 68 ++++++++++++++++------
net/netfilter/nf_conntrack_ecache.c | 83 ++++++++++++++++++++++-----
net/netfilter/nf_conntrack_netlink.c | 42 +++++++-------
net/netfilter/nfnetlink.c | 5 +-
5 files changed, 139 insertions(+), 61 deletions(-)
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index c600083..2214e51 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -75,7 +75,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
extern int nfnetlink_has_listeners(unsigned int group);
extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group,
- int echo);
+ int echo, gfp_t flags);
extern void nfnetlink_set_err(u32 pid, u32 group, int error);
extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 2e17a2d..1afb907 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_ECACHE_H
#include <net/netfilter/nf_conntrack.h>
-#include <linux/notifier.h>
#include <linux/interrupt.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -69,9 +68,13 @@ struct nf_ct_event {
int report;
};
-extern struct atomic_notifier_head nf_conntrack_chain;
-extern int nf_conntrack_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
+struct nf_ct_event_notifier {
+ int (*fcn)(unsigned int events, struct nf_ct_event *item);
+};
+
+extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
+extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -97,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
u32 pid,
int report)
{
- struct nf_ct_event item = {
- .ct = ct,
- .pid = pid,
- .report = report
- };
- if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
- atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
+ struct nf_ct_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
+ if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+ struct nf_ct_event item = {
+ .ct = ct,
+ .pid = pid,
+ .report = report
+ };
+ notify->fcn(event, &item);
+ }
+out_unlock:
+ rcu_read_unlock();
}
static inline void
@@ -118,9 +131,13 @@ struct nf_exp_event {
int report;
};
-extern struct atomic_notifier_head nf_ct_expect_chain;
-extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
-extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
+struct nf_exp_event_notifier {
+ int (*fcn)(unsigned int events, struct nf_exp_event *item);
+};
+
+extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
+extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
static inline void
nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -128,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
u32 pid,
int report)
{
- struct nf_exp_event item = {
- .exp = exp,
- .pid = pid,
- .report = report
- };
- atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+ struct nf_exp_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
+ {
+ struct nf_exp_event item = {
+ .exp = exp,
+ .pid = pid,
+ .report = report
+ };
+ notify->fcn(event, &item);
+ }
+out_unlock:
+ rcu_read_unlock();
}
static inline void
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190..5516b3e 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,24 +16,32 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
-#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
+ struct nf_ct_event_notifier *notify;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
&& ecache->events) {
struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
.report = 0
};
- atomic_notifier_call_chain(&nf_conntrack_chain,
- ecache->events,
- &item);
+ notify->fcn(ecache->events, &item);
}
ecache->events = 0;
nf_ct_put(ecache->ct);
ecache->ct = NULL;
+
+out_unlock:
+ rcu_read_unlock();
}
/* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
free_percpu(net->ct.ecache);
}
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+ int ret = 0;
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_conntrack_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+ int ret = 0;
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_expect_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
{
- return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_expect_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b1b9e4f..4448b06 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,7 +27,6 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
-#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -454,13 +453,12 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
;
}
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_ct_event *item = (struct nf_ct_event *)ptr;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
@@ -468,7 +466,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
- return NOTIFY_DONE;
+ return 0;
if (events & IPCT_DESTROY) {
type = IPCTNL_MSG_CT_DELETE;
@@ -481,10 +479,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report && !nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
+ return 0;
skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
if (skb == NULL)
@@ -560,8 +558,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
rcu_read_unlock();
nlmsg_end(skb, nlh);
- nfnetlink_send(skb, item->pid, group, item->report);
- return NOTIFY_DONE;
+ nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+ return 0;
nla_put_failure:
rcu_read_unlock();
@@ -570,7 +568,7 @@ nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, group, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -1507,12 +1505,11 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_exp_event *item = (struct nf_exp_event *)ptr;
struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
@@ -1522,11 +1519,11 @@ static int ctnetlink_expect_event(struct notifier_block *this,
type = IPCTNL_MSG_EXP_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report &&
!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
+ return 0;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (skb == NULL)
@@ -1548,8 +1545,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
rcu_read_unlock();
nlmsg_end(skb, nlh);
- nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
- return NOTIFY_DONE;
+ nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+ item->report, GFP_ATOMIC);
+ return 0;
nla_put_failure:
rcu_read_unlock();
@@ -1558,7 +1556,7 @@ nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, 0, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif
static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1864,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+ .fcn = ctnetlink_conntrack_event,
};
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+ .fcn = ctnetlink_expect_event,
};
#endif
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9dbd570..92761a9 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+ unsigned group, int echo, gfp_t flags)
{
- return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+ return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
next prev parent reply other threads:[~2009-06-03 9:18 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-02 18:17 [PATCH 0/9] Netfilter updates for nf-next tree (2.6.31) (2nd try) Pablo Neira Ayuso
2009-06-02 18:18 ` [PATCH 1/9] netfilter: nfnetlink: cleanup for nfnetlink_rcv_msg() function Pablo Neira Ayuso
2009-06-02 18:18 ` [PATCH 2/9] netfilter: ctnetlink: remove nowait parameter from *fill_info() Pablo Neira Ayuso
2009-06-02 18:18 ` [PATCH 3/9] netfilter: ctnetlink: rename tuple() by nf_ct_tuple() macro definition Pablo Neira Ayuso
2009-06-02 18:19 ` [PATCH 4/9] netfilter: ctnetlink: use nlmsg_* helper function to build messages Pablo Neira Ayuso
2009-06-02 18:19 ` [PATCH 5/9] netfilter: ctnetlink: cleanup message-size calculation Pablo Neira Ayuso
2009-06-02 18:20 ` [PATCH 6/9] netfilter: conntrack: don't report events on module removal Pablo Neira Ayuso
2009-06-02 18:20 ` [PATCH 7/9] netfilter: conntrack: remove events flags from userspace exposed file Pablo Neira Ayuso
2009-06-02 18:20 ` [PATCH 8/9] netfilter: conntrack: simplify event caching system Pablo Neira Ayuso
2009-06-02 18:21 ` [PATCH 9/9] netfilter: conntrack: replace notify chain by function pointer Pablo Neira Ayuso
2009-06-03 6:24 ` Patrick McHardy
2009-06-03 8:05 ` Pablo Neira Ayuso
2009-06-03 8:27 ` Patrick McHardy
2009-06-03 8:50 ` Pablo Neira Ayuso
2009-06-03 8:55 ` Pablo Neira Ayuso [this message]
2009-06-03 9:24 ` Patrick McHardy
2009-06-03 9:10 ` Patrick McHardy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4A263A6E.1060104@netfilter.org \
--to=pablo@netfilter.org \
--cc=kaber@trash.net \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.