* [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink
@ 2008-10-09 8:33 Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases Pablo Neira Ayuso
` (7 more replies)
0 siblings, 8 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:33 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch replaces the unnecessary module refcounting with
the read-side locks. With this patch, all the dump and fill_info
function are called under the RCU read lock.
Based on a patch from Fabian Hugelshofer.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 38 ++++++++++++++++------------------
1 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cadfd15..f38c649 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -105,16 +105,14 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
- l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+ l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
- nf_ct_l3proto_put(l3proto);
if (unlikely(ret < 0))
return ret;
- l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+ l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
- nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -151,11 +149,9 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
struct nlattr *nest_proto;
int ret;
- l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct));
- if (!l4proto->to_nlattr) {
- nf_ct_l4proto_put(l4proto);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (!l4proto->to_nlattr)
return 0;
- }
nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED);
if (!nest_proto)
@@ -163,14 +159,11 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
ret = l4proto->to_nlattr(skb, nest_proto, ct);
- nf_ct_l4proto_put(l4proto);
-
nla_nest_end(skb, nest_proto);
return ret;
nla_put_failure:
- nf_ct_l4proto_put(l4proto);
return -1;
}
@@ -184,7 +177,6 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
if (!help)
return 0;
- rcu_read_lock();
helper = rcu_dereference(help->helper);
if (!helper)
goto out;
@@ -199,11 +191,9 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
nla_nest_end(skb, nest_helper);
out:
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -1;
}
@@ -461,6 +451,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
+ rcu_read_lock();
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -517,13 +508,15 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
+ rcu_read_unlock();
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, group, 0);
return NOTIFY_DONE;
-nlmsg_failure:
nla_put_failure:
+ rcu_read_unlock();
+nlmsg_failure:
kfree_skb(skb);
return NOTIFY_DONE;
}
@@ -860,8 +853,10 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
return -ENOMEM;
}
+ rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
IPCTNL_MSG_CT_NEW, 1, ct);
+ rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
goto free;
@@ -1319,16 +1314,14 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb,
if (!nest_parms)
goto nla_put_failure;
- l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+ l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
- nf_ct_l3proto_put(l3proto);
if (unlikely(ret < 0))
goto nla_put_failure;
- l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+ l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
- nf_ct_l4proto_put(l4proto);
if (unlikely(ret < 0))
goto nla_put_failure;
@@ -1435,15 +1428,18 @@ static int ctnetlink_expect_event(struct notifier_block *this,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
+ rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
+ rcu_read_unlock();
nlh->nlmsg_len = skb->tail - b;
nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
return NOTIFY_DONE;
-nlmsg_failure:
nla_put_failure:
+ rcu_read_unlock();
+nlmsg_failure:
kfree_skb(skb);
return NOTIFY_DONE;
}
@@ -1547,9 +1543,11 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (!skb2)
goto out;
+ rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
+ rcu_read_unlock();
if (err <= 0)
goto free;
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
@ 2008-10-09 8:34 ` Pablo Neira Ayuso
2008-10-09 13:27 ` Patrick McHardy
2008-10-09 8:34 ` [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink Pablo Neira Ayuso
` (6 subsequent siblings)
7 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:34 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch adds the macro MODULE_ALIAS_NFCT_HELPER that defines a
way to provide generic and persistent aliases for the connection
tracking helpers.
This next patch requires this patch.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack.h | 3 +++
net/netfilter/nf_conntrack_amanda.c | 1 +
net/netfilter/nf_conntrack_ftp.c | 1 +
net/netfilter/nf_conntrack_h323_main.c | 1 +
net/netfilter/nf_conntrack_irc.c | 1 +
net/netfilter/nf_conntrack_netbios_ns.c | 1 +
net/netfilter/nf_conntrack_pptp.c | 1 +
net/netfilter/nf_conntrack_sane.c | 1 +
net/netfilter/nf_conntrack_sip.c | 1 +
net/netfilter/nf_conntrack_tftp.c | 1 +
10 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b76a868..f11255e 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -298,5 +298,8 @@ do { \
local_bh_enable(); \
} while (0)
+#define MODULE_ALIAS_NFCT_HELPER(helper) \
+ MODULE_ALIAS("nfct-helper-" helper)
+
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_H */
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 38aedee..4f8fcf4 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -30,6 +30,7 @@ MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
MODULE_DESCRIPTION("Amanda connection tracking module");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_amanda");
+MODULE_ALIAS_NFCT_HELPER("amanda");
module_param(master_timeout, uint, 0600);
MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4f71071..374136e 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -29,6 +29,7 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
MODULE_DESCRIPTION("ftp connection tracking helper");
MODULE_ALIAS("ip_conntrack_ftp");
+MODULE_ALIAS_NFCT_HELPER("ftp");
/* This is slow, but it's simple. --RR */
static char *ftp_buffer;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index c1504f7..0fa52f2 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1831,3 +1831,4 @@ MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
MODULE_DESCRIPTION("H.323 connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_h323");
+MODULE_ALIAS_NFCT_HELPER("h323");
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 20633fd..fd5e368 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -41,6 +41,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_irc");
+MODULE_ALIAS_NFCT_HELPER("irc");
module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 08404e6..5af4273 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -37,6 +37,7 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_netbios_ns");
+MODULE_ALIAS_NFCT_HELPER("netbios_ns");
static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 373e51e..358c41b 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -37,6 +37,7 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
MODULE_ALIAS("ip_conntrack_pptp");
+MODULE_ALIAS_NFCT_HELPER("pptp");
static DEFINE_SPINLOCK(nf_pptp_lock);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index a94294b..dcfecbb 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -30,6 +30,7 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>");
MODULE_DESCRIPTION("SANE connection tracking helper");
+MODULE_ALIAS_NFCT_HELPER("sane");
static char *sane_buffer;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 6813f1c..4b57216 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -28,6 +28,7 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
MODULE_DESCRIPTION("SIP connection tracking helper");
MODULE_ALIAS("ip_conntrack_sip");
+MODULE_ALIAS_NFCT_HELPER("sip");
#define MAX_PORTS 8
static unsigned short ports[MAX_PORTS];
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index f57f6e7..46e646b 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -22,6 +22,7 @@ MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("TFTP connection tracking helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_conntrack_tftp");
+MODULE_ALIAS_NFCT_HELPER("tftp");
#define MAX_PORTS 8
static unsigned short ports[MAX_PORTS];
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases Pablo Neira Ayuso
@ 2008-10-09 8:34 ` Pablo Neira Ayuso
2008-10-09 13:34 ` Patrick McHardy
2008-10-09 8:34 ` [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper Pablo Neira Ayuso
` (5 subsequent siblings)
7 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:34 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch adds module loading for helpers via ctnetlink.
* Creation path: We support explicit and implicit helper assignation. For
the explicit case, we try to load the module. If the module is correctly
loaded and the helper is present, we return EAGAIN to re-start the
creation. Otherwise, we return EOPNOTSUPP.
* Update path: release the spin lock, load the module and check. If it is
present, then return EAGAIN to re-start the update.
This patch includes a minor change in nfnetlink to support EAGAIN.
Based on a suggestion from Patrick McHardy.
This patch provides a refactorized function to lookup-and-set the
connection tracking helper. The function removes the exported symbol
__nf_ct_helper_find as it has not clients anymore.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_helper.h | 16 +++++-
net/netfilter/nf_conntrack_core.c | 30 +-----------
net/netfilter/nf_conntrack_helper.c | 33 ++++++++++++-
net/netfilter/nf_conntrack_netlink.c | 70 ++++++++++++++++++++++++---
net/netfilter/nfnetlink.c | 3 +
5 files changed, 109 insertions(+), 43 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index f8060ab..9455699 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -39,9 +39,6 @@ struct nf_conntrack_helper
};
extern struct nf_conntrack_helper *
-__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple);
-
-extern struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name);
extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
@@ -49,6 +46,19 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
+extern int __nf_ct_set_helper(struct nf_conn *ct, gfp_t flags);
+
+static inline int nf_ct_set_helper(struct nf_conn *ct, gfp_t flags)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __nf_ct_set_helper(ct, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
{
return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27de3c7..f465090 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -581,14 +581,7 @@ init_conntrack(struct net *net,
nf_conntrack_get(&ct->master->ct_general);
NF_CT_STAT_INC(net, expect_new);
} else {
- struct nf_conntrack_helper *helper;
-
- helper = __nf_ct_helper_find(&repl_tuple);
- if (helper) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help)
- rcu_assign_pointer(help->helper, helper);
- }
+ __nf_ct_set_helper(ct, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
@@ -765,7 +758,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
const struct nf_conntrack_tuple *newreply)
{
struct nf_conn_help *help = nfct_help(ct);
- struct nf_conntrack_helper *helper;
/* Should be unconfirmed, so not in hash table yet */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
@@ -777,25 +769,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
if (ct->master || (help && !hlist_empty(&help->expectations)))
return;
- rcu_read_lock();
- helper = __nf_ct_helper_find(newreply);
- if (helper == NULL) {
- if (help)
- rcu_assign_pointer(help->helper, NULL);
- goto out;
- }
-
- if (help == NULL) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help == NULL)
- goto out;
- } else {
- memset(&help->help, 0, sizeof(help->help));
- }
-
- rcu_assign_pointer(help->helper, helper);
-out:
- rcu_read_unlock();
+ nf_ct_set_helper(ct, GFP_ATOMIC);
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9c06b9f..86cdbde 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -44,7 +44,7 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
(__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
}
-struct nf_conntrack_helper *
+static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_helper *helper;
@@ -62,7 +62,6 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(__nf_ct_helper_find);
struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
@@ -94,6 +93,36 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+int __nf_ct_set_helper(struct nf_conn *ct, gfp_t flags)
+{
+ int ret = 0;
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help = nfct_help(ct);
+
+ helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ if (helper == NULL) {
+ if (help)
+ rcu_assign_pointer(help->helper, NULL);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (help == NULL) {
+ help = nf_ct_helper_ext_add(ct, flags);
+ if (help == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ memset(&help->help, 0, sizeof(help->help));
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_set_helper);
+
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f38c649..a649f7b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -952,8 +952,22 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
}
helper = __nf_conntrack_helper_find_byname(helpname);
- if (helper == NULL)
+ if (helper == NULL) {
+#ifdef CONFIG_KMOD
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ spin_lock_bh(&nf_conntrack_lock);
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_bh(&nf_conntrack_lock);
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper)
+ return -EAGAIN;
+#endif
return -EOPNOTSUPP;
+ }
if (help) {
if (help->helper == helper)
@@ -1117,7 +1131,6 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
{
struct nf_conn *ct;
int err = -EINVAL;
- struct nf_conn_help *help;
struct nf_conntrack_helper *helper;
ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL);
@@ -1132,16 +1145,55 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
ct->status |= IPS_CONFIRMED;
rcu_read_lock();
- helper = __nf_ct_helper_find(rtuple);
- if (helper) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help == NULL) {
+ if (cda[CTA_HELP]) {
+ char *helpname;
+
+ err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper == NULL) {
+ rcu_read_unlock();
+#ifdef CONFIG_KMOD
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ rcu_read_lock();
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper) {
+ rcu_read_unlock();
+ err = -EAGAIN;
+ goto err;
+ }
+ rcu_read_unlock();
+#endif
+ err = -EOPNOTSUPP;
+ goto err;
+ } else {
+ struct nf_conn_help *help;
+
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help == NULL) {
+ rcu_read_unlock();
+ err = -ENOMEM;
+ goto err;
+ }
+
+ /* not in hash table yet so not strictly necessary */
+ rcu_assign_pointer(help->helper, helper);
+ }
+ } else {
+ /* try an implicit helper assignation */
+ err = __nf_ct_set_helper(ct, GFP_ATOMIC);
+ if (err == -ENOMEM) {
rcu_read_unlock();
- err = -ENOMEM;
goto err;
}
- /* not in hash table yet so not strictly necessary */
- rcu_assign_pointer(help->helper, helper);
}
if (cda[CTA_STATUS]) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b75c9c4..323dcf8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -165,7 +165,8 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
} else
return -EINVAL;
- return nc->call(nfnl, skb, nlh, cda);
+ while ((err = nc->call(nfnl, skb, nlh, cda)) == -EAGAIN);
+ return err;
}
}
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink Pablo Neira Ayuso
@ 2008-10-09 8:34 ` Pablo Neira Ayuso
2008-10-10 13:03 ` Patrick McHardy
2008-10-09 8:35 ` [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink Pablo Neira Ayuso
` (4 subsequent siblings)
7 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:34 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch changes the return value if the conntrack has no helper assigned.
Instead of EINVAL, which is reserved for malformed messages, it returns
EOPNOTSUPP.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a649f7b..da2437c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1729,7 +1729,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
if (!help || !help->helper) {
/* such conntrack hasn't got any helper, abort */
- err = -EINVAL;
+ err = -EOPNOTSUPP;
goto out;
}
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
` (2 preceding siblings ...)
2008-10-09 8:34 ` [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper Pablo Neira Ayuso
@ 2008-10-09 8:35 ` Pablo Neira Ayuso
2008-10-10 13:31 ` Patrick McHardy
2008-10-09 8:35 ` [PATCH 6/8] [PATCH] bump the expectation helper name Pablo Neira Ayuso
` (3 subsequent siblings)
7 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:35 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
As for now, the creation and update of conntracks via ctnetlink do not
propagate an event to userspace. This can result in inconsistent situations
if several userspace processes modify the connection tracking table by means
of ctnetlink at the same time. Specifically, using the conntrack command
line tool and conntrackd at the same time can trigger unconsistencies.
This patch also modifies the event cache infrastructure to pass the
process PID and the ECHO flag to nfnetlink_send() to report back
to userspace if the process that triggered the change needs so.
Based on a suggestion from Patrick McHardy.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack.h | 2 -
include/net/netfilter/nf_conntrack_ecache.h | 59 +++++++++++++++++-
include/net/netfilter/nf_conntrack_expect.h | 2 +
net/netfilter/nf_conntrack_core.c | 29 ++++++++-
net/netfilter/nf_conntrack_ecache.c | 14 +++-
net/netfilter/nf_conntrack_expect.c | 43 +++++++++++--
net/netfilter/nf_conntrack_netlink.c | 88 ++++++++++++++++++++++-----
7 files changed, 203 insertions(+), 34 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f11255e..2e0c536 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -199,7 +199,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
extern void nf_conntrack_hash_insert(struct nf_conn *ct);
-extern void nf_conntrack_flush(struct net *net);
+extern void nf_conntrack_flush(struct net *net, u32 pid, int report);
extern bool nf_ct_get_tuplepr(const struct sk_buff *skb,
unsigned int nhoff, u_int16_t l3num,
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 35f814c..d1054df 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -17,6 +17,13 @@ struct nf_conntrack_ecache {
unsigned int events;
};
+/* This structure is passed to event handler */
+struct nf_ct_event {
+ struct nf_conn *ct;
+ u32 pid;
+ int report;
+};
+
extern struct atomic_notifier_head nf_conntrack_chain;
extern int nf_conntrack_register_notifier(struct notifier_block *nb);
extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
@@ -39,22 +46,58 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
local_bh_enable();
}
-static inline void nf_conntrack_event(enum ip_conntrack_events event,
- struct nf_conn *ct)
+static inline void
+nf_conntrack_event_report(enum ip_conntrack_events event,
+ struct nf_conn *ct,
+ u32 pid,
+ int report)
{
+ struct nf_ct_event item = {
+ .ct = ct,
+ .pid = pid,
+ .report = report
+ };
+
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
- atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
+ atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
}
+static inline void
+nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
+{
+ nf_conntrack_event_report(event, ct, 0, 0);
+}
+
+struct nf_exp_event {
+ struct nf_conntrack_expect *exp;
+ u32 pid;
+ int report;
+};
+
extern struct atomic_notifier_head nf_ct_expect_chain;
extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
static inline void
+nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
+ struct nf_conntrack_expect *exp,
+ u32 pid,
+ int report)
+{
+ struct nf_exp_event item = {
+ .exp = exp,
+ .pid = pid,
+ .report = report
+ };
+
+ atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+}
+
+static inline void
nf_ct_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp)
{
- atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
+ nf_ct_expect_event_report(event, exp, 0, 0);
}
extern int nf_conntrack_ecache_init(struct net *net);
@@ -66,9 +109,17 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
const struct sk_buff *skb) {}
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct) {}
+static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
+ struct nf_conn *ct,
+ u32 pid,
+ int report) {}
static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp) {}
+static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
+ struct nf_conntrack_expect *exp,
+ u32 pid,
+ int report) {}
static inline void nf_ct_event_cache_flush(struct net *net) {}
static inline int nf_conntrack_ecache_init(struct net *net)
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 37a7fc1..ab17a15 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -100,6 +100,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
u_int8_t, const __be16 *, const __be16 *);
void nf_ct_expect_put(struct nf_conntrack_expect *exp);
int nf_ct_expect_related(struct nf_conntrack_expect *expect);
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+ u32 pid, int report);
#endif /*_NF_CONNTRACK_EXPECT_H*/
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f465090..aab2618 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -174,7 +174,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- nf_conntrack_event(IPCT_DESTROY, ct);
+ if (!test_bit(IPS_DYING_BIT, &ct->status))
+ nf_conntrack_event(IPCT_DESTROY, ct);
set_bit(IPS_DYING_BIT, &ct->status);
/* To make sure we don't get any weird locking issues here:
@@ -963,8 +964,24 @@ void nf_ct_iterate_cleanup(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
+struct __nf_ct_flush_report {
+ u32 pid;
+ int report;
+};
+
static int kill_all(struct nf_conn *i, void *data)
{
+ struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+
+ if (!fr->report)
+ return 1;
+
+ /* get_next_corpse sets the dying bit for us */
+ nf_conntrack_event_report(IPCT_DESTROY,
+ i,
+ fr->pid,
+ fr->report);
+
return 1;
}
@@ -978,9 +995,13 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-void nf_conntrack_flush(struct net *net)
+void nf_conntrack_flush(struct net *net, u32 pid, int report)
{
- nf_ct_iterate_cleanup(net, kill_all, NULL);
+ struct __nf_ct_flush_report fr = {
+ .pid = pid,
+ .report = report,
+ };
+ nf_ct_iterate_cleanup(net, kill_all, &fr);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);
@@ -996,7 +1017,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_event_cache_flush(net);
nf_conntrack_ecache_fini(net);
i_see_dead_people:
- nf_conntrack_flush(net);
+ nf_conntrack_flush(net, 0, 0);
if (atomic_read(&net->ct.count) != 0) {
schedule();
goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index a5f5e2e..dee4190 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -35,9 +35,17 @@ static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
- && ecache->events)
- atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
- ecache->ct);
+ && ecache->events) {
+ struct nf_ct_event item = {
+ .ct = ecache->ct,
+ .pid = 0,
+ .report = 0
+ };
+
+ atomic_notifier_call_chain(&nf_conntrack_chain,
+ ecache->events,
+ &item);
+ }
ecache->events = 0;
nf_ct_put(ecache->ct);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 37a703b..3a8a34a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -362,7 +362,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
return 1;
}
-int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
@@ -371,11 +371,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
struct net *net = nf_ct_exp_net(expect);
struct hlist_node *n;
unsigned int h;
- int ret;
-
- NF_CT_ASSERT(master_help);
+ int ret = 0;
- spin_lock_bh(&nf_conntrack_lock);
if (!master_help->helper) {
ret = -ESHUTDOWN;
goto out;
@@ -409,18 +406,50 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect)
printk(KERN_WARNING
"nf_conntrack: expectation table full\n");
ret = -EMFILE;
- goto out;
}
+out:
+ return ret;
+}
+
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+{
+ int ret;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = __nf_ct_expect_check(expect);
+ if (ret < 0)
+ goto out;
nf_ct_expect_insert(expect);
+ atomic_inc(&expect->use);
+ spin_unlock_bh(&nf_conntrack_lock);
nf_ct_expect_event(IPEXP_NEW, expect);
- ret = 0;
+ nf_ct_expect_put(expect);
+ return ret;
out:
spin_unlock_bh(&nf_conntrack_lock);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
+ u32 pid, int report)
+{
+ int ret;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = __nf_ct_expect_check(expect);
+ if (ret < 0)
+ goto out;
+ nf_ct_expect_insert(expect);
+out:
+ spin_unlock_bh(&nf_conntrack_lock);
+ if (ret == 0)
+ nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da2437c..a115d78 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -410,7 +410,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_conn *ct = (struct nf_conn *)ptr;
+ struct nf_ct_event *item = (struct nf_ct_event *)ptr;
+ struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
sk_buff_data_t b;
@@ -443,7 +444,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
b = skb->tail;
type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+ nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
nfmsg = NLMSG_DATA(nlh);
nlh->nlmsg_flags = flags;
@@ -511,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
rcu_read_unlock();
nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, group, 0);
+ nfnetlink_send(skb, item->pid, group, item->report);
return NOTIFY_DONE;
nla_put_failure:
@@ -787,7 +788,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
else {
/* Flush the whole table */
- nf_conntrack_flush(&init_net);
+ nf_conntrack_flush(&init_net,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
return 0;
}
@@ -808,6 +811,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
+ nf_conntrack_event_report(IPCT_DESTROY,
+ ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
+
+ /* death_by_timeout would report the event again */
+ set_bit(IPS_DYING_BIT, &ct->status);
+
nf_ct_kill(ct);
nf_ct_put(ct);
@@ -1123,11 +1134,35 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
return 0;
}
+static inline void
+ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
+{
+ unsigned int events = 0;
+
+ if (test_bit(IPS_EXPECTED_BIT, &ct->status))
+ events |= IPCT_RELATED;
+ else
+ events |= IPCT_NEW;
+
+ nf_conntrack_event_report(IPCT_STATUS |
+ IPCT_HELPER |
+ IPCT_REFRESH |
+ IPCT_PROTOINFO |
+ IPCT_NATSEQADJ |
+ IPCT_MARK |
+ events,
+ ct,
+ pid,
+ report);
+}
+
static int
ctnetlink_create_conntrack(struct nlattr *cda[],
struct nf_conntrack_tuple *otuple,
struct nf_conntrack_tuple *rtuple,
- struct nf_conn *master_ct)
+ struct nf_conn *master_ct,
+ u32 pid,
+ int report)
{
struct nf_conn *ct;
int err = -EINVAL;
@@ -1225,9 +1260,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
ct->master = master_ct;
}
+ nf_conntrack_get(&ct->ct_general);
add_timer(&ct->timeout);
nf_conntrack_hash_insert(ct);
rcu_read_unlock();
+ ctnetlink_event_report(ct, pid, report);
+ nf_ct_put(ct);
return 0;
@@ -1292,7 +1330,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = ctnetlink_create_conntrack(cda,
&otuple,
&rtuple,
- master_ct);
+ master_ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
if (err < 0 && master_ct)
nf_ct_put(master_ct);
@@ -1304,6 +1344,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
* so there's no need to increase the refcount */
err = -EEXIST;
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
/* we only allow nat config for new conntracks */
if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
err = -EOPNOTSUPP;
@@ -1314,8 +1356,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = -EOPNOTSUPP;
goto out_unlock;
}
- err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
- cda);
+
+ err = ctnetlink_change_conntrack(ct, cda);
+ if (err == 0) {
+ nf_conntrack_get(&ct->ct_general);
+ spin_unlock_bh(&nf_conntrack_lock);
+ ctnetlink_event_report(ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
+ nf_ct_put(ct);
+ } else
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ return err;
}
out_unlock:
@@ -1450,7 +1503,8 @@ static int ctnetlink_expect_event(struct notifier_block *this,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
+ struct nf_exp_event *item = (struct nf_exp_event *)ptr;
+ struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
sk_buff_data_t b;
@@ -1472,7 +1526,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
b = skb->tail;
type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+ nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
nfmsg = NLMSG_DATA(nlh);
nlh->nlmsg_flags = flags;
@@ -1486,7 +1540,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
rcu_read_unlock();
nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+ nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
return NOTIFY_DONE;
nla_put_failure:
@@ -1700,7 +1754,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[])
}
static int
-ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
+ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report)
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
struct nf_conntrack_tuple_hash *h = NULL;
@@ -1747,7 +1801,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3)
memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
exp->mask.src.u.all = mask.src.u.all;
- err = nf_ct_expect_related(exp);
+ err = nf_ct_expect_related_report(exp, pid, report);
nf_ct_expect_put(exp);
out:
@@ -1780,8 +1834,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (!exp) {
spin_unlock_bh(&nf_conntrack_lock);
err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_expect(cda, u3);
+ if (nlh->nlmsg_flags & NLM_F_CREATE) {
+ err = ctnetlink_create_expect(cda,
+ u3,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
+ }
return err;
}
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 6/8] [PATCH] bump the expectation helper name
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
` (3 preceding siblings ...)
2008-10-09 8:35 ` [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink Pablo Neira Ayuso
@ 2008-10-09 8:35 ` Pablo Neira Ayuso
2008-10-09 8:35 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
` (2 subsequent siblings)
7 siblings, 0 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:35 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
Include the expectation helper name at expectation dumping.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a115d78..01ed29c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1443,6 +1443,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
struct nf_conn *master = exp->master;
+ struct nf_conn_help *help = nfct_help(master);
long timeout = (exp->timeout.expires - jiffies) / HZ;
if (timeout < 0)
@@ -1460,6 +1461,14 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
+ if (help) {
+ struct nf_conntrack_helper *helper;
+
+ helper = rcu_dereference(help->helper);
+ if (helper)
+ NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
+ }
+
return 0;
nla_put_failure:
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
` (4 preceding siblings ...)
2008-10-09 8:35 ` [PATCH 6/8] [PATCH] bump the expectation helper name Pablo Neira Ayuso
@ 2008-10-09 8:35 ` Pablo Neira Ayuso
2008-10-09 16:45 ` Pablo Neira Ayuso
2008-10-09 8:36 ` [PATCH 8/8] [PATCH] remove module dependency between ctnetlink and nf_nat Pablo Neira Ayuso
2008-10-09 13:26 ` [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Patrick McHardy
7 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:35 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch adds dynamic message size calculation for ctnetlink. This
reduces CPU consumption since the overhead in the message trimming
is removed.
Based on a suggestion from Patrick McHardy.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_l3proto.h | 2
include/net/netfilter/nf_conntrack_l4proto.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 ++
net/netfilter/nf_conntrack_core.c | 6 +
net/netfilter/nf_conntrack_netlink.c | 103 ++++++++++++++++++++++++
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++
net/netfilter/nf_conntrack_proto_gre.c | 1
net/netfilter/nf_conntrack_proto_sctp.c | 12 +++
net/netfilter/nf_conntrack_proto_tcp.c | 14 +++
net/netfilter/nf_conntrack_proto_udp.c | 2
net/netfilter/nf_conntrack_proto_udplite.c | 2
14 files changed, 182 insertions(+), 1 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 0378676..e0007f9 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -55,6 +55,8 @@ struct nf_conntrack_l3proto
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+
+ size_t (*nlattr_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7f2f43c..3c80479 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -72,6 +72,8 @@ struct nf_conntrack_l4proto
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+ size_t (*nlattr_size)(void);
+ size_t (*nlattr_protoinfo_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
@@ -115,6 +117,7 @@ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+extern size_t nf_ct_port_nlattr_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4a7c352..7b354e5 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -330,6 +330,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv4_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t))*2;
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -350,6 +355,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nlattr_size = ipv4_nlattr_size,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4e88792..9e6bb21 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -262,6 +262,13 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmp_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -310,6 +317,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nlattr_size = icmp_nlattr_size,
.nla_policy = icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index e91db16..3bc103d 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -342,6 +342,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t)*4)*2;
+}
#endif
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
@@ -354,6 +359,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
+ .nlattr_size = ipv6_nlattr_size,
.nla_policy = ipv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 0572617..1c32c71 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -243,6 +243,13 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmpv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -275,6 +282,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nlattr_size = icmpv6_nlattr_size,
.nla_policy = icmpv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index aab2618..281b398 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -895,6 +895,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+
+size_t nf_ct_port_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int16_t))*2;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 01ed29c..1046901 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -404,6 +404,107 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static inline size_t calculate_tuple_room_size(const struct nf_conn *ct)
+{
+ struct nf_conntrack_l3proto *l3proto;
+ struct nf_conntrack_l4proto *l4proto;
+ size_t size;
+
+ rcu_read_lock();
+ /* nested attributes CTA_TUPLE_[ORIG|REPLY] plus CTA_TUPLE_IP */
+ size = nla_total_size(0) * 2;
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ if (likely(l3proto->nlattr_size))
+ size += l3proto->nlattr_size();
+
+ /* nested attributes CTA_TUPLE_PROTO plus CTA_PROTONUM */
+ size += nla_total_size(0) + nla_total_size(sizeof(u_int8_t));
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (likely(l4proto->nlattr_size))
+ size += l4proto->nlattr_size();
+
+ rcu_read_unlock();
+ return size;
+}
+
+static inline size_t calculate_protoinfo_room_size(const struct nf_conn *ct)
+{
+ size_t size = 0;
+ struct nf_conntrack_l4proto *l4proto;
+
+ rcu_read_lock();
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->nlattr_protoinfo_size)
+ size = l4proto->nlattr_protoinfo_size();
+ rcu_read_unlock();
+
+ return size;
+}
+
+static inline size_t calculate_helper_room_size(const struct nf_conn *ct)
+{
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+ size_t size;
+
+ if (!help)
+ goto out;
+
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out_unlock;
+
+ size = nla_total_size(0) + /* CTA_HELP */
+ nla_total_size(strlen(helper->name));
+out_unlock:
+ rcu_read_unlock();
+out:
+ return size;
+}
+
+static inline size_t
+ctnetlink_calculate_room_size(const struct nf_conn *ct, unsigned long events)
+{
+ size_t size = NLMSG_SPACE(sizeof(struct nfgenmsg));
+
+ size += calculate_tuple_room_size(ct) * 2 + /* original and reply */
+ nla_total_size(sizeof(u_int32_t)) + /* status */
+ nla_total_size(sizeof(u_int32_t)); /* id */
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (events & IPCT_MARK || ct->mark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+
+ if (events & IPCT_DESTROY) {
+ const struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int64_t)) * 2 * 2;
+ }
+ return size;
+ }
+
+ size += nla_total_size(sizeof(u_int32_t)); /* CTA_TIMEOUT */
+ if (events & IPCT_PROTOINFO) {
+ size += calculate_protoinfo_room_size(ct);
+ }if (events & IPCT_HELPER || nfct_help(ct))
+ size += calculate_helper_room_size(ct);
+ if (events & IPCT_RELATED)
+ size += calculate_tuple_room_size(ct->master);
+ if (events & IPCT_NATSEQADJ)
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int32_t)) * 3 * 2;
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (events & IPCT_SECMARK || ct->secmark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+ return size;
+}
+
static int ctnetlink_conntrack_event(struct notifier_block *this,
unsigned long events, void *ptr)
{
@@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (!nfnetlink_has_listeners(group))
return NOTIFY_DONE;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb = alloc_skb(ctnetlink_calculate_room_size(ct, events), GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8fcf176..6694173 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -657,6 +657,12 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
write_unlock_bh(&dccp_lock);
return 0;
}
+
+static size_t dccp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -749,6 +755,8 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -774,6 +782,8 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2cdbcb..6e381c9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -294,6 +294,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index ae8c260..a867849 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -537,6 +537,14 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t sctp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int32_t)) +
+ nla_total_size(sizeof(u_int32_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -671,6 +679,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -699,6 +709,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index f947ec4..ddc87a3 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1181,6 +1181,16 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t tcp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -1399,6 +1409,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -1429,6 +1441,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7c2ca48..549da2c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -193,6 +193,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -220,6 +221,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index d22d839..70a21ff 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -181,6 +181,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -204,6 +205,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 8/8] [PATCH] remove module dependency between ctnetlink and nf_nat
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
` (5 preceding siblings ...)
2008-10-09 8:35 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
@ 2008-10-09 8:36 ` Pablo Neira Ayuso
2008-10-09 13:26 ` [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Patrick McHardy
7 siblings, 0 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 8:36 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch removes the module dependency between ctnetlink and
nf_nat by means of an indirect call that is initialized when
nf_nat is loaded. Now, nf_conntrack_netlink only requires
nf_conntrack and nfnetlink.
This patch puts nfnetlink_parse_nat_setup_hook into the
nf_conntrack_core to avoid dependencies between ctnetlink,
nf_conntrack_ipv4 and nf_conntrack_ipv6.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_nat_core.h | 8 ++
net/ipv4/netfilter/nf_nat_core.c | 97 ++++++++++++++++++++++++++++
net/netfilter/nf_conntrack_core.c | 7 ++
net/netfilter/nf_conntrack_netlink.c | 116 +++++++++++-----------------------
4 files changed, 150 insertions(+), 78 deletions(-)
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index f29eeb9..5868406 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -25,4 +25,12 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
else
return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
}
+
+struct nlattr;
+
+extern int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr);
+
#endif /* _NF_NAT_CORE_H */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 2ac9eaf..91ec535 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -584,6 +584,98 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.flags = NF_CT_EXT_F_PREALLOC,
};
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+ [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
+ [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
+};
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+ const struct nf_conn *ct,
+ struct nf_nat_range *range)
+{
+ struct nlattr *tb[CTA_PROTONAT_MAX+1];
+ const struct nf_nat_protocol *npt;
+ int err;
+
+ err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+ if (err < 0)
+ return err;
+
+ npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+ if (npt->nlattr_to_range)
+ err = npt->nlattr_to_range(tb, range);
+ nf_nat_proto_put(npt);
+ return err;
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+ [CTA_NAT_MINIP] = { .type = NLA_U32 },
+ [CTA_NAT_MAXIP] = { .type = NLA_U32 },
+};
+
+static int
+nfnetlink_parse_nat(struct nlattr *nat,
+ const struct nf_conn *ct, struct nf_nat_range *range)
+{
+ struct nlattr *tb[CTA_NAT_MAX+1];
+ int err;
+
+ memset(range, 0, sizeof(*range));
+
+ err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[CTA_NAT_MINIP])
+ range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
+
+ if (!tb[CTA_NAT_MAXIP])
+ range->max_ip = range->min_ip;
+ else
+ range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
+
+ if (range->min_ip)
+ range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+ if (!tb[CTA_NAT_PROTO])
+ return 0;
+
+ err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ struct nf_nat_range range;
+
+ if (nfnetlink_parse_nat(attr, ct, &range) < 0)
+ return -EINVAL;
+ if (nf_nat_initialized(ct, manip))
+ return -EEXIST;
+
+ return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
static int __net_init nf_nat_net_init(struct net *net)
{
net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
@@ -654,6 +746,9 @@ static int __init nf_nat_init(void)
BUG_ON(nf_nat_seq_adjust_hook != NULL);
rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+ BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+ rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+ nfnetlink_parse_nat_setup);
return 0;
cleanup_extend:
@@ -667,10 +762,12 @@ static void __exit nf_nat_cleanup(void)
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+ rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
synchronize_net();
}
MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-ipv4");
module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 281b398..1f9a0a8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -38,9 +38,16 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_nat.h>
#define NF_CONNTRACK_VERSION "0.5.0"
+unsigned int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr) __read_mostly;
+EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1046901..bce9e5d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -784,71 +784,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
return 0;
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
- [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
- [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
- const struct nf_conn *ct,
- struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_PROTONAT_MAX+1];
- const struct nf_nat_protocol *npt;
- int err;
-
- err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
- if (err < 0)
- return err;
-
- npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
- if (npt->nlattr_to_range)
- err = npt->nlattr_to_range(tb, range);
- nf_nat_proto_put(npt);
- return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
- [CTA_NAT_MINIP] = { .type = NLA_U32 },
- [CTA_NAT_MAXIP] = { .type = NLA_U32 },
-};
-
-static inline int
-nfnetlink_parse_nat(struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_NAT_MAX+1];
- int err;
-
- memset(range, 0, sizeof(*range));
-
- err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
- if (err < 0)
- return err;
-
- if (tb[CTA_NAT_MINIP])
- range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
- if (!tb[CTA_NAT_MAXIP])
- range->max_ip = range->min_ip;
- else
- range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO])
- return 0;
-
- err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
- if (err < 0)
- return err;
-
- return 0;
-}
-#endif
-
static inline int
ctnetlink_parse_help(struct nlattr *attr, char **helper_name)
{
@@ -986,6 +921,33 @@ out:
}
static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+ enum nf_nat_manip_type manip,
+ struct nlattr *attr)
+{
+ typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+
+ parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
+ if (!parse_nat_setup) {
+#ifdef CONFIG_KMOD
+ spin_unlock_bh(&nf_conntrack_lock);
+ if (request_module("nf-nat-ipv4") < 0) {
+ spin_lock_bh(&nf_conntrack_lock);
+ return -EOPNOTSUPP;
+ }
+ spin_lock_bh(&nf_conntrack_lock);
+ parse_nat_setup =
+ rcu_dereference(nfnetlink_parse_nat_setup_hook);
+ if (parse_nat_setup)
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+
+ return parse_nat_setup(ct, manip, attr);
+}
+
+static int
ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
{
unsigned long d;
@@ -1008,23 +970,21 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
#ifndef CONFIG_NF_NAT_NEEDED
return -EOPNOTSUPP;
#else
- struct nf_nat_range range;
+ int ret;
if (cda[CTA_NAT_DST]) {
- if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
- &range) < 0)
- return -EINVAL;
- if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
- return -EEXIST;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+ ret = ctnetlink_parse_nat_setup(ct,
+ IP_NAT_MANIP_DST,
+ cda[CTA_NAT_DST]);
+ if (ret < 0)
+ return ret;
}
if (cda[CTA_NAT_SRC]) {
- if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
- &range) < 0)
- return -EINVAL;
- if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
- return -EEXIST;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ ret = ctnetlink_parse_nat_setup(ct,
+ IP_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
+ if (ret < 0)
+ return ret;
}
#endif
}
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
` (6 preceding siblings ...)
2008-10-09 8:36 ` [PATCH 8/8] [PATCH] remove module dependency between ctnetlink and nf_nat Pablo Neira Ayuso
@ 2008-10-09 13:26 ` Patrick McHardy
7 siblings, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-10-09 13:26 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch replaces the unnecessary module refcounting with
> the read-side locks. With this patch, all the dump and fill_info
> function are called under the RCU read lock.
>
> Based on a patch from Fabian Hugelshofer.
Applied, thanks.
Some of the new rcu_read_locks wouldn't have been necessary though
since some of the paths can only be reached through nf_hook_slow,
which already does rcu_read_lock() (and we're relying on that in
many spots).
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases
2008-10-09 8:34 ` [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases Pablo Neira Ayuso
@ 2008-10-09 13:27 ` Patrick McHardy
0 siblings, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-10-09 13:27 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch adds the macro MODULE_ALIAS_NFCT_HELPER that defines a
> way to provide generic and persistent aliases for the connection
> tracking helpers.
>
> This next patch requires this patch.
Applied, thanks.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 8:34 ` [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink Pablo Neira Ayuso
@ 2008-10-09 13:34 ` Patrick McHardy
2008-10-09 13:43 ` Patrick McHardy
0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-10-09 13:34 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch adds module loading for helpers via ctnetlink.
>
> * Creation path: We support explicit and implicit helper assignation. For
> the explicit case, we try to load the module. If the module is correctly
> loaded and the helper is present, we return EAGAIN to re-start the
> creation. Otherwise, we return EOPNOTSUPP.
> * Update path: release the spin lock, load the module and check. If it is
> present, then return EAGAIN to re-start the update.
>
> This patch includes a minor change in nfnetlink to support EAGAIN.
> Based on a suggestion from Patrick McHardy.
>
> This patch provides a refactorized function to lookup-and-set the
> connection tracking helper. The function removes the exported symbol
> __nf_ct_helper_find as it has not clients anymore.
This one doesn't apply:
patching file include/net/netfilter/nf_conntrack_helper.h
patching file net/netfilter/nf_conntrack_core.c
Hunk #1 FAILED at 581.
Hunk #2 succeeded at 753 (offset -5 lines).
Hunk #3 succeeded at 764 (offset -5 lines).
1 out of 3 hunks FAILED -- saving rejects to file
net/netfilter/nf_conntrack_core.c.rej
patching file net/netfilter/nf_conntrack_helper.c
patching file net/netfilter/nf_conntrack_netlink.c
Hunk #2 succeeded at 1131 with fuzz 1.
patching file net/netfilter/nfnetlink.c
> +static inline int nf_ct_set_helper(struct nf_conn *ct, gfp_t flags)
> +{
> + int ret;
> +
> + rcu_read_lock();
> + ret = __nf_ct_set_helper(ct, flags);
> + rcu_read_unlock();
Its a bit confusing to use rcu_read_lock for a function called
_set_something, I would suggest to just do the rcu_read_lock
in that function itself.
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index f38c649..a649f7b 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -952,8 +952,22 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
> }
>
> helper = __nf_conntrack_helper_find_byname(helpname);
> - if (helper == NULL)
> + if (helper == NULL) {
> +#ifdef CONFIG_KMOD
> + spin_unlock_bh(&nf_conntrack_lock);
> +
> + if (request_module("nfct-helper-%s", helpname) < 0) {
> + spin_lock_bh(&nf_conntrack_lock);
> + return -EOPNOTSUPP;
EOPNOTSUPP doesn't seem wrong, but we usually use ENOENT for lookup
failures. It also needs to drop the nfnl_lock for request_module().
> diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
> index b75c9c4..323dcf8 100644
> --- a/net/netfilter/nfnetlink.c
> +++ b/net/netfilter/nfnetlink.c
> @@ -165,7 +165,8 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> } else
> return -EINVAL;
>
> - return nc->call(nfnl, skb, nlh, cda);
> + while ((err = nc->call(nfnl, skb, nlh, cda)) == -EAGAIN);
Ughh no hiding of ";" please :) With dropping nfnl_lock() in ctnetlink,
this needs to do the get_subsys() and find_client() again anyways since
both are protected by that lock.
I have this patch in my nftables tree:
@@ -132,6 +140,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
struct nlmsghdr *nlh)
return 0;
type = nlh->nlmsg_type;
+replay:
ss = nfnetlink_get_subsys(type);
if (!ss) {
#ifdef CONFIG_KMOD
@@ -165,7 +174,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
struct nlmsghdr *nlh)
} else
return -EINVAL;
- return nc->call(nfnl, skb, nlh, cda);
+ err = nc->call(nfnl, skb, nlh, cda);
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
}
}
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 13:34 ` Patrick McHardy
@ 2008-10-09 13:43 ` Patrick McHardy
2008-10-09 14:50 ` Pablo Neira Ayuso
0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-10-09 13:43 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Patrick McHardy wrote:
> This one doesn't apply:
>
> patching file include/net/netfilter/nf_conntrack_helper.h
> patching file net/netfilter/nf_conntrack_core.c
> Hunk #1 FAILED at 581.
> Hunk #2 succeeded at 753 (offset -5 lines).
> Hunk #3 succeeded at 764 (offset -5 lines).
> 1 out of 3 hunks FAILED -- saving rejects to file
> net/netfilter/nf_conntrack_core.c.rej
> patching file net/netfilter/nf_conntrack_helper.c
> patching file net/netfilter/nf_conntrack_netlink.c
> Hunk #2 succeeded at 1131 with fuzz 1.
> patching file net/netfilter/nfnetlink.c
Oops, I think I didn't refresh my tree, so it didn't include any
of the netfilter patches I sent to Dave :)
The remaining issues, especially the nfnl_lock think, should be
fixed though.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 13:43 ` Patrick McHardy
@ 2008-10-09 14:50 ` Pablo Neira Ayuso
2008-10-09 16:11 ` Patrick McHardy
0 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 14:50 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
Patrick McHardy wrote:
> Patrick McHardy wrote:
>> This one doesn't apply:
>>
>> patching file include/net/netfilter/nf_conntrack_helper.h
>> patching file net/netfilter/nf_conntrack_core.c
>> Hunk #1 FAILED at 581.
>> Hunk #2 succeeded at 753 (offset -5 lines).
>> Hunk #3 succeeded at 764 (offset -5 lines).
>> 1 out of 3 hunks FAILED -- saving rejects to file
>> net/netfilter/nf_conntrack_core.c.rej
>> patching file net/netfilter/nf_conntrack_helper.c
>> patching file net/netfilter/nf_conntrack_netlink.c
>> Hunk #2 succeeded at 1131 with fuzz 1.
>> patching file net/netfilter/nfnetlink.c
>
> Oops, I think I didn't refresh my tree, so it didn't include any
> of the netfilter patches I sent to Dave :)
So, what do I have to do now? I'm cloning Dave's tree now to put the
patches on top on it. Fine with it?
> The remaining issues, especially the nfnl_lock think, should be
> fixed though.
OK. BTW, returning EOPNOTSUPP looks better to me. ENOENT is misleading
since the user won't be able to differenciate between "I don't know
anything about that helper" and "this conntrack entry does not exist".
So, I prefer using EOPNOTSUPP.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 14:50 ` Pablo Neira Ayuso
@ 2008-10-09 16:11 ` Patrick McHardy
2008-10-09 16:43 ` Pablo Neira Ayuso
0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-10-09 16:11 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Patrick McHardy wrote:
>>> This one doesn't apply:
>>>
>>> patching file include/net/netfilter/nf_conntrack_helper.h
>>> patching file net/netfilter/nf_conntrack_core.c
>>> Hunk #1 FAILED at 581.
>>> Hunk #2 succeeded at 753 (offset -5 lines).
>>> Hunk #3 succeeded at 764 (offset -5 lines).
>>> 1 out of 3 hunks FAILED -- saving rejects to file
>>> net/netfilter/nf_conntrack_core.c.rej
>>> patching file net/netfilter/nf_conntrack_helper.c
>>> patching file net/netfilter/nf_conntrack_netlink.c
>>> Hunk #2 succeeded at 1131 with fuzz 1.
>>> patching file net/netfilter/nfnetlink.c
>> Oops, I think I didn't refresh my tree, so it didn't include any
>> of the netfilter patches I sent to Dave :)
>
> So, what do I have to do now? I'm cloning Dave's tree now to put the
> patches on top on it. Fine with it?
I think it was my mistake, let me try again ... yes, it applies
cleanly. Both Dave's tree or my tree should work, but use Dave's
if you want to be safe since I haven't updated mine yet.
>> The remaining issues, especially the nfnl_lock think, should be
>> fixed though.
>
> OK. BTW, returning EOPNOTSUPP looks better to me. ENOENT is misleading
> since the user won't be able to differenciate between "I don't know
> anything about that helper" and "this conntrack entry does not exist".
> So, I prefer using EOPNOTSUPP.
Makes sense.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink
2008-10-09 16:11 ` Patrick McHardy
@ 2008-10-09 16:43 ` Pablo Neira Ayuso
0 siblings, 0 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 16:43 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
[-- Attachment #1: Type: text/plain, Size: 1207 bytes --]
Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> Patrick McHardy wrote:
>>> Patrick McHardy wrote:
>>>> This one doesn't apply:
>>>>
>>>> patching file include/net/netfilter/nf_conntrack_helper.h
>>>> patching file net/netfilter/nf_conntrack_core.c
>>>> Hunk #1 FAILED at 581.
>>>> Hunk #2 succeeded at 753 (offset -5 lines).
>>>> Hunk #3 succeeded at 764 (offset -5 lines).
>>>> 1 out of 3 hunks FAILED -- saving rejects to file
>>>> net/netfilter/nf_conntrack_core.c.rej
>>>> patching file net/netfilter/nf_conntrack_helper.c
>>>> patching file net/netfilter/nf_conntrack_netlink.c
>>>> Hunk #2 succeeded at 1131 with fuzz 1.
>>>> patching file net/netfilter/nfnetlink.c
>>> Oops, I think I didn't refresh my tree, so it didn't include any
>>> of the netfilter patches I sent to Dave :)
>>
>> So, what do I have to do now? I'm cloning Dave's tree now to put the
>> patches on top on it. Fine with it?
>
> I think it was my mistake, let me try again ... yes, it applies
> cleanly. Both Dave's tree or my tree should work, but use Dave's
> if you want to be safe since I haven't updated mine yet.
OK, attached the new version of the patch.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
[-- Attachment #2: 03.patch --]
[-- Type: text/x-diff, Size: 9747 bytes --]
[PATCH] Helper modules load-on-demand support for ctnetlink
From: Pablo Neira Ayuso <pablo@netfilter.org>
This patch adds module loading for helpers via ctnetlink.
* Creation path: We support explicit and implicit helper assignation. For
the explicit case, we try to load the module. If the module is correctly
loaded and the helper is present, we return EAGAIN to re-start the
creation. Otherwise, we return EOPNOTSUPP.
* Update path: release the spin lock, load the module and check. If it is
present, then return EAGAIN to re-start the update.
This patch includes a minor change in nfnetlink to support EAGAIN.
Based on a suggestion from Patrick McHardy.
This patch provides a refactorized function to lookup-and-set the
connection tracking helper. The function removes the exported symbol
__nf_ct_helper_find as it has not clients anymore.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/linux/netfilter/nfnetlink.h | 3 +
include/net/netfilter/nf_conntrack_helper.h | 5 +-
net/netfilter/nf_conntrack_core.c | 28 +---------
net/netfilter/nf_conntrack_helper.c | 33 +++++++++++-
net/netfilter/nf_conntrack_netlink.c | 73 ++++++++++++++++++++++++---
net/netfilter/nfnetlink.c | 12 +++-
6 files changed, 111 insertions(+), 43 deletions(-)
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 0d8424f..7d8e045 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -78,6 +78,9 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group,
int echo);
extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
+extern void nfnl_lock(void);
+extern void nfnl_unlock(void);
+
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index f8060ab..8eebf6e 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -39,9 +39,6 @@ struct nf_conntrack_helper
};
extern struct nf_conntrack_helper *
-__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple);
-
-extern struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name);
extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
@@ -49,6 +46,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
+extern int __nf_ct_set_helper(struct nf_conn *ct, gfp_t flags);
+
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
{
return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27de3c7..54e35f3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -581,14 +581,7 @@ init_conntrack(struct net *net,
nf_conntrack_get(&ct->master->ct_general);
NF_CT_STAT_INC(net, expect_new);
} else {
- struct nf_conntrack_helper *helper;
-
- helper = __nf_ct_helper_find(&repl_tuple);
- if (helper) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help)
- rcu_assign_pointer(help->helper, helper);
- }
+ __nf_ct_set_helper(ct, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
@@ -765,7 +758,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
const struct nf_conntrack_tuple *newreply)
{
struct nf_conn_help *help = nfct_help(ct);
- struct nf_conntrack_helper *helper;
/* Should be unconfirmed, so not in hash table yet */
NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
@@ -778,23 +770,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
return;
rcu_read_lock();
- helper = __nf_ct_helper_find(newreply);
- if (helper == NULL) {
- if (help)
- rcu_assign_pointer(help->helper, NULL);
- goto out;
- }
-
- if (help == NULL) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help == NULL)
- goto out;
- } else {
- memset(&help->help, 0, sizeof(help->help));
- }
-
- rcu_assign_pointer(help->helper, helper);
-out:
+ __nf_ct_set_helper(ct, GFP_ATOMIC);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9c06b9f..86cdbde 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -44,7 +44,7 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
(__force __u16)tuple->src.u.all) % nf_ct_helper_hsize;
}
-struct nf_conntrack_helper *
+static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_helper *helper;
@@ -62,7 +62,6 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(__nf_ct_helper_find);
struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
@@ -94,6 +93,36 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+int __nf_ct_set_helper(struct nf_conn *ct, gfp_t flags)
+{
+ int ret = 0;
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help = nfct_help(ct);
+
+ helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ if (helper == NULL) {
+ if (help)
+ rcu_assign_pointer(help->helper, NULL);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (help == NULL) {
+ help = nf_ct_helper_ext_add(ct, flags);
+ if (help == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ memset(&help->help, 0, sizeof(help->help));
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__nf_ct_set_helper);
+
static inline int unhelp(struct nf_conntrack_tuple_hash *i,
const struct nf_conntrack_helper *me)
{
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f38c649..98939e6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -952,8 +952,23 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
}
helper = __nf_conntrack_helper_find_byname(helpname);
- if (helper == NULL)
+ if (helper == NULL) {
+#ifdef CONFIG_KMOD
+ spin_unlock_bh(&nf_conntrack_lock);
+ nfnl_unlock();
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ nfnl_lock();
+ spin_lock_bh(&nf_conntrack_lock);
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock();
+ spin_lock_bh(&nf_conntrack_lock);
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper)
+ return -EAGAIN;
+#endif
return -EOPNOTSUPP;
+ }
if (help) {
if (help->helper == helper)
@@ -1117,7 +1132,6 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
{
struct nf_conn *ct;
int err = -EINVAL;
- struct nf_conn_help *help;
struct nf_conntrack_helper *helper;
ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL);
@@ -1132,16 +1146,57 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
ct->status |= IPS_CONFIRMED;
rcu_read_lock();
- helper = __nf_ct_helper_find(rtuple);
- if (helper) {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help == NULL) {
+ if (cda[CTA_HELP]) {
+ char *helpname;
+
+ err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto err;
+ }
+
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper == NULL) {
+ rcu_read_unlock();
+#ifdef CONFIG_KMOD
+ nfnl_unlock();
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ nfnl_lock();
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ nfnl_lock();
+ rcu_read_lock();
+ helper = __nf_conntrack_helper_find_byname(helpname);
+ if (helper) {
+ rcu_read_unlock();
+ err = -EAGAIN;
+ goto err;
+ }
+ rcu_read_unlock();
+#endif
+ err = -EOPNOTSUPP;
+ goto err;
+ } else {
+ struct nf_conn_help *help;
+
+ help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
+ if (help == NULL) {
+ rcu_read_unlock();
+ err = -ENOMEM;
+ goto err;
+ }
+
+ /* not in hash table yet so not strictly necessary */
+ rcu_assign_pointer(help->helper, helper);
+ }
+ } else {
+ /* try an implicit helper assignation */
+ err = __nf_ct_set_helper(ct, GFP_ATOMIC);
+ if (err == -ENOMEM) {
rcu_read_unlock();
- err = -ENOMEM;
goto err;
}
- /* not in hash table yet so not strictly necessary */
- rcu_assign_pointer(help->helper, helper);
}
if (cda[CTA_STATUS]) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b75c9c4..4739f9f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,15 +44,17 @@ static struct sock *nfnl = NULL;
static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
static DEFINE_MUTEX(nfnl_mutex);
-static inline void nfnl_lock(void)
+void nfnl_lock(void)
{
mutex_lock(&nfnl_mutex);
}
+EXPORT_SYMBOL_GPL(nfnl_lock);
-static inline void nfnl_unlock(void)
+void nfnl_unlock(void)
{
mutex_unlock(&nfnl_mutex);
}
+EXPORT_SYMBOL_GPL(nfnl_unlock);
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
@@ -132,6 +134,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return 0;
type = nlh->nlmsg_type;
+replay:
ss = nfnetlink_get_subsys(type);
if (!ss) {
#ifdef CONFIG_KMOD
@@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
} else
return -EINVAL;
- return nc->call(nfnl, skb, nlh, cda);
+ err = nc->call(nfnl, skb, nlh, cda);
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
}
}
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-10-09 8:35 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
@ 2008-10-09 16:45 ` Pablo Neira Ayuso
0 siblings, 0 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-09 16:45 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
[-- Attachment #1: Type: text/plain, Size: 348 bytes --]
Pablo Neira Ayuso wrote:
> This patch adds dynamic message size calculation for ctnetlink. This
> reduces CPU consumption since the overhead in the message trimming
> is removed.
Please, take this patch instead. I just noticed one uninitilized
variable in calculate_helper_room_size().
--
"Los honestos son inadaptados sociales" -- Les Luthiers
[-- Attachment #2: 07.patch --]
[-- Type: text/x-diff, Size: 16959 bytes --]
[PATCH] dynamic calculation of event message size for ctnetlink
From: Pablo Neira Ayuso <pablo@netfilter.org>
This patch adds dynamic message size calculation for ctnetlink. This
reduces CPU consumption since the overhead in the message trimming
is removed.
Based on a suggestion from Patrick McHardy.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_l3proto.h | 2
include/net/netfilter/nf_conntrack_l4proto.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 ++
net/netfilter/nf_conntrack_core.c | 6 +
net/netfilter/nf_conntrack_netlink.c | 103 ++++++++++++++++++++++++
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++
net/netfilter/nf_conntrack_proto_gre.c | 1
net/netfilter/nf_conntrack_proto_sctp.c | 12 +++
net/netfilter/nf_conntrack_proto_tcp.c | 14 +++
net/netfilter/nf_conntrack_proto_udp.c | 2
net/netfilter/nf_conntrack_proto_udplite.c | 2
14 files changed, 182 insertions(+), 1 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 0378676..e0007f9 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -55,6 +55,8 @@ struct nf_conntrack_l3proto
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+
+ size_t (*nlattr_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7f2f43c..3c80479 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -72,6 +72,8 @@ struct nf_conntrack_l4proto
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+ size_t (*nlattr_size)(void);
+ size_t (*nlattr_protoinfo_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
@@ -115,6 +117,7 @@ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+extern size_t nf_ct_port_nlattr_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4a7c352..7b354e5 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -330,6 +330,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv4_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t))*2;
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -350,6 +355,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nlattr_size = ipv4_nlattr_size,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4e88792..9e6bb21 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -262,6 +262,13 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmp_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -310,6 +317,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nlattr_size = icmp_nlattr_size,
.nla_policy = icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index e91db16..3bc103d 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -342,6 +342,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t)*4)*2;
+}
#endif
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
@@ -354,6 +359,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
+ .nlattr_size = ipv6_nlattr_size,
.nla_policy = ipv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 0572617..1c32c71 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -243,6 +243,13 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmpv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -275,6 +282,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nlattr_size = icmpv6_nlattr_size,
.nla_policy = icmpv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8c482c4..d44b225 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -897,6 +897,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+
+size_t nf_ct_port_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int16_t))*2;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4316e1f..2d4dbc7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -404,6 +404,107 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static inline size_t calculate_tuple_room_size(const struct nf_conn *ct)
+{
+ struct nf_conntrack_l3proto *l3proto;
+ struct nf_conntrack_l4proto *l4proto;
+ size_t size;
+
+ rcu_read_lock();
+ /* nested attributes CTA_TUPLE_[ORIG|REPLY] plus CTA_TUPLE_IP */
+ size = nla_total_size(0) * 2;
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ if (likely(l3proto->nlattr_size))
+ size += l3proto->nlattr_size();
+
+ /* nested attributes CTA_TUPLE_PROTO plus CTA_PROTONUM */
+ size += nla_total_size(0) + nla_total_size(sizeof(u_int8_t));
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (likely(l4proto->nlattr_size))
+ size += l4proto->nlattr_size();
+
+ rcu_read_unlock();
+ return size;
+}
+
+static inline size_t calculate_protoinfo_room_size(const struct nf_conn *ct)
+{
+ size_t size = 0;
+ struct nf_conntrack_l4proto *l4proto;
+
+ rcu_read_lock();
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->nlattr_protoinfo_size)
+ size = l4proto->nlattr_protoinfo_size();
+ rcu_read_unlock();
+
+ return size;
+}
+
+static inline size_t calculate_helper_room_size(const struct nf_conn *ct)
+{
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+ size_t size;
+
+ if (!help)
+ goto out;
+
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out_unlock;
+
+ size = nla_total_size(0) + /* CTA_HELP */
+ nla_total_size(strlen(helper->name));
+out_unlock:
+ rcu_read_unlock();
+out:
+ return size;
+}
+
+static inline size_t
+ctnetlink_calculate_room_size(const struct nf_conn *ct, unsigned long events)
+{
+ size_t size = NLMSG_SPACE(sizeof(struct nfgenmsg));
+
+ size += calculate_tuple_room_size(ct) * 2 + /* original and reply */
+ nla_total_size(sizeof(u_int32_t)) + /* status */
+ nla_total_size(sizeof(u_int32_t)); /* id */
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (events & IPCT_MARK || ct->mark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+
+ if (events & IPCT_DESTROY) {
+ const struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int64_t)) * 2 * 2;
+ }
+ return size;
+ }
+
+ size += nla_total_size(sizeof(u_int32_t)); /* CTA_TIMEOUT */
+ if (events & IPCT_PROTOINFO) {
+ size += calculate_protoinfo_room_size(ct);
+ }if (events & IPCT_HELPER || nfct_help(ct))
+ size += calculate_helper_room_size(ct);
+ if (events & IPCT_RELATED)
+ size += calculate_tuple_room_size(ct->master);
+ if (events & IPCT_NATSEQADJ)
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int32_t)) * 3 * 2;
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (events & IPCT_SECMARK || ct->secmark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+ return size;
+}
+
static int ctnetlink_conntrack_event(struct notifier_block *this,
unsigned long events, void *ptr)
{
@@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (!nfnetlink_has_listeners(group))
return NOTIFY_DONE;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb = alloc_skb(ctnetlink_calculate_room_size(ct, events), GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8fcf176..6694173 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -657,6 +657,12 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
write_unlock_bh(&dccp_lock);
return 0;
}
+
+static size_t dccp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -749,6 +755,8 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -774,6 +782,8 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2cdbcb..6e381c9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -294,6 +294,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index ae8c260..a867849 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -537,6 +537,14 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t sctp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int32_t)) +
+ nla_total_size(sizeof(u_int32_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -671,6 +679,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -699,6 +709,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index f947ec4..ddc87a3 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1181,6 +1181,16 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t tcp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -1399,6 +1409,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -1429,6 +1441,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7c2ca48..549da2c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -193,6 +193,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -220,6 +221,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index d22d839..70a21ff 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -181,6 +181,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -204,6 +205,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper
2008-10-09 8:34 ` [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper Pablo Neira Ayuso
@ 2008-10-10 13:03 ` Patrick McHardy
0 siblings, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-10-10 13:03 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch changes the return value if the conntrack has no helper assigned.
> Instead of EINVAL, which is reserved for malformed messages, it returns
> EOPNOTSUPP.
Applied, thanks.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink
2008-10-09 8:35 ` [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink Pablo Neira Ayuso
@ 2008-10-10 13:31 ` Patrick McHardy
2008-10-11 12:05 ` Pablo Neira Ayuso
0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-10-10 13:31 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> As for now, the creation and update of conntracks via ctnetlink do not
> propagate an event to userspace. This can result in inconsistent situations
> if several userspace processes modify the connection tracking table by means
> of ctnetlink at the same time. Specifically, using the conntrack command
> line tool and conntrackd at the same time can trigger unconsistencies.
>
> This patch also modifies the event cache infrastructure to pass the
> process PID and the ECHO flag to nfnetlink_send() to report back
> to userspace if the process that triggered the change needs so.
> Based on a suggestion from Patrick McHardy.
Looks pretty good, some minor issues:
- there are quite a lot of trailing whitespaces in this
patch, please remove those.
> +/* This structure is passed to event handler */
> +struct nf_ct_event {
> + struct nf_conn *ct;
> + u32 pid;
> + int report;
> +};
Just a suggestion: passing the nlmsghdr instead of the ECHO
flag and doing the approriate handling in the event functions
seems more logical to me. I think I know why you did it this
way (no reporting on unload, no netlink context there), see
below about that.
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index f465090..aab2618 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -174,7 +174,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
> NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
> NF_CT_ASSERT(!timer_pending(&ct->timeout));
>
> - nf_conntrack_event(IPCT_DESTROY, ct);
> + if (!test_bit(IPS_DYING_BIT, &ct->status))
> + nf_conntrack_event(IPCT_DESTROY, ct);
Whats the idea behind this change? Is it simply an optimization?
> set_bit(IPS_DYING_BIT, &ct->status);
>
> /* To make sure we don't get any weird locking issues here:
> @@ -963,8 +964,24 @@ void nf_ct_iterate_cleanup(struct net *net,
> }
> EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
>
> +struct __nf_ct_flush_report {
> + u32 pid;
> + int report;
> +};
> +
> static int kill_all(struct nf_conn *i, void *data)
> {
> + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
> +
> + if (!fr->report)
> + return 1;
Whats the reasoning behind not reporting destroy events on unload?
I don't think there's anything special about module unload, so it
seems inconsistent.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink
2008-10-10 13:31 ` Patrick McHardy
@ 2008-10-11 12:05 ` Pablo Neira Ayuso
2008-10-11 14:53 ` Patrick McHardy
0 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-10-11 12:05 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
Patrick McHardy wrote:
> Looks pretty good, some minor issues:
>
> - there are quite a lot of trailing whitespaces in this
> patch, please remove those.
I have added a tweak for vim to remove them automatically when I write
the file, so this should not happen anymore. BTW, does git complain on
this by default when I apply one patch or I have to tweak something?
>> +/* This structure is passed to event handler */
>> +struct nf_ct_event {
>> + struct nf_conn *ct;
>> + u32 pid;
>> + int report;
>> +};
>
> Just a suggestion: passing the nlmsghdr instead of the ECHO
> flag and doing the approriate handling in the event functions
> seems more logical to me. I think I know why you did it this
> way (no reporting on unload, no netlink context there), see
> below about that.
Indeed.
>> diff --git a/net/netfilter/nf_conntrack_core.c
>> b/net/netfilter/nf_conntrack_core.c
>> index f465090..aab2618 100644
>> --- a/net/netfilter/nf_conntrack_core.c
>> +++ b/net/netfilter/nf_conntrack_core.c
>> @@ -174,7 +174,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
>> NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
>> NF_CT_ASSERT(!timer_pending(&ct->timeout));
>>
>> - nf_conntrack_event(IPCT_DESTROY, ct);
>> + if (!test_bit(IPS_DYING_BIT, &ct->status))
>> + nf_conntrack_event(IPCT_DESTROY, ct);
>
> Whats the idea behind this change? Is it simply an optimization?
If we remove a conntrack entry via ctnetlink, we get the event report
twice, one from ctnetlink and another one from death_by_timeout, so we
set the dying bit in ctnetlink to avoid this double reporting in
death_by_timeout. This idea is actually yours :)
>> set_bit(IPS_DYING_BIT, &ct->status);
>>
>> /* To make sure we don't get any weird locking issues here:
>> @@ -963,8 +964,24 @@ void nf_ct_iterate_cleanup(struct net *net,
>> }
>> EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
>>
>> +struct __nf_ct_flush_report {
>> + u32 pid;
>> + int report;
>> +};
>> +
>> static int kill_all(struct nf_conn *i, void *data)
>> {
>> + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report
>> *)data;
>> +
>> + if (!fr->report)
>> + return 1;
>
> Whats the reasoning behind not reporting destroy events on unload?
> I don't think there's anything special about module unload, so it
> seems inconsistent.
OK, I'll fix this. I'm going to prepare another patch round to cover
this issues.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink
2008-10-11 12:05 ` Pablo Neira Ayuso
@ 2008-10-11 14:53 ` Patrick McHardy
0 siblings, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-10-11 14:53 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Looks pretty good, some minor issues:
>>
>> - there are quite a lot of trailing whitespaces in this
>> patch, please remove those.
>
> I have added a tweak for vim to remove them automatically when I write
> the file, so this should not happen anymore. BTW, does git complain on
> this by default when I apply one patch or I have to tweak something?
I think it only does when importing a patch, not when doing commits.
>>> +/* This structure is passed to event handler */
>>> +struct nf_ct_event {
>>> + struct nf_conn *ct;
>>> + u32 pid;
>>> + int report;
>>> +};
>> Just a suggestion: passing the nlmsghdr instead of the ECHO
>> flag and doing the approriate handling in the event functions
>> seems more logical to me. I think I know why you did it this
>> way (no reporting on unload, no netlink context there), see
>> below about that.
>
> Indeed.
>
>>> diff --git a/net/netfilter/nf_conntrack_core.c
>>> b/net/netfilter/nf_conntrack_core.c
>>> index f465090..aab2618 100644
>>> --- a/net/netfilter/nf_conntrack_core.c
>>> +++ b/net/netfilter/nf_conntrack_core.c
>>> @@ -174,7 +174,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
>>> NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
>>> NF_CT_ASSERT(!timer_pending(&ct->timeout));
>>>
>>> - nf_conntrack_event(IPCT_DESTROY, ct);
>>> + if (!test_bit(IPS_DYING_BIT, &ct->status))
>>> + nf_conntrack_event(IPCT_DESTROY, ct);
>> Whats the idea behind this change? Is it simply an optimization?
>
> If we remove a conntrack entry via ctnetlink, we get the event report
> twice, one from ctnetlink and another one from death_by_timeout, so we
> set the dying bit in ctnetlink to avoid this double reporting in
> death_by_timeout. This idea is actually yours :)
I see, thanks for the explanation :)
>>> + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report
>>> *)data;
>>> +
>>> + if (!fr->report)
>>> + return 1;
>> Whats the reasoning behind not reporting destroy events on unload?
>> I don't think there's anything special about module unload, so it
>> seems inconsistent.
>
> OK, I'll fix this. I'm going to prepare another patch round to cover
> this issues.
Unfortunately the networking merge window is closed already.
The NAT decoupling qualifies as a bugfix in my opinion and
we can get that one in, the others unfortunately will have
to wait.
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-17 8:39 [PATCH 1/8] [PATCH] use nf_conntrack_get instead of atomic_inc Pablo Neira Ayuso
@ 2008-11-17 8:41 ` Pablo Neira Ayuso
2008-11-17 8:44 ` David Miller
2008-11-17 15:32 ` Patrick McHardy
0 siblings, 2 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-11-17 8:41 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber
This patch adds dynamic message size calculation for ctnetlink. This
reduces CPU consumption since the overhead in the message trimming
is removed.
Based on a suggestion from Patrick McHardy.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_l3proto.h | 2
include/net/netfilter/nf_conntrack_l4proto.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 ++
net/netfilter/nf_conntrack_core.c | 6 +
net/netfilter/nf_conntrack_netlink.c | 103 ++++++++++++++++++++++++
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++
net/netfilter/nf_conntrack_proto_gre.c | 1
net/netfilter/nf_conntrack_proto_sctp.c | 12 +++
net/netfilter/nf_conntrack_proto_tcp.c | 14 +++
net/netfilter/nf_conntrack_proto_udp.c | 2
net/netfilter/nf_conntrack_proto_udplite.c | 2
14 files changed, 182 insertions(+), 1 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 0378676..e0007f9 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -55,6 +55,8 @@ struct nf_conntrack_l3proto
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+
+ size_t (*nlattr_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7f2f43c..3c80479 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -72,6 +72,8 @@ struct nf_conntrack_l4proto
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+ size_t (*nlattr_size)(void);
+ size_t (*nlattr_protoinfo_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
@@ -115,6 +117,7 @@ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+extern size_t nf_ct_port_nlattr_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b2141e1..a739d5a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -329,6 +329,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv4_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t))*2;
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -349,6 +354,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nlattr_size = ipv4_nlattr_size,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 1fd3ef7..f2648aa 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -262,6 +262,13 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmp_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -310,6 +317,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nlattr_size = icmp_nlattr_size,
.nla_policy = icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 727b953..4d3573e 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -341,6 +341,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t)*4)*2;
+}
#endif
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
@@ -353,6 +358,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
+ .nlattr_size = ipv6_nlattr_size,
.nla_policy = ipv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index bd52151..acb852a 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -243,6 +243,13 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmpv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -275,6 +282,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nlattr_size = icmpv6_nlattr_size,
.nla_policy = icmpv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 63afdf2..9a1c206 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -904,6 +904,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+
+size_t nf_ct_port_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int16_t))*2;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b9b5c53..0205ae6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -404,6 +404,107 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static inline size_t calculate_tuple_room_size(const struct nf_conn *ct)
+{
+ struct nf_conntrack_l3proto *l3proto;
+ struct nf_conntrack_l4proto *l4proto;
+ size_t size;
+
+ rcu_read_lock();
+ /* nested attributes CTA_TUPLE_[ORIG|REPLY] plus CTA_TUPLE_IP */
+ size = nla_total_size(0) * 2;
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ if (likely(l3proto->nlattr_size))
+ size += l3proto->nlattr_size();
+
+ /* nested attributes CTA_TUPLE_PROTO plus CTA_PROTONUM */
+ size += nla_total_size(0) + nla_total_size(sizeof(u_int8_t));
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (likely(l4proto->nlattr_size))
+ size += l4proto->nlattr_size();
+
+ rcu_read_unlock();
+ return size;
+}
+
+static inline size_t calculate_protoinfo_room_size(const struct nf_conn *ct)
+{
+ size_t size = 0;
+ struct nf_conntrack_l4proto *l4proto;
+
+ rcu_read_lock();
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->nlattr_protoinfo_size)
+ size = l4proto->nlattr_protoinfo_size();
+ rcu_read_unlock();
+
+ return size;
+}
+
+static inline size_t calculate_helper_room_size(const struct nf_conn *ct)
+{
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+ size_t size;
+
+ if (!help)
+ goto out;
+
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out_unlock;
+
+ size = nla_total_size(0) + /* CTA_HELP */
+ nla_total_size(strlen(helper->name));
+out_unlock:
+ rcu_read_unlock();
+out:
+ return size;
+}
+
+static inline size_t
+ctnetlink_calculate_room_size(const struct nf_conn *ct, unsigned long events)
+{
+ size_t size = NLMSG_SPACE(sizeof(struct nfgenmsg));
+
+ size += calculate_tuple_room_size(ct) * 2 + /* original and reply */
+ nla_total_size(sizeof(u_int32_t)) + /* status */
+ nla_total_size(sizeof(u_int32_t)); /* id */
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (events & IPCT_MARK || ct->mark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+
+ if (events & IPCT_DESTROY) {
+ const struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int64_t)) * 2 * 2;
+ }
+ return size;
+ }
+
+ size += nla_total_size(sizeof(u_int32_t)); /* CTA_TIMEOUT */
+ if (events & IPCT_PROTOINFO) {
+ size += calculate_protoinfo_room_size(ct);
+ }if (events & IPCT_HELPER || nfct_help(ct))
+ size += calculate_helper_room_size(ct);
+ if (events & IPCT_RELATED)
+ size += calculate_tuple_room_size(ct->master);
+ if (events & IPCT_NATSEQADJ)
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int32_t)) * 3 * 2;
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (events & IPCT_SECMARK || ct->secmark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+ return size;
+}
+
static int ctnetlink_conntrack_event(struct notifier_block *this,
unsigned long events, void *ptr)
{
@@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (!nfnetlink_has_listeners(group))
return NOTIFY_DONE;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb = alloc_skb(ctnetlink_calculate_room_size(ct, events), GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8fcf176..6694173 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -657,6 +657,12 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
write_unlock_bh(&dccp_lock);
return 0;
}
+
+static size_t dccp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -749,6 +755,8 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -774,6 +782,8 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 4ab62ad..8237cff 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -294,6 +294,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c2bd457..78283c1 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -537,6 +537,14 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t sctp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int32_t)) +
+ nla_total_size(sizeof(u_int32_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -671,6 +679,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -699,6 +709,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a1edb9c..36e2876 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1181,6 +1181,16 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t tcp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -1399,6 +1409,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -1429,6 +1441,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 2b8b1f5..e9abc8d 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -193,6 +193,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -220,6 +221,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4579d8d..90f14e7 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -181,6 +181,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -204,6 +205,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-17 8:41 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
@ 2008-11-17 8:44 ` David Miller
2008-11-17 15:32 ` Patrick McHardy
1 sibling, 0 replies; 29+ messages in thread
From: David Miller @ 2008-11-17 8:44 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, kaber
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 Nov 2008 09:41:42 +0100
> This patch adds dynamic message size calculation for ctnetlink. This
> reduces CPU consumption since the overhead in the message trimming
> is removed.
>
> Based on a suggestion from Patrick McHardy.
>
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Nice patch.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-17 8:41 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
2008-11-17 8:44 ` David Miller
@ 2008-11-17 15:32 ` Patrick McHardy
2008-11-18 3:33 ` Pablo Neira Ayuso
1 sibling, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-11-17 15:32 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> This patch adds dynamic message size calculation for ctnetlink. This
> reduces CPU consumption since the overhead in the message trimming
> is removed.
>
> static int ctnetlink_conntrack_event(struct notifier_block *this,
> unsigned long events, void *ptr)
> {
> @@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
> if (!nfnetlink_has_listeners(group))
> return NOTIFY_DONE;
>
> - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
> + skb = alloc_skb(ctnetlink_calculate_room_size(ct, events), GFP_ATOMIC);
> if (!skb)
> return NOTIFY_DONE;
These calculations look somewhat expensive to perform for every message.
Do you have any numbers for this new patch that shows the difference
in CPU usage compared to the resizing done by af_netlink.c?
Since many of these are static in size an alternative would be to
update those sizes during protocol/helper/whatever (un)registration.
But if this patch already improves things, we can put it in and work
on perfecting it later :)
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-17 15:32 ` Patrick McHardy
@ 2008-11-18 3:33 ` Pablo Neira Ayuso
2008-11-18 10:21 ` Pablo Neira Ayuso
2008-11-18 11:01 ` Patrick McHardy
0 siblings, 2 replies; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-11-18 3:33 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> This patch adds dynamic message size calculation for ctnetlink. This
>> reduces CPU consumption since the overhead in the message trimming
>> is removed.
>>
>> static int ctnetlink_conntrack_event(struct notifier_block *this,
>> unsigned long events, void *ptr)
>> {
>> @@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct
>> notifier_block *this,
>> if (!nfnetlink_has_listeners(group))
>> return NOTIFY_DONE;
>>
>> - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
>> + skb = alloc_skb(ctnetlink_calculate_room_size(ct, events),
>> GFP_ATOMIC);
>> if (!skb)
>> return NOTIFY_DONE;
>
> These calculations look somewhat expensive to perform for every message.
> Do you have any numbers for this new patch that shows the difference
> in CPU usage compared to the resizing done by af_netlink.c?
Fabian Hugelshofer reported some reduction (~5%) on an embedded
environment but he was using top to measure the difference. I'll collect
some more trustable data and get back to you.
> Since many of these are static in size an alternative would be to
> update those sizes during protocol/helper/whatever (un)registration.
> But if this patch already improves things, we can put it in and work
> on perfecting it later :)
We can same save some branches doing so, but still ctnetlink has a lot
of attributes that are dependent of the message type.
What if we reduce the size of the skb allocated for netlink to more
reasonable size instead of the accurate calculation? I'll check this
possibility.
BTW, probably you already know, but in case that you try to use oprofile
with your current tree, since it is based on 2.6.28-rc2, oprofile was
broken here, I had to pull the fixes from somewhere else.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-18 3:33 ` Pablo Neira Ayuso
@ 2008-11-18 10:21 ` Pablo Neira Ayuso
2008-11-18 11:03 ` Patrick McHardy
2008-11-18 11:01 ` Patrick McHardy
1 sibling, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-11-18 10:21 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> These calculations look somewhat expensive to perform for every message.
>> Do you have any numbers for this new patch that shows the difference
>> in CPU usage compared to the resizing done by af_netlink.c?
>
> Fabian Hugelshofer reported some reduction (~5%) on an embedded
> environment but he was using top to measure the difference. I'll collect
> some more trustable data and get back to you.
Some oprofile results:
wo/patch
2189 0.0305 nf_conntrack_netlink.ko nf_conntrack_netlink
ctnetlink_conntrack_event
w/patch
2302 0.0440 nf_conntrack_netlink.ko nf_conntrack_netlink
ctnetlink_conntrack_event
While __alloc_skb and netlink_broadcast report similar values for w/ and
wo/ the patch.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-18 3:33 ` Pablo Neira Ayuso
2008-11-18 10:21 ` Pablo Neira Ayuso
@ 2008-11-18 11:01 ` Patrick McHardy
1 sibling, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-11-18 11:01 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Pablo Neira Ayuso wrote:
>>> This patch adds dynamic message size calculation for ctnetlink. This
>>> reduces CPU consumption since the overhead in the message trimming
>>> is removed.
>>>
>>> static int ctnetlink_conntrack_event(struct notifier_block *this,
>>> unsigned long events, void *ptr)
>>> {
>>> @@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct
>>> notifier_block *this,
>>> if (!nfnetlink_has_listeners(group))
>>> return NOTIFY_DONE;
>>>
>>> - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
>>> + skb = alloc_skb(ctnetlink_calculate_room_size(ct, events),
>>> GFP_ATOMIC);
>>> if (!skb)
>>> return NOTIFY_DONE;
>> These calculations look somewhat expensive to perform for every message.
>> Do you have any numbers for this new patch that shows the difference
>> in CPU usage compared to the resizing done by af_netlink.c?
>
> Fabian Hugelshofer reported some reduction (~5%) on an embedded
> environment but he was using top to measure the difference. I'll collect
> some more trustable data and get back to you.
Thanks.
>> Since many of these are static in size an alternative would be to
>> update those sizes during protocol/helper/whatever (un)registration.
>> But if this patch already improves things, we can put it in and work
>> on perfecting it later :)
>
> We can same save some branches doing so, but still ctnetlink has a lot
> of attributes that are dependent of the message type.
Indeed. But f.i. all the ->size(void) functions could obviously be
constants.
> What if we reduce the size of the skb allocated for netlink to more
> reasonable size instead of the accurate calculation? I'll check this
> possibility.
I don't think we need to be 100% accurate, just find a good middle
ground between grossly overallocating and the resulting unnecessary
copy, potentially expensive size calculations and socket memory waste.
> BTW, probably you already know, but in case that you try to use oprofile
> with your current tree, since it is based on 2.6.28-rc2, oprofile was
> broken here, I had to pull the fixes from somewhere else.
I'll probably merge what I have to Dave soon so I can resync with
his tree.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-18 10:21 ` Pablo Neira Ayuso
@ 2008-11-18 11:03 ` Patrick McHardy
2008-11-19 0:03 ` Pablo Neira Ayuso
0 siblings, 1 reply; 29+ messages in thread
From: Patrick McHardy @ 2008-11-18 11:03 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Pablo Neira Ayuso wrote:
>> Patrick McHardy wrote:
>>> These calculations look somewhat expensive to perform for every message.
>>> Do you have any numbers for this new patch that shows the difference
>>> in CPU usage compared to the resizing done by af_netlink.c?
>> Fabian Hugelshofer reported some reduction (~5%) on an embedded
>> environment but he was using top to measure the difference. I'll collect
>> some more trustable data and get back to you.
>
> Some oprofile results:
>
> wo/patch
> 2189 0.0305 nf_conntrack_netlink.ko nf_conntrack_netlink
> ctnetlink_conntrack_event
>
> w/patch
> 2302 0.0440 nf_conntrack_netlink.ko nf_conntrack_netlink
> ctnetlink_conntrack_event
>
> While __alloc_skb and netlink_broadcast report similar values for w/ and
> wo/ the patch.
So its actually getting worse? :) Any other differences, like less
cycles for memcpy in netlink_trim()?
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-18 11:03 ` Patrick McHardy
@ 2008-11-19 0:03 ` Pablo Neira Ayuso
2008-11-19 12:05 ` Patrick McHardy
0 siblings, 1 reply; 29+ messages in thread
From: Pablo Neira Ayuso @ 2008-11-19 0:03 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel
Patrick McHardy wrote:
> Pablo Neira Ayuso wrote:
>> Pablo Neira Ayuso wrote:
>>> Patrick McHardy wrote:
>>>> These calculations look somewhat expensive to perform for every
>>>> message.
>>>> Do you have any numbers for this new patch that shows the difference
>>>> in CPU usage compared to the resizing done by af_netlink.c?
>>> Fabian Hugelshofer reported some reduction (~5%) on an embedded
>>> environment but he was using top to measure the difference. I'll collect
>>> some more trustable data and get back to you.
>>
>> Some oprofile results:
>>
>> wo/patch
>> 2189 0.0305 nf_conntrack_netlink.ko nf_conntrack_netlink
>> ctnetlink_conntrack_event
>>
>> w/patch
>> 2302 0.0440 nf_conntrack_netlink.ko nf_conntrack_netlink
>> ctnetlink_conntrack_event
>>
>> While __alloc_skb and netlink_broadcast report similar values for w/ and
>> wo/ the patch.
>
> So its actually getting worse? :) Any other differences, like less
> cycles for memcpy in netlink_trim()?
netlink_trim is inlined, so it is included in netlink_broadcast, and
there's no improve in memcpy nor netlink_broadcast. I'm going to repeat
all the test to check if I'm doing something wrong, until that, let's
keep it back.
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink
2008-11-19 0:03 ` Pablo Neira Ayuso
@ 2008-11-19 12:05 ` Patrick McHardy
0 siblings, 0 replies; 29+ messages in thread
From: Patrick McHardy @ 2008-11-19 12:05 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Pablo Neira Ayuso wrote:
>>> Pablo Neira Ayuso wrote:
>>>> Patrick McHardy wrote:
>>>>> These calculations look somewhat expensive to perform for every
>>>>> message.
>>>>> Do you have any numbers for this new patch that shows the difference
>>>>> in CPU usage compared to the resizing done by af_netlink.c?
>>>> Fabian Hugelshofer reported some reduction (~5%) on an embedded
>>>> environment but he was using top to measure the difference. I'll collect
>>>> some more trustable data and get back to you.
>>> Some oprofile results:
>>>
>>> wo/patch
>>> 2189 0.0305 nf_conntrack_netlink.ko nf_conntrack_netlink
>>> ctnetlink_conntrack_event
>>>
>>> w/patch
>>> 2302 0.0440 nf_conntrack_netlink.ko nf_conntrack_netlink
>>> ctnetlink_conntrack_event
>>>
>>> While __alloc_skb and netlink_broadcast report similar values for w/ and
>>> wo/ the patch.
>> So its actually getting worse? :) Any other differences, like less
>> cycles for memcpy in netlink_trim()?
>
> netlink_trim is inlined, so it is included in netlink_broadcast, and
> there's no improve in memcpy nor netlink_broadcast. I'm going to repeat
> all the test to check if I'm doing something wrong, until that, let's
> keep it back.
Thats really strange, there has to be at least some reduction of work
because we should be avoiding the packet copy in netlink_trim. Unless
there's a bug somewhere in the calculation and we're still
overallocating by more than 50%.
^ permalink raw reply [flat|nested] 29+ messages in thread
end of thread, other threads:[~2008-11-19 12:05 UTC | newest]
Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-09 8:33 [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 2/8] [PATCH] connection tracking helper name persistent aliases Pablo Neira Ayuso
2008-10-09 13:27 ` Patrick McHardy
2008-10-09 8:34 ` [PATCH 3/8] [PATCH] Helper modules load-on-demand support for ctnetlink Pablo Neira Ayuso
2008-10-09 13:34 ` Patrick McHardy
2008-10-09 13:43 ` Patrick McHardy
2008-10-09 14:50 ` Pablo Neira Ayuso
2008-10-09 16:11 ` Patrick McHardy
2008-10-09 16:43 ` Pablo Neira Ayuso
2008-10-09 8:34 ` [PATCH 4/8] [PATCH] use EOPNOTSUPP instead of EINVAL if the conntrack has no helper Pablo Neira Ayuso
2008-10-10 13:03 ` Patrick McHardy
2008-10-09 8:35 ` [PATCH 5/8] [PATCH] deliver events for conntracks created via ctnetlink Pablo Neira Ayuso
2008-10-10 13:31 ` Patrick McHardy
2008-10-11 12:05 ` Pablo Neira Ayuso
2008-10-11 14:53 ` Patrick McHardy
2008-10-09 8:35 ` [PATCH 6/8] [PATCH] bump the expectation helper name Pablo Neira Ayuso
2008-10-09 8:35 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
2008-10-09 16:45 ` Pablo Neira Ayuso
2008-10-09 8:36 ` [PATCH 8/8] [PATCH] remove module dependency between ctnetlink and nf_nat Pablo Neira Ayuso
2008-10-09 13:26 ` [PATCH 1/8] [PATCH] get rid of module refcounting in ctnetlink Patrick McHardy
-- strict thread matches above, loose matches on Subject: below --
2008-11-17 8:39 [PATCH 1/8] [PATCH] use nf_conntrack_get instead of atomic_inc Pablo Neira Ayuso
2008-11-17 8:41 ` [PATCH 7/8] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
2008-11-17 8:44 ` David Miller
2008-11-17 15:32 ` Patrick McHardy
2008-11-18 3:33 ` Pablo Neira Ayuso
2008-11-18 10:21 ` Pablo Neira Ayuso
2008-11-18 11:03 ` Patrick McHardy
2008-11-19 0:03 ` Pablo Neira Ayuso
2008-11-19 12:05 ` Patrick McHardy
2008-11-18 11:01 ` Patrick McHardy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).