* [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
@ 2009-03-17 9:49 Pablo Neira Ayuso
2009-03-17 12:03 ` Patrick McHardy
0 siblings, 1 reply; 9+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-17 9:49 UTC (permalink / raw)
To: netfilter-devel; +Cc: kaber, holger
This patch adds dynamic message size calculation for ctnetlink. This
reduces CPU consumption since the overhead in the message trimming
is removed.
Based on a suggestion from Patrick McHardy.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_conntrack_l3proto.h | 2
include/net/netfilter/nf_conntrack_l4proto.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 ++
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 ++
net/netfilter/nf_conntrack_core.c | 6 +
net/netfilter/nf_conntrack_netlink.c | 103 ++++++++++++++++++++++++
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++
net/netfilter/nf_conntrack_proto_gre.c | 1
net/netfilter/nf_conntrack_proto_sctp.c | 12 +++
net/netfilter/nf_conntrack_proto_tcp.c | 14 +++
net/netfilter/nf_conntrack_proto_udp.c | 2
net/netfilter/nf_conntrack_proto_udplite.c | 2
14 files changed, 182 insertions(+), 1 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 0378676..e0007f9 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -55,6 +55,8 @@ struct nf_conntrack_l3proto
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+
+ size_t (*nlattr_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index debdaf7..fcb549c 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -72,6 +72,8 @@ struct nf_conntrack_l4proto
const struct nf_conntrack_tuple *t);
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+ size_t (*nlattr_size)(void);
+ size_t (*nlattr_protoinfo_size)(void);
const struct nla_policy *nla_policy;
#ifdef CONFIG_SYSCTL
@@ -115,6 +117,7 @@ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
+extern size_t nf_ct_port_nlattr_size(void);
extern const struct nla_policy nf_ct_port_nla_policy[];
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4beb04f..4273aa7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -326,6 +326,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv4_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t))*2;
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -346,6 +351,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nlattr_size = ipv4_nlattr_size,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 2a8bee2..bf7a8dc 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -262,6 +262,13 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmp_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -310,6 +317,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nlattr_size = icmp_nlattr_size,
.nla_policy = icmp_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 727b953..4d3573e 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -341,6 +341,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t ipv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int32_t)*4)*2;
+}
#endif
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
@@ -353,6 +358,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
+ .nlattr_size = ipv6_nlattr_size,
.nla_policy = ipv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c323643..43b7341 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -264,6 +264,13 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static size_t icmpv6_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int16_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -296,6 +303,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_to_tuple = icmpv6_nlattr_to_tuple,
+ .nlattr_size = icmpv6_nlattr_size,
.nla_policy = icmpv6_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 90ce9dd..8b9dbb7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -902,6 +902,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
+
+size_t nf_ct_port_nlattr_size(void)
+{
+ return nla_total_size(sizeof(u_int16_t))*2;
+}
+EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cb78aa0..c5a31e5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -404,6 +404,107 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static inline size_t calculate_tuple_room_size(const struct nf_conn *ct)
+{
+ struct nf_conntrack_l3proto *l3proto;
+ struct nf_conntrack_l4proto *l4proto;
+ size_t size;
+
+ rcu_read_lock();
+ /* nested attributes CTA_TUPLE_[ORIG|REPLY] plus CTA_TUPLE_IP */
+ size = nla_total_size(0) * 2;
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ if (likely(l3proto->nlattr_size))
+ size += l3proto->nlattr_size();
+
+ /* nested attributes CTA_TUPLE_PROTO plus CTA_PROTONUM */
+ size += nla_total_size(0) + nla_total_size(sizeof(u_int8_t));
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (likely(l4proto->nlattr_size))
+ size += l4proto->nlattr_size();
+
+ rcu_read_unlock();
+ return size;
+}
+
+static inline size_t calculate_protoinfo_room_size(const struct nf_conn *ct)
+{
+ size_t size = 0;
+ struct nf_conntrack_l4proto *l4proto;
+
+ rcu_read_lock();
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->nlattr_protoinfo_size)
+ size = l4proto->nlattr_protoinfo_size();
+ rcu_read_unlock();
+
+ return size;
+}
+
+static inline size_t calculate_helper_room_size(const struct nf_conn *ct)
+{
+ const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+ size_t size = 0;
+
+ if (!help)
+ goto out;
+
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ goto out_unlock;
+
+ size = nla_total_size(0) + /* CTA_HELP */
+ nla_total_size(strlen(helper->name));
+out_unlock:
+ rcu_read_unlock();
+out:
+ return size;
+}
+
+static inline size_t
+ctnetlink_calculate_room_size(const struct nf_conn *ct, unsigned long events)
+{
+ size_t size = NLMSG_SPACE(sizeof(struct nfgenmsg));
+
+ size += calculate_tuple_room_size(ct) * 2 + /* original and reply */
+ nla_total_size(sizeof(u_int32_t)) + /* status */
+ nla_total_size(sizeof(u_int32_t)); /* id */
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (events & IPCT_MARK || ct->mark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+
+ if (events & IPCT_DESTROY) {
+ const struct nf_conn_counter *acct;
+
+ acct = nf_conn_acct_find(ct);
+ if (acct) {
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int64_t)) * 2 * 2;
+ }
+ return size;
+ }
+
+ size += nla_total_size(sizeof(u_int32_t)); /* CTA_TIMEOUT */
+ if (events & IPCT_PROTOINFO) {
+ size += calculate_protoinfo_room_size(ct);
+ }
+ if (events & IPCT_HELPER || nfct_help(ct))
+ size += calculate_helper_room_size(ct);
+ if (events & IPCT_RELATED)
+ size += calculate_tuple_room_size(ct->master);
+ if (events & IPCT_NATSEQADJ)
+ size += nla_total_size(0) * 2 +
+ nla_total_size(sizeof(u_int32_t)) * 3 * 2;
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ if (events & IPCT_SECMARK || ct->secmark)
+ size += nla_total_size(sizeof(u_int32_t));
+#endif
+ return size;
+}
+
static int ctnetlink_conntrack_event(struct notifier_block *this,
unsigned long events, void *ptr)
{
@@ -437,7 +538,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (!item->report && !nfnetlink_has_listeners(group))
return NOTIFY_DONE;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb = alloc_skb(ctnetlink_calculate_room_size(ct, events), GFP_ATOMIC);
if (!skb)
return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8fcf176..6694173 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -657,6 +657,12 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
write_unlock_bh(&dccp_lock);
return 0;
}
+
+static size_t dccp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -749,6 +755,8 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -774,6 +782,8 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.from_nlattr = nlattr_to_dccp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = dccp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 1b279f9..0156693 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -294,6 +294,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 74e0379..3b4d75f 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -537,6 +537,14 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t sctp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int32_t)) +
+ nla_total_size(sizeof(u_int32_t));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -671,6 +679,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -699,6 +709,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.from_nlattr = nlattr_to_sctp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = sctp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a1edb9c..36e2876 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1181,6 +1181,16 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
return 0;
}
+
+static size_t tcp_nlattr_protoinfo_size(void)
+{
+ return nla_total_size(0)*2 + /* CTA_PROTOINFO */
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(u_int8_t)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags)) +
+ nla_total_size(sizeof(struct nf_ct_tcp_flags));
+}
#endif
#ifdef CONFIG_SYSCTL
@@ -1399,6 +1409,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -1429,6 +1441,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.from_nlattr = nlattr_to_tcp,
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
+ .nlattr_protoinfo_size = tcp_nlattr_protoinfo_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 2b8b1f5..e9abc8d 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -193,6 +193,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -220,6 +221,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4579d8d..90f14e7 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -181,6 +181,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
@@ -204,6 +205,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
+ .nlattr_size = nf_ct_port_nlattr_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_SYSCTL
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 9:49 [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
@ 2009-03-17 12:03 ` Patrick McHardy
2009-03-17 12:09 ` Pablo Neira Ayuso
2009-03-17 12:14 ` Holger Eitzenberger
0 siblings, 2 replies; 9+ messages in thread
From: Patrick McHardy @ 2009-03-17 12:03 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel, holger
Pablo Neira Ayuso wrote:
> This patch adds dynamic message size calculation for ctnetlink. This
> reduces CPU consumption since the overhead in the message trimming
> is removed.
>
> +static inline size_t
> +ctnetlink_calculate_room_size(const struct nf_conn *ct, unsigned long events)
> +{
> + size_t size = NLMSG_SPACE(sizeof(struct nfgenmsg));
> +
> + size += calculate_tuple_room_size(ct) * 2 + /* original and reply */
> + nla_total_size(sizeof(u_int32_t)) + /* status */
> + nla_total_size(sizeof(u_int32_t)); /* id */
> +
> +#ifdef CONFIG_NF_CONNTRACK_MARK
> + if (events & IPCT_MARK || ct->mark)
> + size += nla_total_size(sizeof(u_int32_t));
> +#endif
> +
> + if (events & IPCT_DESTROY) {
> + const struct nf_conn_counter *acct;
> +
> + acct = nf_conn_acct_find(ct);
> + if (acct) {
> + size += nla_total_size(0) * 2 +
> + nla_total_size(sizeof(u_int64_t)) * 2 * 2;
> + }
> + return size;
> + }
> +...
Holger's patch pre-calculates things where possible, which reduces
run-time overhead.
> I'm not sure about this. Instead, I noticed with oprofile that we spend
> more cycles to calculate the message size. I think it's more like a
> trade-off, reducing the message size to what we need reduces the chances
> to hit ENOBUFS.
"More cycles" is to be expected for calculation (which is not done
so far), but summed up, I always suspected a measurement error and
I'd be surprised if this is correct.
So we so far have your numbers, which showed a regression, and no
numbers from Holger, which is slightly better :)
OK seriously, we need *some* numbers showing an improvement since I
have basically zero base to decide between your patches, besides the
fact that its to be expected that Holger's will be slightly faster.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 12:03 ` Patrick McHardy
@ 2009-03-17 12:09 ` Pablo Neira Ayuso
2009-03-17 12:14 ` Holger Eitzenberger
1 sibling, 0 replies; 9+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-17 12:09 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel, holger
Patrick McHardy wrote:
> OK seriously, we need *some* numbers showing an improvement since I
> have basically zero base to decide between your patches, besides the
> fact that its to be expected that Holger's will be slightly faster.
I'll do some from my side so we can put some light on this :)
--
"Los honestos son inadaptados sociales" -- Les Luthiers
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 12:03 ` Patrick McHardy
2009-03-17 12:09 ` Pablo Neira Ayuso
@ 2009-03-17 12:14 ` Holger Eitzenberger
2009-03-17 12:16 ` Patrick McHardy
1 sibling, 1 reply; 9+ messages in thread
From: Holger Eitzenberger @ 2009-03-17 12:14 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Pablo Neira Ayuso, netfilter-devel
On Tue, Mar 17, 2009 at 01:03:52PM +0100, Patrick McHardy wrote:
> OK seriously, we need *some* numbers showing an improvement since I
> have basically zero base to decide between your patches, besides the
> fact that its to be expected that Holger's will be slightly faster.
I think we can give the hard numbers in the next 1-3 days. Do you
have a special test in mind? Pablo, how did you test then?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 12:14 ` Holger Eitzenberger
@ 2009-03-17 12:16 ` Patrick McHardy
2009-03-17 22:38 ` Pablo Neira Ayuso
0 siblings, 1 reply; 9+ messages in thread
From: Patrick McHardy @ 2009-03-17 12:16 UTC (permalink / raw)
To: Patrick McHardy, Pablo Neira Ayuso, netfilter-devel
Holger Eitzenberger wrote:
> On Tue, Mar 17, 2009 at 01:03:52PM +0100, Patrick McHardy wrote:
>
>> OK seriously, we need *some* numbers showing an improvement since I
>> have basically zero base to decide between your patches, besides the
>> fact that its to be expected that Holger's will be slightly faster.
>
> I think we can give the hard numbers in the next 1-3 days. Do you
> have a special test in mind? Pablo, how did you test then?
Nothing too complicated. I guess either a raw throughput benchmark,
some cycle counting for event delivery or event delivery throughput
would all be fine.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 12:16 ` Patrick McHardy
@ 2009-03-17 22:38 ` Pablo Neira Ayuso
2009-03-18 4:41 ` Patrick McHardy
0 siblings, 1 reply; 9+ messages in thread
From: Pablo Neira Ayuso @ 2009-03-17 22:38 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel, Holger Eitzenberger
[-- Attachment #1: Type: text/plain, Size: 3196 bytes --]
Patrick McHardy wrote:
> Holger Eitzenberger wrote:
>> On Tue, Mar 17, 2009 at 01:03:52PM +0100, Patrick McHardy wrote:
>>
>>> OK seriously, we need *some* numbers showing an improvement since I
>>> have basically zero base to decide between your patches, besides the
>>> fact that its to be expected that Holger's will be slightly faster.
>>
>> I think we can give the hard numbers in the next 1-3 days. Do you
>> have a special test in mind? Pablo, how did you test then?
>
> Nothing too complicated. I guess either a raw throughput benchmark,
> some cycle counting for event delivery or event delivery throughput
> would all be fine.
I have done a toy program - I know, it can be improved a lot - to get
some numbers. Please, find it attached. Here are some results that I got
in my testbed [1]. Uff, this has been hard as the numbers don't seem
to be very conclusive.
~24000 HTTP connections/s with no events listener
= With no patch =
~19500 HTTP connections/s
AVG events/s 71779; enobufs/s=125; in 50 seconds
AVG events/s 69723; enobufs/s=123; in 89 seconds
AVG events/s 71061; enobufs/s=120; in 52 seconds
= With Pablo's =
~20500 HTTP connections/s
AVG events/s 72141; enobufs/s=151; in 65 seconds
AVG events/s 70287; enobufs/s=141; in 76 seconds
= With Holger's =
~20500-21000 HTTP connections/s
AVG events/s 68233; enobufs/s=192; in 126 seconds
AVG events/s 70241; enobufs/s=204; in 76 seconds
It seems that the results in terms of events/s are similar. While the
throughput is slightly higher with Holger's patch, the number of enobufs
errors also increases; I don't have an explanation for why the enobufs
error count increases.
I still have one concern with Holger's patch and the static calculation
approach:
+ len = NLMSG_SPACE(sizeof(struct nfgenmsg))
+ + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+ + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */
+ + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */
+ + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */
+ + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */
+ + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */
+ + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */
+ + nla_total_size(0) /* CTA_PROTOINFO */
+ + nla_total_size(0) /* CTA_HELP */
+ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */
+ + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */
+ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
+ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
+ + NLA_TYPE_SIZE(u_int32_t); /* CTA_MARK */
This calculation results in no message trim if most of those attributes
are present. However, assuming the worst case (no counters, no helper,
no mark, no master tuple, etc.), netlink_trim() may be called. My patch
calculates the exact size, so there's no trimming for any case.
[1] http://conntrack-tools.netfilter.org/testcase.html
--
"Los honestos son inadaptados sociales" -- Les Luthiers
[-- Attachment #2: ctbench2.c --]
[-- Type: text/x-csrc, Size: 1423 bytes --]
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <libnetfilter_conntrack/libnetfilter_conntrack.h>
/* Per-interval and cumulative counters for the benchmark. The *_err /
 * events pair is reset every reporting interval by event_cb(); the
 * total_* pair accumulates over the whole run for sighandler(). */
static int enobufs_err, total_enobufs;
static uint64_t events, total_events;
/* prev: time of the last per-second report; start/stop: run boundaries. */
static time_t prev, start, stop;
/*
 * Per-event callback registered with nfct_callback_register().
 *
 * Counts every received conntrack event and, roughly once per second,
 * prints the event and ENOBUFS rates for the elapsed interval, folding
 * the interval counters into the run totals before resetting them.
 *
 * Always returns NFCT_CB_CONTINUE so nfct_catch() keeps delivering.
 */
static int event_cb(enum nf_conntrack_msg_type type,
		    struct nf_conntrack *ct,
		    void *data)
{
	time_t now, diff;

	events++;
	now = time(NULL);
	diff = now - prev;
	if (diff >= 1) {
		prev = now;
		/* events is uint64_t and diff is time_t: cast explicitly
		 * instead of printing a 64-bit value with %u (undefined
		 * behavior on LP64). */
		printf("events/s %llu; enobufs/s=%llu\n",
		       (unsigned long long)(events / (uint64_t)diff),
		       (unsigned long long)(enobufs_err / diff));
		total_enobufs += enobufs_err;
		total_events += events;
		events = 0;
		enobufs_err = 0;
	}
	return NFCT_CB_CONTINUE;
}
/*
 * SIGINT handler: print the average event and ENOBUFS rates over the
 * whole run, then terminate the process.
 *
 * NOTE(review): printf()/exit() are not async-signal-safe; for this toy
 * benchmark that is tolerable, but setting a flag checked from the main
 * loop would be the strictly correct approach — confirm if reused.
 */
static void sighandler(int foo)
{
	time_t total_time;

	stop = time(NULL);
	total_time = stop - start;
	if (total_time == 0)
		total_time = 1;	/* avoid division by zero on immediate ^C */

	/* The original cast '(unsigned int) total_events/total_time' bound
	 * only to the numerator and then printed a wider type with %u;
	 * compute first, then cast for the format string. */
	printf("AVG events/s %llu; enobufs/s=%llu; in %llu seconds\n",
	       (unsigned long long)(total_events / (uint64_t)total_time),
	       (unsigned long long)(total_enobufs / total_time),
	       (unsigned long long)total_time);
	exit(1);
}
/*
 * Entry point: subscribe to all conntrack event groups and count the
 * events delivered, until SIGINT triggers sighandler(), which prints
 * the run averages and exits.
 */
int main(void)
{
	struct nfct_handle *h;

	signal(SIGINT, sighandler);

	h = nfct_open(CONNTRACK, NFCT_ALL_CT_GROUPS);
	if (!h) {
		perror("nfct_open");
		return 1;	/* was 'return 0': failure must not report success */
	}
	nfct_callback_register(h, NFCT_T_ALL, event_cb, NULL);

	printf("waiting for events...\n");
	prev = start = time(NULL);

	for (;;) {
		/* ENOBUFS means the netlink receive buffer overflowed and
		 * events were lost; count it and keep listening. */
		if (nfct_catch(h) == -1 && errno == ENOBUFS)
			enobufs_err++;
	}

	/* not reached: sighandler() calls exit() */
	nfct_close(h);
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-17 22:38 ` Pablo Neira Ayuso
@ 2009-03-18 4:41 ` Patrick McHardy
2009-03-18 8:38 ` Holger Eitzenberger
0 siblings, 1 reply; 9+ messages in thread
From: Patrick McHardy @ 2009-03-18 4:41 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel, Holger Eitzenberger
Pablo Neira Ayuso wrote:
> Patrick McHardy wrote:
>> Holger Eitzenberger wrote:
>>> On Tue, Mar 17, 2009 at 01:03:52PM +0100, Patrick McHardy wrote:
>>>
>>>> OK seriously, we need *some* numbers showing an improvement since I
>>>> have basically zero base to decide between your patches, besides the
>>>> fact that its to be expected that Holger's will be slightly faster.
>>> I think we can give the hard numbers in the next 1-3 days. Do you
>>> have a special test in mind? Pablo, how did you test then?
>> Nothing too complicated. I guess either a raw throughput benchmark,
>> some cycle counting for event delivery or event delivery throughput
>> would all be fine.
>
> I have done a toy program - I know, it can be improved a lot - to get
> some numbers. Please, find it attached. Here are some results that I got
> in my testbed [1]. Uff, this has been hard as the numbers doesn't seem
> to be very concluding.
>
> ~24000 HTTP connections/s with no events listener
>
> = With no patch =
> ~19500 HTTP connections/s
>
> AVG events/s 71779; enobufs/s=125; in 50 seconds
> AVG events/s 69723; enobufs/s=123; in 89 seconds
> AVG events/s 71061; enobufs/s=120; in 52 seconds
>
> = With Pablo's =
> ~20500 HTTP connections/s
>
> AVG events/s 72141; enobufs/s=151; in 65 seconds
> AVG events/s 70287; enobufs/s=141; in 76 seconds
>
> = With Holger's =
> ~20500-21000 HTTP connections/s
>
> AVG events/s 68233; enobufs/s=192; in 126 seconds
> AVG events/s 70241; enobufs/s=204; in 76 seconds
>
> It seems that the results in terms of events/s are similar. While the
> thoughput is slightly higher with Holger's patch, the number of enobufs
> errors also increases, I don't have an explanation why enobufs errors
> increases.
My guess would be that without either patch, the reallocation causes
less socket buffer usage. With approximate allocation, we safe the
CPU overhead of reallocating, but end up using slightly more socket
buffer space. The odd thing is that your patch also seems to increase
the overflow rate, despite using exact allocations.
Just to clarify, the test didn't use "reliable" event delivery, right?
> I still have one concern with Holger's patch and the static calculation
> approach:
>
> + len = NLMSG_SPACE(sizeof(struct nfgenmsg))
> + + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
> + + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
> + + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
> + + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */
> + + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */
> + + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */
> + + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
> + + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */
> + + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */
> + + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */
> + + nla_total_size(0) /* CTA_PROTOINFO */
> + + nla_total_size(0) /* CTA_HELP */
> + + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
> + + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */
> + + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
> + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */
> + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
> + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
> + + NLA_TYPE_SIZE(u_int32_t); /* CTA_MARK */
>
> This calculation results in no message trim if most of those attributes
> are present. However, assuming the worst case (no counters, no helper,
> no mark, no master tuple, etc.), netlink_trim() may be called. My patch
> calculates the exact size, so there's no trimming for any case.
The numbers imply that its still a net win. But its a valid point, if
the common case will still result in reallocations, it might make sense
to include the space for a few of those members optionally to make
sure we don't cross the 50% waste threshold.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-18 4:41 ` Patrick McHardy
@ 2009-03-18 8:38 ` Holger Eitzenberger
2009-03-25 17:22 ` Patrick McHardy
0 siblings, 1 reply; 9+ messages in thread
From: Holger Eitzenberger @ 2009-03-18 8:38 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Pablo Neira Ayuso, netfilter-devel
On Wed, Mar 18, 2009 at 05:41:43AM +0100, Patrick McHardy wrote:
> >This calculation results in no message trim if most of those attributes
> >are present. However, assuming the worst case (no counters, no helper,
> >no mark, no master tuple, etc.), netlink_trim() may be called. My patch
> >calculates the exact size, so there's no trimming for any case.
>
> The numbers imply that its still a net win. But its a valid point, if
> the common case will still result in reallocations, it might make sense
> to include the space for a few of those members optionally to make
> sure we don't cross the 50% waste threshold.
That's a good point.
For reference these are the attributes of TCP conntrack event, I have
marked the optional NLAs. I don't know what the ratio is in bytes
though.
CTA_TUPLE_ORIG NLA_F_NESTED
CTA_TUPLE_IP NLA_F_NESTED
CTA_IP_V4_SRC
CTA_IP_V4_DST
CTA_TUPLE_PROTO NLA_F_NESTED
CTA_PROTO_NUM
CTA_PROTO_SRC_PORT
CTA_PROTO_DST_PORT
CTA_TUPLE_REPLY NLA_F_NESTED
CTA_TUPLE_IP NLA_F_NESTED
/* ipv6_tuple_to_nl_attr() */
CTA_IP_V4_SRC
CTA_IP_V4_DST
CTA_TUPLE_PROTO NLA_F_NESTED
CTA_PROTO_NUM
CTA_PROTO_SRC_PORT
CTA_PROTO_DST_PORT
CTA_ID
CTA_STATUS
CTA_COUNTERS_ORIG NLA_F_NESTED optional
CTA_COUNTERS_PACKETS
CTA_COUNTERS_BYTES
CTA_COUNTERS_REPLY NLA_F_NESTED optional
CTA_COUNTERS_PACKETS
CTA_COUNTERS_BYTES
CTA_TIMEOUT
CTA_PROTOINFO NLA_F_NESTED
CTA_PROTOINFO_TCP NLA_F_NESTED
CTA_PROTOINFO_TCP_STATE
CTA_PROTOINFO_TCP_WSCALE_ORIGINAL
CTA_PROTOINFO_TCP_WSCALE_REPLY
CTA_PROTOINFO_TCP_FLAGS_ORIGINAL
CTA_PROTOINFO_TCP_FLAGS_REPLY
CTA_HELP optional
CTA_HELP_NAME
CTA_SECMARK optional
CTA_TUPLE_MASTER NLA_F_NESTED optional
CTA_TUPLE_IP NLA_F_NESTED
CTA_IP_V4_SRC
CTA_IP_V4_DST
CTA_TUPLE_PROTO NLA_F_NESTED
CTA_PROTO_NUM
CTA_PROTO_SRC_PORT
CTA_PROTO_DST_PORT
CTA_NAT_SEQ_ADJ_ORIG NLA_F_NESTED optional
CTA_NAT_SEQ_CORRECTION_POS
CTA_NAT_SEQ_OFFSET_BEFORE
CTA_NAT_SEQ_OFFSET_AFTER
CTA_NAT_SEQ_ADJ_REPLY NLA_F_NESTED optional
CTA_NAT_SEQ_CORRECTION_POS
CTA_NAT_SEQ_OFFSET_BEFORE
CTA_NAT_SEQ_OFFSET_AFTER
CTA_MARK optional
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink
2009-03-18 8:38 ` Holger Eitzenberger
@ 2009-03-25 17:22 ` Patrick McHardy
0 siblings, 0 replies; 9+ messages in thread
From: Patrick McHardy @ 2009-03-25 17:22 UTC (permalink / raw)
To: Patrick McHardy, Pablo Neira Ayuso, netfilter-devel
Holger Eitzenberger wrote:
> On Wed, Mar 18, 2009 at 05:41:43AM +0100, Patrick McHardy wrote:
>
>>> This calculation results in no message trim if most of those attributes
>>> are present. However, assuming the worst case (no counters, no helper,
>>> no mark, no master tuple, etc.), netlink_trim() may be called. My patch
>>> calculates the exact size, so there's no trimming for any case.
>> The numbers imply that its still a net win. But its a valid point, if
>> the common case will still result in reallocations, it might make sense
>> to include the space for a few of those members optionally to make
>> sure we don't cross the 50% waste threshold.
>
> That's a good point.
>
> For reference these are the attributes of TCP conntrack event, I have
> marked the optional NLAs. I don't know what the ratio is in bytes
> though.
Thanks Holger. I'm going to apply your patches since they're already
an improvement, we can do further refinement on top.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2009-03-25 17:22 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-03-17 9:49 [PATCH] [PATCH] dynamic calculation of event message size for ctnetlink Pablo Neira Ayuso
2009-03-17 12:03 ` Patrick McHardy
2009-03-17 12:09 ` Pablo Neira Ayuso
2009-03-17 12:14 ` Holger Eitzenberger
2009-03-17 12:16 ` Patrick McHardy
2009-03-17 22:38 ` Pablo Neira Ayuso
2009-03-18 4:41 ` Patrick McHardy
2009-03-18 8:38 ` Holger Eitzenberger
2009-03-25 17:22 ` Patrick McHardy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).