* [PATCH 1/6] netfilter: change elements in x_tables
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
` (5 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: x_tables.patch --]
[-- Type: text/plain, Size: 1062 bytes --]
Change the private pointer to its proper type rather than an anonymous
void pointer. Keep the actively used elements on the same cache line.
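For reference, struct xt_table then reads roughly as follows (a sketch
assembled from the hunks below; fields and comments not touched by this
patch are omitted):

struct xt_table {
	struct list_head list;

	/* What hooks you will enter on */
	unsigned int valid_hooks;

	/* Lock for the curtain */
	rwlock_t lock;

	/* Man behind the curtain... */
	struct xt_table_info *private;

	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;

	u_int8_t af;	/* address/protocol family */

	/* A unique name, now last so it stays off the hot cache line */
	const char name[XT_TABLE_MAXNAMELEN];
};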
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-26 17:24:43.251543415 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-26 17:29:12.510649107 -0800
@@ -349,9 +349,6 @@ struct xt_table
{
struct list_head list;
- /* A unique name... */
- const char name[XT_TABLE_MAXNAMELEN];
-
/* What hooks you will enter on */
unsigned int valid_hooks;
@@ -359,13 +356,15 @@ struct xt_table
rwlock_t lock;
/* Man behind the curtain... */
- //struct ip6t_table_info *private;
- void *private;
+ struct xt_table_info *private;
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
u_int8_t af; /* address/protocol family */
+
+ /* A unique name... */
+ const char name[XT_TABLE_MAXNAMELEN];
};
#include <linux/netfilter_ipv4.h>
--
* [PATCH 2/6] netfilter: remove unneeded initializations
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 3/6] ebtables: " Stephen Hemminger
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: iptables-lock-init.patch --]
[-- Type: text/plain, Size: 4593 bytes --]
Later patches change the locking on xt_table; the initialization of
the lock element is not needed since the lock is always initialized in
xt_register_table anyway.
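For context, the registration path already does this initialization; an
abridged excerpt of the relevant lines from xt_register_table() in
net/netfilter/x_tables.c (the same lines are visible as context in the
later RCU patch):

	/* Simplifies replace_table code. */
	table->private = bootstrap;
	rwlock_init(&table->lock);

so the static __RW_LOCK_UNLOCKED() initializers removed below are
redundant.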
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
net/ipv4/netfilter/arptable_filter.c | 2 --
net/ipv4/netfilter/iptable_filter.c | 1 -
net/ipv4/netfilter/iptable_mangle.c | 1 -
net/ipv4/netfilter/iptable_raw.c | 1 -
net/ipv4/netfilter/iptable_security.c | 1 -
net/ipv4/netfilter/nf_nat_rule.c | 1 -
net/ipv6/netfilter/ip6table_filter.c | 1 -
net/ipv6/netfilter/ip6table_mangle.c | 1 -
net/ipv6/netfilter/ip6table_raw.c | 1 -
net/ipv6/netfilter/ip6table_security.c | 1 -
10 files changed, 11 deletions(-)
--- a/net/ipv4/netfilter/arptable_filter.c 2009-01-26 17:24:43.687542005 -0800
+++ b/net/ipv4/netfilter/arptable_filter.c 2009-01-26 19:50:37.891042244 -0800
@@ -48,8 +48,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
- .private = NULL,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
};
--- a/net/ipv4/netfilter/iptable_filter.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_filter.c 2009-01-26 19:50:37.891042244 -0800
@@ -56,7 +56,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/iptable_raw.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_raw.c 2009-01-26 19:50:37.891042244 -0800
@@ -39,7 +39,6 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/iptable_security.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_security.c 2009-01-26 19:50:37.891042244 -0800
@@ -60,7 +60,6 @@ static struct
static struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(security_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/nf_nat_rule.c 2009-01-26 17:24:43.695541481 -0800
+++ b/net/ipv4/netfilter/nf_nat_rule.c 2009-01-26 19:51:20.338030618 -0800
@@ -61,7 +61,6 @@ static struct
static struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv6/netfilter/ip6table_filter.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_filter.c 2009-01-26 19:50:37.895044361 -0800
@@ -54,7 +54,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_mangle.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_mangle.c 2009-01-26 19:50:37.895044361 -0800
@@ -60,7 +60,6 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_raw.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_raw.c 2009-01-26 19:50:37.895044361 -0800
@@ -38,7 +38,6 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_security.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_security.c 2009-01-26 19:50:37.895044361 -0800
@@ -59,7 +59,6 @@ static struct
static struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(security_table.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv4/netfilter/iptable_mangle.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_mangle.c 2009-01-26 19:50:37.895044361 -0800
@@ -67,7 +67,6 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--
* [PATCH 3/6] ebtables: remove unneeded initializations
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
2009-01-29 19:12 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
` (3 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: ebtables-lock-init.patch --]
[-- Type: text/plain, Size: 1537 bytes --]
The initialization of the lock element is not needed
since the lock is always initialized in ebt_register_table.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
net/bridge/netfilter/ebtable_broute.c | 1 -
net/bridge/netfilter/ebtable_filter.c | 1 -
net/bridge/netfilter/ebtable_nat.c | 1 -
3 files changed, 3 deletions(-)
--- a/net/bridge/netfilter/ebtable_broute.c 2009-01-27 17:09:10.313100854 -0800
+++ b/net/bridge/netfilter/ebtable_broute.c 2009-01-27 17:09:15.862142852 -0800
@@ -46,7 +46,6 @@ static struct ebt_table broute_table =
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .lock = __RW_LOCK_UNLOCKED(broute_table.lock),
.check = check,
.me = THIS_MODULE,
};
--- a/net/bridge/netfilter/ebtable_filter.c 2009-01-27 17:08:50.725100955 -0800
+++ b/net/bridge/netfilter/ebtable_filter.c 2009-01-27 17:08:53.828611768 -0800
@@ -55,7 +55,6 @@ static struct ebt_table frame_filter =
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(frame_filter.lock),
.check = check,
.me = THIS_MODULE,
};
--- a/net/bridge/netfilter/ebtable_nat.c 2009-01-27 17:09:22.896602465 -0800
+++ b/net/bridge/netfilter/ebtable_nat.c 2009-01-27 17:09:31.589085328 -0800
@@ -55,7 +55,6 @@ static struct ebt_table frame_nat =
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(frame_nat.lock),
.check = check,
.me = THIS_MODULE,
};
--
* [PATCH 4/6] netfilter: abstract xt_counters
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (2 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 3/6] ebtables: " Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
` (2 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: xtables-counter.patch --]
[-- Type: text/plain, Size: 5552 bytes --]
Break out the parts of the x_tables code that manipulate counters so
that changes to the locking are easier.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 15 ++++++++++++++-
net/ipv4/netfilter/arp_tables.c | 12 ++++++++----
net/ipv4/netfilter/ip_tables.c | 12 ++++++++----
net/ipv6/netfilter/ip6_tables.c | 24 ++++++++++++++----------
4 files changed, 44 insertions(+), 19 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-29 09:45:14.475070733 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 10:51:50.194362708 -0800
@@ -105,13 +105,26 @@ struct _xt_align
#define XT_ERROR_TARGET "ERROR"
#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+#define SUM_COUNTER(s,c) do { (s).bcnt += (c).bcnt; (s).pcnt += (c).pcnt; } while(0)
struct xt_counters
{
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
+static inline void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c)
+{
+ *v = *c;
+}
+
+static inline void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
+{
+ c->pcnt += p;
+ c->bcnt += b;
+}
+
+
/* The argument to IPT_SO_ADD_COUNTERS. */
struct xt_counters_info
{
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 09:52:23.172320248 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 10:53:13.152543484 -0800
@@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
- ADD_COUNTER(e->counters, hdr_len, 1);
+ xt_incr_counter(&e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -662,10 +662,14 @@ static int translate_table(const char *n
/* Gets counters. */
static inline int add_entry_to_counter(const struct arpt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -709,6 +713,7 @@ static void get_counters(const struct xt
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1082,8 +1087,7 @@ static inline int add_counter_to_entry(s
const struct xt_counters addme[],
unsigned int *i)
{
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 09:52:23.200320370 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 10:52:43.235570700 -0800
@@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ xt_incr_counter(&e->counters, ntohs(ip->tot_len), 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -872,10 +872,14 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -921,6 +925,7 @@ get_counters(const struct xt_table_info
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1327,8 +1332,7 @@ add_counter_to_entry(struct ipt_entry *e
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 09:52:26.618819778 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 10:53:06.240695087 -0800
@@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(skb)->payload_len) +
- sizeof(struct ipv6hdr), 1);
+ xt_incr_counter(&e->counters,
+ ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr), 1);
t = ip6t_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -901,10 +901,14 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ip6t_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -948,10 +952,11 @@ get_counters(const struct xt_table_info
continue;
i = 0;
IP6T_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ t->size,
+ add_entry_to_counter,
+ cpu,
+ counters,
+ &i);
}
}
@@ -1357,8 +1362,7 @@ add_counter_to_entry(struct ip6t_entry *
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--
* [PATCH 5/6] netfilter: use sequence number synchronization for counters
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (3 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-30 8:03 ` Eric Dumazet
2009-01-29 19:12 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
2009-01-29 23:39 ` [PATCH 0/6] iptables: read/write lock elimination (v0.4) Rick Jones
6 siblings, 1 reply; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: counters-seqcount.patch --]
[-- Type: text/plain, Size: 3550 bytes --]
Change how synchronization is done on the iptables counters: use a
seqcount wrapper instead of depending on the reader/writer lock.
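For illustration, this is how a reader composes the new helper with the
summation helper from the previous patch (the wrapper function below is
only a sketch, not part of this patch; xt_fetch_counter() and
SUM_COUNTER() are the real names from the hunks):

static void snapshot_one_entry(struct xt_counters *total, int cpu,
			       const struct xt_counters *counters)
{
	struct xt_counters snap;

	/* retries the copy if that cpu's writer bumped its seqcount meanwhile */
	xt_fetch_counter(&snap, cpu, counters);

	/* fold the consistent snapshot into the running total */
	SUM_COUNTER(*total, snap);
}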
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 14 +++-----------
net/ipv4/netfilter/arp_tables.c | 4 ++--
net/ipv4/netfilter/ip_tables.c | 4 ++--
net/ipv6/netfilter/ip6_tables.c | 4 ++--
net/netfilter/x_tables.c | 28 ++++++++++++++++++++++++++++
5 files changed, 37 insertions(+), 17 deletions(-)
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:08:38.735069921 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:09:20.720069979 -0800
@@ -736,9 +736,9 @@ static inline struct xt_counters *alloc_
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:08:38.723069778 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:09:20.720069979 -0800
@@ -947,9 +947,9 @@ static struct xt_counters * alloc_counte
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:08:38.763071181 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:09:20.724069866 -0800
@@ -976,9 +976,9 @@ static struct xt_counters *alloc_counter
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/netfilter/x_tables.c 2009-01-29 11:08:38.747070716 -0800
+++ b/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
@@ -577,6 +577,34 @@ int xt_compat_target_to_user(struct xt_e
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif
+static DEFINE_PER_CPU(seqcount_t, xt_counter_sequence);
+
+void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c)
+{
+ seqcount_t *seq = &per_cpu(xt_counter_sequence, cpu);
+ unsigned start;
+
+ do {
+ start = read_seqcount_begin(seq);
+ *v = *c;
+ } while (read_seqcount_retry(seq, start));
+}
+EXPORT_SYMBOL_GPL(xt_fetch_counter);
+
+void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
+{
+ seqcount_t *seq = &__get_cpu_var(xt_counter_sequence);
+
+ write_seqcount_begin(seq);
+ c->pcnt += p;
+ c->bcnt += b;
+ write_seqcount_end(seq);
+
+}
+EXPORT_SYMBOL_GPL(xt_incr_counter);
+
+
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
struct xt_table_info *newinfo;
--- a/include/linux/netfilter/x_tables.h 2009-01-29 11:08:38.779071484 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 11:09:20.724069866 -0800
@@ -112,17 +112,9 @@ struct xt_counters
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
-static inline void xt_fetch_counter(struct xt_counters *v, int cpu,
- const struct xt_counters *c)
-{
- *v = *c;
-}
-
-static inline void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
-{
- c->pcnt += p;
- c->bcnt += b;
-}
+extern void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c);
+extern void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p);
/* The argument to IPT_SO_ADD_COUNTERS. */
--
* Re: [PATCH 5/6] netfilter: use sequence number synchronization for counters
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
@ 2009-01-30 8:03 ` Eric Dumazet
0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2009-01-30 8:03 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, Patrick McHardy, netdev, netfilter-devel
Stephen Hemminger wrote:
> Change how synchronization is done on the iptables counters. Use seqcount
> wrapper instead of depending on reader/writer lock.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
> --- a/net/netfilter/x_tables.c 2009-01-29 11:08:38.747070716 -0800
> +++ b/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
> @@ -577,6 +577,34 @@ int xt_compat_target_to_user(struct xt_e
> EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
> #endif
>
> +static DEFINE_PER_CPU(seqcount_t, xt_counter_sequence);
> +
> +void xt_fetch_counter(struct xt_counters *v, int cpu,
> + const struct xt_counters *c)
> +{
> + seqcount_t *seq = &per_cpu(xt_counter_sequence, cpu);
> + unsigned start;
> +
> + do {
> + start = read_seqcount_begin(seq);
> + *v = *c;
> + } while (read_seqcount_retry(seq, start));
> +}
> +EXPORT_SYMBOL_GPL(xt_fetch_counter);
> +
> +void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
You really want an inline xt_incr_counter() function here to speed up ipt_do_table();
something along the lines of the sketch at the end of this mail would do.
I agree xt_fetch_counter() is not time critical and can stay out of line.
> +{
> + seqcount_t *seq = &__get_cpu_var(xt_counter_sequence);
> +
> + write_seqcount_begin(seq);
> + c->pcnt += p;
> + c->bcnt += b;
> + write_seqcount_end(seq);
> +
> +}
> +EXPORT_SYMBOL_GPL(xt_incr_counter);
> +
> +
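Something like this (an untested sketch, not from the posted patch; the
per-cpu seqcount would then need to be declared in the header and
exported for modules):

/* in include/linux/netfilter/x_tables.h */
DECLARE_PER_CPU(seqcount_t, xt_counter_sequence);

static inline void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
{
	seqcount_t *seq = &__get_cpu_var(xt_counter_sequence);

	write_seqcount_begin(seq);
	c->pcnt += p;
	c->bcnt += b;
	write_seqcount_end(seq);
}

/* in net/netfilter/x_tables.c */
DEFINE_PER_CPU(seqcount_t, xt_counter_sequence);
EXPORT_PER_CPU_SYMBOL_GPL(xt_counter_sequence);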
* [PATCH 6/6] netfilter: convert x_tables to use RCU
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (4 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 23:39 ` [PATCH 0/6] iptables: read/write lock elimination (v0.4) Rick Jones
6 siblings, 0 replies; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: iptables-rcu.patch --]
[-- Type: text/plain, Size: 8125 bytes --]
Replace the existing reader/writer lock with Read-Copy-Update to
eliminate the overhead of taking a read lock on each incoming packet.
This should reduce the overhead of iptables, especially on SMP
systems.
The previous code used a reader/writer lock for two purposes.
The first was to ensure that the xt_table_info reference was not in
the process of being changed. Since xt_table_info is only freed via one
routine, this was a direct conversion to RCU.
The other use of the reader/writer lock was to block changes
to the counters while they were being read. That synchronization was
addressed by the previous patch, but we still need to make sure the
table info isn't going away.
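As a sketch, the resulting reader/updater protocol looks like this
(the function names below are illustrative only; the calls inside
mirror the hunks):

/* packet path, runs in softirq context */
static void reader_side(struct xt_table *table)
{
	struct xt_table_info *private;

	rcu_read_lock_bh();
	private = rcu_dereference(table->private);
	/* ... walk private->entries[smp_processor_id()] ... */
	rcu_read_unlock_bh();
}

/* table replacement, serialized by table->lock */
static void updater_side(struct xt_table *table, struct xt_table_info *newinfo)
{
	struct xt_table_info *old;

	spin_lock_bh(&table->lock);
	old = table->private;
	rcu_assign_pointer(table->private, newinfo);
	spin_unlock_bh(&table->lock);

	/* old info is freed via call_rcu() once all readers are done */
	xt_free_table_info(old);
}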
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 10 ++++++--
net/ipv4/netfilter/arp_tables.c | 12 ++++-----
net/ipv4/netfilter/ip_tables.c | 12 ++++-----
net/ipv6/netfilter/ip6_tables.c | 12 ++++-----
net/netfilter/x_tables.c | 45 ++++++++++++++++++++++++++-----------
5 files changed, 58 insertions(+), 33 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-29 11:09:20.724069866 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 11:10:49.823071289 -0800
@@ -357,8 +357,8 @@ struct xt_table
/* What hooks you will enter on */
unsigned int valid_hooks;
- /* Lock for the curtain */
- rwlock_t lock;
+ /* Lock for curtain */
+ spinlock_t lock;
/* Man behind the curtain... */
struct xt_table_info *private;
@@ -388,6 +388,12 @@ struct xt_table_info
unsigned int hook_entry[NF_INET_NUMHOOKS];
unsigned int underflow[NF_INET_NUMHOOKS];
+ /* For the dustman... */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
+
/* ipt_entry tables: one per CPU */
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
char *entries[1];
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:09:20.720069979 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:10:49.823071289 -0800
@@ -237,8 +237,8 @@ unsigned int arpt_do_table(struct sk_buf
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- read_lock_bh(&table->lock);
- private = table->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -311,7 +311,7 @@ unsigned int arpt_do_table(struct sk_buf
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
if (hotdrop)
return NF_DROP;
@@ -1152,8 +1152,8 @@ static int do_add_counters(struct net *n
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1168,7 +1168,7 @@ static int do_add_counters(struct net *n
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:09:20.720069979 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:10:49.827070289 -0800
@@ -347,9 +347,9 @@ ipt_do_table(struct sk_buff *skb,
mtpar.family = tgpar.family = NFPROTO_IPV4;
tgpar.hooknum = hook;
- read_lock_bh(&table->lock);
+ rcu_read_lock_bh();
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- private = table->private;
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -445,7 +445,7 @@ ipt_do_table(struct sk_buff *skb,
}
} while (!hotdrop);
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -1397,8 +1397,8 @@ do_add_counters(struct net *net, void __
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1413,7 +1413,7 @@ do_add_counters(struct net *net, void __
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:09:20.724069866 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:10:49.827070289 -0800
@@ -373,9 +373,9 @@ ip6t_do_table(struct sk_buff *skb,
mtpar.family = tgpar.family = NFPROTO_IPV6;
tgpar.hooknum = hook;
- read_lock_bh(&table->lock);
+ rcu_read_lock_bh();
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- private = table->private;
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -474,7 +474,7 @@ ip6t_do_table(struct sk_buff *skb,
#ifdef CONFIG_NETFILTER_DEBUG
((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -1428,8 +1428,8 @@ do_add_counters(struct net *net, void __
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1444,7 +1444,7 @@ do_add_counters(struct net *net, void __
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
+++ b/net/netfilter/x_tables.c 2009-01-29 11:11:43.496192079 -0800
@@ -639,18 +639,37 @@ struct xt_table_info *xt_alloc_table_inf
}
EXPORT_SYMBOL(xt_alloc_table_info);
-void xt_free_table_info(struct xt_table_info *info)
+/* callback to do free for vmalloc'd case */
+static void xt_free_table_info_work(struct work_struct *arg)
{
- int cpu;
+ struct xt_table_info *info = container_of(arg, struct xt_table_info, work);
+ unsigned int cpu;
- for_each_possible_cpu(cpu) {
- if (info->size <= PAGE_SIZE)
- kfree(info->entries[cpu]);
- else
- vfree(info->entries[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ vfree(info->entries[cpu]);
kfree(info);
}
+
+static void xt_free_table_info_rcu(struct rcu_head *arg)
+{
+ struct xt_table_info *info = container_of(arg, struct xt_table_info, rcu);
+
+ if (info->size <= PAGE_SIZE) {
+ unsigned int cpu;
+ for_each_possible_cpu(cpu)
+ kfree(info->entries[cpu]);
+ kfree(info);
+ } else {
+ /* can't safely call vfree in current context */
+ INIT_WORK(&info->work, xt_free_table_info_work);
+ schedule_work(&info->work);
+ }
+}
+
+void xt_free_table_info(struct xt_table_info *info)
+{
+ call_rcu(&info->rcu, xt_free_table_info_rcu);
+}
EXPORT_SYMBOL(xt_free_table_info);
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
@@ -699,20 +718,20 @@ xt_replace_table(struct xt_table *table,
struct xt_table_info *oldinfo, *private;
/* Do the substitution. */
- write_lock_bh(&table->lock);
+ spin_lock_bh(&table->lock);
private = table->private;
/* Check inside lock: is the old number correct? */
if (num_counters != private->number) {
duprintf("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number);
- write_unlock_bh(&table->lock);
+ spin_unlock_bh(&table->lock);
*error = -EAGAIN;
return NULL;
}
oldinfo = private;
- table->private = newinfo;
+ rcu_assign_pointer(table->private, newinfo);
newinfo->initial_entries = oldinfo->initial_entries;
- write_unlock_bh(&table->lock);
+ spin_unlock_bh(&table->lock);
return oldinfo;
}
@@ -747,7 +766,7 @@ struct xt_table *xt_register_table(struc
/* Simplifies replace_table code. */
table->private = bootstrap;
- rwlock_init(&table->lock);
+ spin_lock_init(&table->lock);
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
--
* Re: [PATCH 0/6] iptables: read/write lock elimination (v0.4)
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (5 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
@ 2009-01-29 23:39 ` Rick Jones
6 siblings, 0 replies; 11+ messages in thread
From: Rick Jones @ 2009-01-29 23:39 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, Patrick McHardy, netdev, netfilter-devel
Stephen Hemminger wrote:
> This is an update to last round of patches, using Eric's suggestion
> to move the sequence count to top level per cpu variable. Also encapsulated
> the counter manipulation.
Is this version one you would like me to toss into the crucible of the 32-core
setup, or do you want it to have more eyeball time first?
rick jones
* [PATCH 4/6] netfilter: abstract xt_counters
2009-01-30 21:57 [PATCH 0/6] iptables: eliminate read/write lock (v0.4) Stephen Hemminger
@ 2009-01-30 21:57 ` Stephen Hemminger
2009-02-01 12:25 ` Eric Dumazet
0 siblings, 1 reply; 11+ messages in thread
From: Stephen Hemminger @ 2009-01-30 21:57 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: xtables-counter.patch --]
[-- Type: text/plain, Size: 6013 bytes --]
Break out the parts of the x_tables code that manipulate counters so
that changes to the locking are easier.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 6 +++++-
net/ipv4/netfilter/arp_tables.c | 9 +++++----
net/ipv4/netfilter/ip_tables.c | 9 +++++----
net/ipv6/netfilter/ip6_tables.c | 21 +++++++++++----------
net/netfilter/x_tables.c | 15 +++++++++++++++
5 files changed, 41 insertions(+), 19 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-30 08:31:48.630454493 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-30 09:14:01.294680339 -0800
@@ -105,13 +105,17 @@ struct _xt_align
#define XT_ERROR_TARGET "ERROR"
#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
struct xt_counters
{
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
+extern void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p);
+extern void xt_sum_counter(struct xt_counters *t,
+ int cpu, const struct xt_counters *c);
+
+
/* The argument to IPT_SO_ADD_COUNTERS. */
struct xt_counters_info
{
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-30 08:31:48.569479503 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-30 09:12:40.181542286 -0800
@@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
- ADD_COUNTER(e->counters, hdr_len, 1);
+ xt_add_counter(&e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -662,10 +662,11 @@ static int translate_table(const char *n
/* Gets counters. */
static inline int add_entry_to_counter(const struct arpt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ xt_sum_counter(total, cpu, &e->counters);
(*i)++;
return 0;
@@ -709,6 +710,7 @@ static void get_counters(const struct xt
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1082,8 +1084,7 @@ static inline int add_counter_to_entry(s
const struct xt_counters addme[],
unsigned int *i)
{
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-30 08:31:48.538730580 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-30 09:12:40.169542168 -0800
@@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ xt_add_counter(&e->counters, ntohs(ip->tot_len), 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -872,10 +872,11 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ xt_sum_counter(total, cpu, &e->counters);
(*i)++;
return 0;
@@ -921,6 +922,7 @@ get_counters(const struct xt_table_info
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1327,8 +1329,7 @@ add_counter_to_entry(struct ipt_entry *e
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-30 08:31:48.605479850 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-30 09:12:40.205542065 -0800
@@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(skb)->payload_len) +
- sizeof(struct ipv6hdr), 1);
+ xt_add_counter(&e->counters,
+ ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr), 1);
t = ip6t_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -901,10 +901,11 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ip6t_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ xt_sum_counter(total, cpu, &e->counters);
(*i)++;
return 0;
@@ -948,10 +949,11 @@ get_counters(const struct xt_table_info
continue;
i = 0;
IP6T_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ t->size,
+ add_entry_to_counter,
+ cpu,
+ counters,
+ &i);
}
}
@@ -1357,8 +1359,7 @@ add_counter_to_entry(struct ip6t_entry *
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
(*i)++;
return 0;
--- a/net/netfilter/x_tables.c 2009-01-30 08:38:32.949293300 -0800
+++ b/net/netfilter/x_tables.c 2009-01-30 09:13:27.636792850 -0800
@@ -577,6 +577,21 @@ int xt_compat_target_to_user(struct xt_e
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif
+void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p)
+{
+ c->bcnt += b;
+ c->pcnt += p;
+}
+EXPORT_SYMBOL_GPL(xt_add_counter);
+
+void xt_sum_counter(struct xt_counters *t, int cpu,
+ const struct xt_counters *c)
+{
+ t->pcnt += c->pcnt;
+ t->bcnt += c->bcnt;
+}
+EXPORT_SYMBOL_GPL(xt_sum_counter);
+
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
struct xt_table_info *newinfo;
--
* Re: [PATCH 4/6] netfilter: abstract xt_counters
2009-01-30 21:57 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
@ 2009-02-01 12:25 ` Eric Dumazet
0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2009-02-01 12:25 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev
Stephen Hemminger wrote:
> Break out the parts of the x_tables code that manipulates counters so
> changes to locking are easier.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
>
> ---
> include/linux/netfilter/x_tables.h | 6 +++++-
> net/ipv4/netfilter/arp_tables.c | 9 +++++----
> net/ipv4/netfilter/ip_tables.c | 9 +++++----
> net/ipv6/netfilter/ip6_tables.c | 21 +++++++++++----------
> net/netfilter/x_tables.c | 15 +++++++++++++++
> 5 files changed, 41 insertions(+), 19 deletions(-)
>
> --- a/include/linux/netfilter/x_tables.h 2009-01-30 08:31:48.630454493 -0800
> +++ b/include/linux/netfilter/x_tables.h 2009-01-30 09:14:01.294680339 -0800
> @@ -105,13 +105,17 @@ struct _xt_align
> #define XT_ERROR_TARGET "ERROR"
>
> #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
> -#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
>
> struct xt_counters
> {
> u_int64_t pcnt, bcnt; /* Packet and byte counters */
> };
>
> +extern void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p);
> +extern void xt_sum_counter(struct xt_counters *t,
> + int cpu, const struct xt_counters *c);
> +
> +
> /* The argument to IPT_SO_ADD_COUNTERS. */
> struct xt_counters_info
> {
> --- a/net/ipv4/netfilter/arp_tables.c 2009-01-30 08:31:48.569479503 -0800
> +++ b/net/ipv4/netfilter/arp_tables.c 2009-01-30 09:12:40.181542286 -0800
> @@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
>
> hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
> (2 * skb->dev->addr_len);
> - ADD_COUNTER(e->counters, hdr_len, 1);
> + xt_add_counter(&e->counters, hdr_len, 1);
>
> t = arpt_get_target(e);
>
> @@ -662,10 +662,11 @@ static int translate_table(const char *n
>
> /* Gets counters. */
> static inline int add_entry_to_counter(const struct arpt_entry *e,
> + int cpu,
> struct xt_counters total[],
> unsigned int *i)
> {
> - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> + xt_sum_counter(total, cpu, &e->counters);
>
> (*i)++;
> return 0;
> @@ -709,6 +710,7 @@ static void get_counters(const struct xt
> ARPT_ENTRY_ITERATE(t->entries[cpu],
> t->size,
> add_entry_to_counter,
> + cpu,
> counters,
> &i);
> }
> @@ -1082,8 +1084,7 @@ static inline int add_counter_to_entry(s
> const struct xt_counters addme[],
> unsigned int *i)
> {
> -
> - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> + xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>
> (*i)++;
> return 0;
> --- a/net/ipv4/netfilter/ip_tables.c 2009-01-30 08:31:48.538730580 -0800
> +++ b/net/ipv4/netfilter/ip_tables.c 2009-01-30 09:12:40.169542168 -0800
> @@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
> if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
> goto no_match;
>
> - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
> + xt_add_counter(&e->counters, ntohs(ip->tot_len), 1);
>
> t = ipt_get_target(e);
> IP_NF_ASSERT(t->u.kernel.target);
> @@ -872,10 +872,11 @@ translate_table(const char *name,
> /* Gets counters. */
> static inline int
> add_entry_to_counter(const struct ipt_entry *e,
> + int cpu,
> struct xt_counters total[],
> unsigned int *i)
> {
> - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> + xt_sum_counter(total, cpu, &e->counters);
>
> (*i)++;
> return 0;
> @@ -921,6 +922,7 @@ get_counters(const struct xt_table_info
> IPT_ENTRY_ITERATE(t->entries[cpu],
> t->size,
> add_entry_to_counter,
> + cpu,
> counters,
> &i);
> }
> @@ -1327,8 +1329,7 @@ add_counter_to_entry(struct ipt_entry *e
> (long unsigned int)addme[*i].pcnt,
> (long unsigned int)addme[*i].bcnt);
> #endif
> -
> - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> + xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>
> (*i)++;
> return 0;
> --- a/net/ipv6/netfilter/ip6_tables.c 2009-01-30 08:31:48.605479850 -0800
> +++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-30 09:12:40.205542065 -0800
> @@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
> if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
> goto no_match;
>
> - ADD_COUNTER(e->counters,
> - ntohs(ipv6_hdr(skb)->payload_len) +
> - sizeof(struct ipv6hdr), 1);
> + xt_add_counter(&e->counters,
> + ntohs(ipv6_hdr(skb)->payload_len) +
> + sizeof(struct ipv6hdr), 1);
>
> t = ip6t_get_target(e);
> IP_NF_ASSERT(t->u.kernel.target);
> @@ -901,10 +901,11 @@ translate_table(const char *name,
> /* Gets counters. */
> static inline int
> add_entry_to_counter(const struct ip6t_entry *e,
> + int cpu,
> struct xt_counters total[],
> unsigned int *i)
> {
> - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
> + xt_sum_counter(total, cpu, &e->counters);
>
> (*i)++;
> return 0;
> @@ -948,10 +949,11 @@ get_counters(const struct xt_table_info
> continue;
> i = 0;
> IP6T_ENTRY_ITERATE(t->entries[cpu],
> - t->size,
> - add_entry_to_counter,
> - counters,
> - &i);
> + t->size,
> + add_entry_to_counter,
> + cpu,
> + counters,
> + &i);
> }
> }
>
> @@ -1357,8 +1359,7 @@ add_counter_to_entry(struct ip6t_entry *
> (long unsigned int)addme[*i].pcnt,
> (long unsigned int)addme[*i].bcnt);
> #endif
> -
> - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
> + xt_add_counter(&e->counters, addme[*i].bcnt, addme[*i].pcnt);
>
> (*i)++;
> return 0;
> --- a/net/netfilter/x_tables.c 2009-01-30 08:38:32.949293300 -0800
> +++ b/net/netfilter/x_tables.c 2009-01-30 09:13:27.636792850 -0800
> @@ -577,6 +577,21 @@ int xt_compat_target_to_user(struct xt_e
> EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
> #endif
>
> +void xt_add_counter(struct xt_counters *c, unsigned b, unsigned p)
> +{
> + c->bcnt += b;
> + c->pcnt += p;
> +}
> +EXPORT_SYMBOL_GPL(xt_add_counter);
> +
> +void xt_sum_counter(struct xt_counters *t, int cpu,
> + const struct xt_counters *c)
> +{
> + t->pcnt += c->pcnt;
> + t->bcnt += c->bcnt;
> +}
> +EXPORT_SYMBOL_GPL(xt_sum_counter);
> +
> struct xt_table_info *xt_alloc_table_info(unsigned int size)
> {
> struct xt_table_info *newinfo;
>
First I wondered if adding an out-of-line xt_add_counter() could slow firewalls down,
so I did some testing with tbench and a small iptables setup (about 16 matched rules per packet).
CPU: Core 2, speed 3000.11 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (Unhalted core cycles) count 100000
samples % symbol name
3166081 12.8996 ipt_do_table
2471426 10.0694 copy_from_user
1016717 4.1424 copy_to_user
902072 3.6753 schedule
687266 2.8001 xt_add_counter
589899 2.4034 tcp_sendmsg
579619 2.3615 tcp_ack
455883 1.8574 tcp_transmit_skb
c044d110 <xt_add_counter>: /* xt_add_counter total: 687266 2.8001 */
80675 0.3287 :c044d110: push %ebp
15574 0.0635 :c044d111: mov %esp,%ebp
2 8.1e-06 :c044d113: sub $0xc,%esp
39583 0.1613 :c044d116: mov %ebx,(%esp)
4387 0.0179 :c044d119: mov %edi,0x8(%esp)
1187 0.0048 :c044d11d: mov %esi,0x4(%esp)
1601 0.0065 :c044d121: mov $0xc068ae4c,%edi
38881 0.1584 :c044d126: mov %fs:0xc0688540,%ebx
5585 0.0228 :c044d12d: add %ebx,%edi
3910 0.0159 :c044d12f: incl (%edi)
133482 0.5438 :c044d131: xor %esi,%esi
32 1.3e-04 :c044d133: add %edx,0x8(%eax)
71181 0.2900 :c044d136: mov %ecx,%edx
7986 0.0325 :c044d138: adc %esi,0xc(%eax)
88695 0.3614 :c044d13b: xor %ecx,%ecx
15 6.1e-05 :c044d13d: add %edx,(%eax)
41496 0.1691 :c044d13f: adc %ecx,0x4(%eax)
52944 0.2157 :c044d142: incl (%edi)
30759 0.1253 :c044d144: mov (%esp),%ebx
20241 0.0825 :c044d147: mov 0x4(%esp),%esi
5662 0.0231 :c044d14b: mov 0x8(%esp),%edi
2288 0.0093 :c044d14f: leave
41100 0.1675 :c044d150: ret
tbench 8 results here, with all your patches applied:
Throughput 2331 MB/sec 8 procs
And if inlined:
Throughput 2359.06 MB/sec 8 procs
and if we check the inlined code and its cost we see:
2597 0.1719 :c048dfee: mov -0x64(%ebp),%edx
182 0.0120 :c048dff1: movzwl 0x2(%edx),%eax
524 0.0347 :c048dff5: mov %fs:0xc0688540,%ecx
7 4.6e-04 :c048dffc: add -0x70(%ebp),%ecx
2465 0.1632 :c048dfff: incl (%ecx)
9476 0.6273 :c048e001: addl $0x1,0x60(%edi)
10068 0.6665 :c048e005: adcl $0x0,0x64(%edi)
6543 0.4332 :c048e009: xor %edx,%edx
1 6.6e-05 :c048e00b: rol $0x8,%ax
234 0.0155 :c048e00f: movzwl %ax,%eax
2198 0.1455 :c048e012: add %eax,0x68(%edi)
80 0.0053 :c048e015: adc %edx,0x6c(%edi)
2858 0.1892 :c048e018: incl (%ecx)
With the upcoming work on fast percpu accesses, we might in the future see the following code
(no need for extra registers to compute the address of the variable):
mov -0x64(%ebp),%edx
movzwl 0x2(%edx),%eax
incl %fs:xt_counter_sequence
addl $0x1,0x60(%edi)
adcl $0x0,0x64(%edi)
xor %edx,%edx
rol $0x8,%ax
movzwl %ax,%eax
add %eax,0x68(%edi)
adc %edx,0x6c(%edi)
incl %fs:xt_counter_sequence