* [PATCH 1/6] netfilter: change elements in x_tables
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
` (5 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: x_tables.patch --]
[-- Type: text/plain, Size: 1062 bytes --]
Change to proper type on private pointer rather than anonymous void.
Keep active elements on same cache line.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-26 17:24:43.251543415 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-26 17:29:12.510649107 -0800
@@ -349,9 +349,6 @@ struct xt_table
{
struct list_head list;
- /* A unique name... */
- const char name[XT_TABLE_MAXNAMELEN];
-
/* What hooks you will enter on */
unsigned int valid_hooks;
@@ -359,13 +356,15 @@ struct xt_table
rwlock_t lock;
/* Man behind the curtain... */
- //struct ip6t_table_info *private;
- void *private;
+ struct xt_table_info *private;
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
u_int8_t af; /* address/protocol family */
+
+ /* A unique name... */
+ const char name[XT_TABLE_MAXNAMELEN];
};
#include <linux/netfilter_ipv4.h>
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/6] netfilter: remove unneeded initializations
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 3/6] ebtables: " Stephen Hemminger
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: iptables-lock-init.patch --]
[-- Type: text/plain, Size: 4593 bytes --]
Later patches change the locking on xt_table and the initialization of
the lock element is not needed since the lock is always initialized in
xt_register_table anyway.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
net/ipv4/netfilter/arptable_filter.c | 2 --
net/ipv4/netfilter/iptable_filter.c | 1 -
net/ipv4/netfilter/iptable_mangle.c | 1 -
net/ipv4/netfilter/iptable_raw.c | 1 -
net/ipv4/netfilter/iptable_security.c | 1 -
net/ipv4/netfilter/nf_nat_rule.c | 1 -
net/ipv6/netfilter/ip6table_filter.c | 1 -
net/ipv6/netfilter/ip6table_mangle.c | 1 -
net/ipv6/netfilter/ip6table_raw.c | 1 -
net/ipv6/netfilter/ip6table_security.c | 1 -
10 files changed, 11 deletions(-)
--- a/net/ipv4/netfilter/arptable_filter.c 2009-01-26 17:24:43.687542005 -0800
+++ b/net/ipv4/netfilter/arptable_filter.c 2009-01-26 19:50:37.891042244 -0800
@@ -48,8 +48,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
- .private = NULL,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
};
--- a/net/ipv4/netfilter/iptable_filter.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_filter.c 2009-01-26 19:50:37.891042244 -0800
@@ -56,7 +56,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/iptable_raw.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_raw.c 2009-01-26 19:50:37.891042244 -0800
@@ -39,7 +39,6 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/iptable_security.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_security.c 2009-01-26 19:50:37.891042244 -0800
@@ -60,7 +60,6 @@ static struct
static struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(security_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv4/netfilter/nf_nat_rule.c 2009-01-26 17:24:43.695541481 -0800
+++ b/net/ipv4/netfilter/nf_nat_rule.c 2009-01-26 19:51:20.338030618 -0800
@@ -61,7 +61,6 @@ static struct
static struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--- a/net/ipv6/netfilter/ip6table_filter.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_filter.c 2009-01-26 19:50:37.895044361 -0800
@@ -54,7 +54,6 @@ static struct
static struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_mangle.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_mangle.c 2009-01-26 19:50:37.895044361 -0800
@@ -60,7 +60,6 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_raw.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_raw.c 2009-01-26 19:50:37.895044361 -0800
@@ -38,7 +38,6 @@ static struct
static struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv6/netfilter/ip6table_security.c 2009-01-26 17:24:43.735541493 -0800
+++ b/net/ipv6/netfilter/ip6table_security.c 2009-01-26 19:50:37.895044361 -0800
@@ -59,7 +59,6 @@ static struct
static struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(security_table.lock),
.me = THIS_MODULE,
.af = AF_INET6,
};
--- a/net/ipv4/netfilter/iptable_mangle.c 2009-01-26 17:24:43.691541994 -0800
+++ b/net/ipv4/netfilter/iptable_mangle.c 2009-01-26 19:50:37.895044361 -0800
@@ -67,7 +67,6 @@ static struct
static struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/6] ebtables: remove unneeded initializations
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
2009-01-29 19:12 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
2009-01-29 19:12 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
` (3 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: ebtables-lock-init.patch --]
[-- Type: text/plain, Size: 1537 bytes --]
The initialization of the lock element is not needed
since the lock is always initialized in ebt_register_table.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
net/bridge/netfilter/ebtable_broute.c | 1 -
net/bridge/netfilter/ebtable_filter.c | 1 -
net/bridge/netfilter/ebtable_nat.c | 1 -
3 files changed, 3 deletions(-)
--- a/net/bridge/netfilter/ebtable_broute.c 2009-01-27 17:09:10.313100854 -0800
+++ b/net/bridge/netfilter/ebtable_broute.c 2009-01-27 17:09:15.862142852 -0800
@@ -46,7 +46,6 @@ static struct ebt_table broute_table =
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .lock = __RW_LOCK_UNLOCKED(broute_table.lock),
.check = check,
.me = THIS_MODULE,
};
--- a/net/bridge/netfilter/ebtable_filter.c 2009-01-27 17:08:50.725100955 -0800
+++ b/net/bridge/netfilter/ebtable_filter.c 2009-01-27 17:08:53.828611768 -0800
@@ -55,7 +55,6 @@ static struct ebt_table frame_filter =
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(frame_filter.lock),
.check = check,
.me = THIS_MODULE,
};
--- a/net/bridge/netfilter/ebtable_nat.c 2009-01-27 17:09:22.896602465 -0800
+++ b/net/bridge/netfilter/ebtable_nat.c 2009-01-27 17:09:31.589085328 -0800
@@ -55,7 +55,6 @@ static struct ebt_table frame_nat =
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(frame_nat.lock),
.check = check,
.me = THIS_MODULE,
};
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/6] netfilter: abstract xt_counters
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (2 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 3/6] ebtables: " Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: xtables-counter.patch --]
[-- Type: text/plain, Size: 5552 bytes --]
Break out the parts of the x_tables code that manipulate counters so
changes to locking are easier.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 15 ++++++++++++++-
net/ipv4/netfilter/arp_tables.c | 12 ++++++++----
net/ipv4/netfilter/ip_tables.c | 12 ++++++++----
net/ipv6/netfilter/ip6_tables.c | 24 ++++++++++++++----------
4 files changed, 44 insertions(+), 19 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-29 09:45:14.475070733 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 10:51:50.194362708 -0800
@@ -105,13 +105,26 @@ struct _xt_align
#define XT_ERROR_TARGET "ERROR"
#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
-#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+#define SUM_COUNTER(s,c) do { (s).bcnt += (c).bcnt; (s).pcnt += (c).pcnt; } while(0)
struct xt_counters
{
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
+static inline void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c)
+{
+ *v = *c;
+}
+
+static inline void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
+{
+ c->pcnt += p;
+ c->bcnt += b;
+}
+
+
/* The argument to IPT_SO_ADD_COUNTERS. */
struct xt_counters_info
{
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 09:52:23.172320248 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 10:53:13.152543484 -0800
@@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buf
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
- ADD_COUNTER(e->counters, hdr_len, 1);
+ xt_incr_counter(&e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -662,10 +662,14 @@ static int translate_table(const char *n
/* Gets counters. */
static inline int add_entry_to_counter(const struct arpt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -709,6 +713,7 @@ static void get_counters(const struct xt
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1082,8 +1087,7 @@ static inline int add_counter_to_entry(s
const struct xt_counters addme[],
unsigned int *i)
{
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 09:52:23.200320370 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 10:52:43.235570700 -0800
@@ -366,7 +366,7 @@ ipt_do_table(struct sk_buff *skb,
if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ xt_incr_counter(&e->counters, ntohs(ip->tot_len), 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -872,10 +872,14 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -921,6 +925,7 @@ get_counters(const struct xt_table_info
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
+ cpu,
counters,
&i);
}
@@ -1327,8 +1332,7 @@ add_counter_to_entry(struct ipt_entry *e
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 09:52:26.618819778 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 10:53:06.240695087 -0800
@@ -392,9 +392,9 @@ ip6t_do_table(struct sk_buff *skb,
if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
goto no_match;
- ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(skb)->payload_len) +
- sizeof(struct ipv6hdr), 1);
+ xt_incr_counter(&e->counters,
+ ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr), 1);
t = ip6t_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -901,10 +901,14 @@ translate_table(const char *name,
/* Gets counters. */
static inline int
add_entry_to_counter(const struct ip6t_entry *e,
+ int cpu,
struct xt_counters total[],
unsigned int *i)
{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
+ struct xt_counters c;
+
+ xt_fetch_counter(&c, cpu, &e->counters);
+ SUM_COUNTER(total[*i], c);
(*i)++;
return 0;
@@ -948,10 +952,11 @@ get_counters(const struct xt_table_info
continue;
i = 0;
IP6T_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ t->size,
+ add_entry_to_counter,
+ cpu,
+ counters,
+ &i);
}
}
@@ -1357,8 +1362,7 @@ add_counter_to_entry(struct ip6t_entry *
(long unsigned int)addme[*i].pcnt,
(long unsigned int)addme[*i].bcnt);
#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ SUM_COUNTER(e->counters, addme[*i]);
(*i)++;
return 0;
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5/6] netfilter: use sequence number synchronization for counters
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (3 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-30 8:03 ` Eric Dumazet
2009-01-29 19:12 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
2009-01-29 23:39 ` [PATCH 0/6] iptables: read/write lock elimination (v0.4) Rick Jones
6 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: counters-seqcount.patch --]
[-- Type: text/plain, Size: 3550 bytes --]
Change how synchronization is done on the iptables counters. Use seqcount
wrapper instead of depending on reader/writer lock.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 14 +++-----------
net/ipv4/netfilter/arp_tables.c | 4 ++--
net/ipv4/netfilter/ip_tables.c | 4 ++--
net/ipv6/netfilter/ip6_tables.c | 4 ++--
net/netfilter/x_tables.c | 28 ++++++++++++++++++++++++++++
5 files changed, 37 insertions(+), 17 deletions(-)
4
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:08:38.735069921 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:09:20.720069979 -0800
@@ -736,9 +736,9 @@ static inline struct xt_counters *alloc_
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:08:38.723069778 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:09:20.720069979 -0800
@@ -947,9 +947,9 @@ static struct xt_counters * alloc_counte
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:08:38.763071181 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:09:20.724069866 -0800
@@ -976,9 +976,9 @@ static struct xt_counters *alloc_counter
return ERR_PTR(-ENOMEM);
/* First, sum counters... */
- write_lock_bh(&table->lock);
+ local_bh_disable();
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ local_bh_enable();
return counters;
}
--- a/net/netfilter/x_tables.c 2009-01-29 11:08:38.747070716 -0800
+++ b/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
@@ -577,6 +577,34 @@ int xt_compat_target_to_user(struct xt_e
EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
#endif
+static DEFINE_PER_CPU(seqcount_t, xt_counter_sequence);
+
+void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c)
+{
+ seqcount_t *seq = &per_cpu(xt_counter_sequence, cpu);
+ unsigned start;
+
+ do {
+ start = read_seqcount_begin(seq);
+ *v = *c;
+ } while (read_seqcount_retry(seq, start));
+}
+EXPORT_SYMBOL_GPL(xt_fetch_counter);
+
+void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
+{
+ seqcount_t *seq = &__get_cpu_var(xt_counter_sequence);
+
+ write_seqcount_begin(seq);
+ c->pcnt += p;
+ c->bcnt += b;
+ write_seqcount_end(seq);
+
+}
+EXPORT_SYMBOL_GPL(xt_incr_counter);
+
+
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
struct xt_table_info *newinfo;
--- a/include/linux/netfilter/x_tables.h 2009-01-29 11:08:38.779071484 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 11:09:20.724069866 -0800
@@ -112,17 +112,9 @@ struct xt_counters
u_int64_t pcnt, bcnt; /* Packet and byte counters */
};
-static inline void xt_fetch_counter(struct xt_counters *v, int cpu,
- const struct xt_counters *c)
-{
- *v = *c;
-}
-
-static inline void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
-{
- c->pcnt += p;
- c->bcnt += b;
-}
+extern void xt_fetch_counter(struct xt_counters *v, int cpu,
+ const struct xt_counters *c);
+extern void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p);
/* The argument to IPT_SO_ADD_COUNTERS. */
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 5/6] netfilter: use sequence number synchronization for counters
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
@ 2009-01-30 8:03 ` Eric Dumazet
0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2009-01-30 8:03 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, Patrick McHardy, netdev, netfilter-devel
Stephen Hemminger a écrit :
> Change how synchronization is done on the iptables counters. Use seqcount
> wrapper instead of depending on reader/writer lock.
>
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
> --- a/net/netfilter/x_tables.c 2009-01-29 11:08:38.747070716 -0800
> +++ b/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
> @@ -577,6 +577,34 @@ int xt_compat_target_to_user(struct xt_e
> EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
> #endif
>
> +static DEFINE_PER_CPU(seqcount_t, xt_counter_sequence);
> +
> +void xt_fetch_counter(struct xt_counters *v, int cpu,
> + const struct xt_counters *c)
> +{
> + seqcount_t *seq = &per_cpu(xt_counter_sequence, cpu);
> + unsigned start;
> +
> + do {
> + start = read_seqcount_begin(seq);
> + *v = *c;
> + } while (read_seqcount_retry(seq, start));
> +}
> +EXPORT_SYMBOL_GPL(xt_fetch_counter);
> +
> +void xt_incr_counter(struct xt_counters *c, unsigned b, unsigned p)
You really want an inline xt_incr_counter() function here to speed up ipt_do_table()
I agree xt_fetch_counter() is not time critical and can be outlined.
> +{
> + seqcount_t *seq = &__get_cpu_var(xt_counter_sequence);
> +
> + write_seqcount_begin(seq);
> + c->pcnt += p;
> + c->bcnt += b;
> + write_seqcount_end(seq);
> +
> +}
> +EXPORT_SYMBOL_GPL(xt_incr_counter);
> +
> +
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 6/6] netfilter: convert x_tables to use RCU
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (4 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
@ 2009-01-29 19:12 ` Stephen Hemminger
2009-01-29 23:39 ` [PATCH 0/6] iptables: read/write lock elimination (v0.4) Rick Jones
6 siblings, 0 replies; 9+ messages in thread
From: Stephen Hemminger @ 2009-01-29 19:12 UTC (permalink / raw)
To: David Miller, Patrick McHardy; +Cc: netdev, netfilter-devel
[-- Attachment #1: iptables-rcu.patch --]
[-- Type: text/plain, Size: 8125 bytes --]
Replace existing reader/writer lock with Read-Copy-Update to
eliminate the overhead of a read lock on each incoming packet.
This should reduce the overhead of iptables especially on SMP
systems.
The previous code used a reader-writer lock for two purposes.
The first was to ensure that the xt_table_info reference was not in
process of being changed. Since xt_table_info is only freed via one
routine, it was a direct conversion to RCU.
The other use of the reader-writer lock was to block changes
to counters while they were being read. This synchronization was
fixed by the previous patch. But we still need to make sure table info
isn't going away.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/linux/netfilter/x_tables.h | 10 ++++++--
net/ipv4/netfilter/arp_tables.c | 12 ++++-----
net/ipv4/netfilter/ip_tables.c | 12 ++++-----
net/ipv6/netfilter/ip6_tables.c | 12 ++++-----
net/netfilter/x_tables.c | 45 ++++++++++++++++++++++++++-----------
5 files changed, 58 insertions(+), 33 deletions(-)
--- a/include/linux/netfilter/x_tables.h 2009-01-29 11:09:20.724069866 -0800
+++ b/include/linux/netfilter/x_tables.h 2009-01-29 11:10:49.823071289 -0800
@@ -357,8 +357,8 @@ struct xt_table
/* What hooks you will enter on */
unsigned int valid_hooks;
- /* Lock for the curtain */
- rwlock_t lock;
+ /* Lock for curtain */
+ spinlock_t lock;
/* Man behind the curtain... */
struct xt_table_info *private;
@@ -388,6 +388,12 @@ struct xt_table_info
unsigned int hook_entry[NF_INET_NUMHOOKS];
unsigned int underflow[NF_INET_NUMHOOKS];
+ /* For the dustman... */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
+
/* ipt_entry tables: one per CPU */
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
char *entries[1];
--- a/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:09:20.720069979 -0800
+++ b/net/ipv4/netfilter/arp_tables.c 2009-01-29 11:10:49.823071289 -0800
@@ -237,8 +237,8 @@ unsigned int arpt_do_table(struct sk_buf
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- read_lock_bh(&table->lock);
- private = table->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -311,7 +311,7 @@ unsigned int arpt_do_table(struct sk_buf
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
if (hotdrop)
return NF_DROP;
@@ -1152,8 +1152,8 @@ static int do_add_counters(struct net *n
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1168,7 +1168,7 @@ static int do_add_counters(struct net *n
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:09:20.720069979 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-01-29 11:10:49.827070289 -0800
@@ -347,9 +347,9 @@ ipt_do_table(struct sk_buff *skb,
mtpar.family = tgpar.family = NFPROTO_IPV4;
tgpar.hooknum = hook;
- read_lock_bh(&table->lock);
+ rcu_read_lock_bh();
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- private = table->private;
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -445,7 +445,7 @@ ipt_do_table(struct sk_buff *skb,
}
} while (!hotdrop);
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -1397,8 +1397,8 @@ do_add_counters(struct net *net, void __
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1413,7 +1413,7 @@ do_add_counters(struct net *net, void __
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:09:20.724069866 -0800
+++ b/net/ipv6/netfilter/ip6_tables.c 2009-01-29 11:10:49.827070289 -0800
@@ -373,9 +373,9 @@ ip6t_do_table(struct sk_buff *skb,
mtpar.family = tgpar.family = NFPROTO_IPV6;
tgpar.hooknum = hook;
- read_lock_bh(&table->lock);
+ rcu_read_lock_bh();
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- private = table->private;
+ private = rcu_dereference(table->private);
table_base = (void *)private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -474,7 +474,7 @@ ip6t_do_table(struct sk_buff *skb,
#ifdef CONFIG_NETFILTER_DEBUG
((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -1428,8 +1428,8 @@ do_add_counters(struct net *net, void __
goto free;
}
- write_lock_bh(&t->lock);
- private = t->private;
+ rcu_read_lock_bh();
+ private = rcu_dereference(t->private);
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1444,7 +1444,7 @@ do_add_counters(struct net *net, void __
paddc,
&i);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ rcu_read_unlock_bh();
xt_table_unlock(t);
module_put(t->me);
free:
--- a/net/netfilter/x_tables.c 2009-01-29 11:10:03.595571234 -0800
+++ b/net/netfilter/x_tables.c 2009-01-29 11:11:43.496192079 -0800
@@ -639,18 +639,37 @@ struct xt_table_info *xt_alloc_table_inf
}
EXPORT_SYMBOL(xt_alloc_table_info);
-void xt_free_table_info(struct xt_table_info *info)
+/* callback to do free for vmalloc'd case */
+static void xt_free_table_info_work(struct work_struct *arg)
{
- int cpu;
+ struct xt_table_info *info = container_of(arg, struct xt_table_info, work);
+ unsigned int cpu;
- for_each_possible_cpu(cpu) {
- if (info->size <= PAGE_SIZE)
- kfree(info->entries[cpu]);
- else
- vfree(info->entries[cpu]);
- }
+ for_each_possible_cpu(cpu)
+ vfree(info->entries[cpu]);
kfree(info);
}
+
+static void xt_free_table_info_rcu(struct rcu_head *arg)
+{
+ struct xt_table_info *info = container_of(arg, struct xt_table_info, rcu);
+
+ if (info->size <= PAGE_SIZE) {
+ unsigned int cpu;
+ for_each_possible_cpu(cpu)
+ kfree(info->entries[cpu]);
+ kfree(info);
+ } else {
+ /* can't safely call vfree in current context */
+ INIT_WORK(&info->work, xt_free_table_info_work);
+ schedule_work(&info->work);
+ }
+}
+
+void xt_free_table_info(struct xt_table_info *info)
+{
+ call_rcu(&info->rcu, xt_free_table_info_rcu);
+}
EXPORT_SYMBOL(xt_free_table_info);
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
@@ -699,20 +718,20 @@ xt_replace_table(struct xt_table *table,
struct xt_table_info *oldinfo, *private;
/* Do the substitution. */
- write_lock_bh(&table->lock);
+ spin_lock_bh(&table->lock);
private = table->private;
/* Check inside lock: is the old number correct? */
if (num_counters != private->number) {
duprintf("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number);
- write_unlock_bh(&table->lock);
+ spin_unlock_bh(&table->lock);
*error = -EAGAIN;
return NULL;
}
oldinfo = private;
- table->private = newinfo;
+ rcu_assign_pointer(table->private, newinfo);
newinfo->initial_entries = oldinfo->initial_entries;
- write_unlock_bh(&table->lock);
+ spin_unlock_bh(&table->lock);
return oldinfo;
}
@@ -747,7 +766,7 @@ struct xt_table *xt_register_table(struc
/* Simplifies replace_table code. */
table->private = bootstrap;
- rwlock_init(&table->lock);
+ spin_lock_init(&table->lock);
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/6] iptables: read/write lock elimination (v0.4)
2009-01-29 19:12 [PATCH 0/6] iptables: read/write lock elimination (v0.4) Stephen Hemminger
` (5 preceding siblings ...)
2009-01-29 19:12 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
@ 2009-01-29 23:39 ` Rick Jones
6 siblings, 0 replies; 9+ messages in thread
From: Rick Jones @ 2009-01-29 23:39 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, Patrick McHardy, netdev, netfilter-devel
Stephen Hemminger wrote:
> This is an update to last round of patches, using Eric's suggestion
> to move the sequence count to top level per cpu variable. Also encapsulated
> the counter manipulation.
Is this version one you would like me to toss into the crucible of the 32-core
setup, or do you want it to have more eyeball time first?
rick jones
^ permalink raw reply [flat|nested] 9+ messages in thread