From: Stephen Hemminger <shemminger@vyatta.com>
To: paulmck@linux.vnet.ibm.com
Cc: Eric Dumazet <dada1@cosmosbay.com>,
David Miller <davem@davemloft.net>,
netdev@vger.kernel.org
Subject: Re: [PATCH 3/3] iptables: lock free counters (alternate version)
Date: Tue, 3 Feb 2009 15:18:23 -0800 [thread overview]
Message-ID: <20090203151823.457b24f1@extreme> (raw)
In-Reply-To: <20090203231124.GL6607@linux.vnet.ibm.com>
This is the "interesting bits" of what I am testing.
What it does is swap in new counters, sum, then put counter values
back onto the current cpu.
The replace case is easier since the counters are "old" at that point.
--- a/net/ipv4/netfilter/ip_tables.c 2009-02-02 15:06:29.684249364 -0800
+++ b/net/ipv4/netfilter/ip_tables.c 2009-02-03 15:08:30.230188116 -0800
@@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb,
mtpar.family = tgpar.family = NFPROTO_IPV4;
tgpar.hooknum = hook;
- read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- private = table->private;
- table_base = (void *)private->entries[smp_processor_id()];
+
+ rcu_read_lock_bh();
+ private = rcu_dereference(table->private);
+ table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
e = get_entry(table_base, private->hook_entry[hook]);
/* For return from builtin chain */
@@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb,
}
} while (!hotdrop);
- read_unlock_bh(&table->lock);
+ rcu_read_unlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -924,13 +926,62 @@ get_counters(const struct xt_table_info
counters,
&i);
}
+
+}
+
+/* Swap existing counter entries with new zeroed ones */
+static void swap_counters(struct xt_table_info *o, struct xt_table_info *n)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ void *p = o->entries[cpu];
+ memset(n->entries[cpu], 0, n->size);
+
+ rcu_assign_pointer(o->entries[cpu], n->entries[cpu]);
+ n->entries[cpu] = p;
+ }
+
+ /* Wait until smoke has cleared */
+ synchronize_net();
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+ const struct xt_counters counters[])
+{
+ unsigned int i, cpu;
+
+ local_bh_disable();
+ cpu = smp_processor_id();
+ i = 0;
+ IPT_ENTRY_ITERATE(t->entries[cpu],
+ t->size,
+ add_counter_to_entry,
+ counters,
+ &i);
+ local_bh_enable();
}
static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- const struct xt_table_info *private = table->private;
+ struct xt_table_info *private = table->private;
+ struct xt_table_info *tmp;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -939,14 +990,26 @@ static struct xt_counters * alloc_counte
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- return ERR_PTR(-ENOMEM);
+ goto nomem;
+
+ tmp = xt_alloc_table_info(private->size);
+ if (!tmp)
+ goto free_counters;
- /* First, sum counters... */
- write_lock_bh(&table->lock);
+ mutex_lock(&table->lock);
+ swap_counters(private, tmp);
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+ put_counters(private, counters);
+ mutex_unlock(&table->lock);
+
+ xt_free_table_info(tmp);
return counters;
+
+ free_counters:
+ vfree(counters);
+ nomem:
+ return ERR_PTR(-ENOMEM);
}
static int
@@ -1242,7 +1305,9 @@ __do_replace(struct net *net, const char
module_put(t->me);
/* Get the old counters. */
+ synchronize_net();
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1312,27 +1377,6 @@ do_replace(struct net *net, void __user
return ret;
}
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
-#if 0
- duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
- *i,
- (long unsigned int)e->counters.pcnt,
- (long unsigned int)e->counters.bcnt,
- (long unsigned int)addme[*i].pcnt,
- (long unsigned int)addme[*i].bcnt);
-#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
@@ -1393,13 +1437,14 @@ do_add_counters(struct net *net, void __
goto free;
}
- write_lock_bh(&t->lock);
+ mutex_lock(&t->lock);
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
+ preempt_disable();
i = 0;
/* Choose the copy that is on our node */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1408,8 +1453,9 @@ do_add_counters(struct net *net, void __
add_counter_to_entry,
paddc,
&i);
+ preempt_enable();
unlock_up_free:
- write_unlock_bh(&t->lock);
+ mutex_unlock(&t->lock);
xt_table_unlock(t);
module_put(t->me);
free:
next prev parent reply other threads:[~2009-02-03 23:18 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-30 21:57 [PATCH 0/6] iptables: eliminate read/write lock (v0.4) Stephen Hemminger
2009-01-30 21:57 ` [PATCH 1/6] netfilter: change elements in x_tables Stephen Hemminger
2009-01-30 21:57 ` [PATCH 2/6] netfilter: remove unneeded initializations Stephen Hemminger
2009-01-30 21:57 ` [PATCH 3/6] ebtables: " Stephen Hemminger
2009-01-30 21:57 ` [PATCH 4/6] netfilter: abstract xt_counters Stephen Hemminger
2009-02-01 12:25 ` Eric Dumazet
2009-02-02 23:33 ` [PATCH 3/3] iptables: lock free counters (alternate version) Stephen Hemminger
2009-02-03 19:00 ` Eric Dumazet
2009-02-03 19:19 ` Eric Dumazet
2009-02-03 19:32 ` Paul E. McKenney
2009-02-03 20:20 ` Eric Dumazet
2009-02-03 20:44 ` Stephen Hemminger
2009-02-03 21:05 ` Eric Dumazet
2009-02-03 21:10 ` Paul E. McKenney
2009-02-03 21:22 ` Stephen Hemminger
2009-02-03 21:27 ` Rick Jones
2009-02-03 23:11 ` Paul E. McKenney
2009-02-03 23:18 ` Stephen Hemminger [this message]
2009-01-30 21:57 ` [PATCH 5/6] netfilter: use sequence number synchronization for counters Stephen Hemminger
2009-01-30 21:57 ` [PATCH 6/6] netfilter: convert x_tables to use RCU Stephen Hemminger
2009-01-31 17:27 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090203151823.457b24f1@extreme \
--to=shemminger@vyatta.com \
--cc=dada1@cosmosbay.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).