* [PATCH] netfilter: save the hash of the tuple in the original direction for later use
@ 2010-08-17 7:16 Changli Gao
2010-08-17 8:30 ` Eric Dumazet
From: Changli Gao @ 2010-08-17 7:16 UTC
To: Patrick McHardy
Cc: David S. Miller, netfilter-devel, linux-kernel, Changli Gao
Since we never change the tuple in the original direction, we can save its
hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for later use by
__nf_conntrack_confirm().
__hash_conntrack() is split into two steps: ____hash_conntrack() is used
to get the raw hash, and __hash_bucket() is used to get the bucket id.
In the SYN-flood case, early_drop() doesn't need to recompute the hash.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
net/netfilter/nf_conntrack_core.c | 117 ++++++++++++++++++++++++++------------
1 file changed, 82 insertions(+), 35 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df3eedb..947f0a9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,14 +65,23 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
-static int nf_conntrack_hash_rnd_initted;
-static unsigned int nf_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size, unsigned int rnd)
+static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
{
unsigned int n;
u_int32_t h;
+ static int rnd_initted;
+ static unsigned int rnd;
+ static DEFINE_SPINLOCK(rnd_lock);
+
+ if (unlikely(!rnd_initted)) {
+ spin_lock_bh(&rnd_lock);
+ if (!rnd_initted) {
+ get_random_bytes(&rnd, sizeof(rnd));
+ wmb();
+ rnd_initted = 1;
+ }
+ spin_unlock_bh(&rnd_lock);
+ }
/* The direction must be ignored, so we hash everything up to the
* destination ports (which is a multiple of 4) and treat the last
@@ -83,14 +92,29 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
- return ((u64)h * size) >> 32;
+ return h;
+}
+
+static u32 __hash_bucket(u32 __hash, unsigned int size)
+{
+ return ((u64)__hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 __hash, const struct net *net)
+{
+ return __hash_bucket(__hash, net->ct.htable_size);
+}
+
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+ u16 zone, unsigned int size)
+{
+ return __hash_bucket(____hash_conntrack(tuple, zone), size);
}
static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size,
- nf_conntrack_hash_rnd);
+ return __hash_conntrack(tuple, zone, net->ct.htable_size);
}
bool
@@ -292,13 +316,13 @@ static void death_by_timeout(unsigned long ul_conntrack)
* OR
* - Caller must lock nf_conntrack_lock before calling this function
*/
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int hash = hash_bucket(__hash, net);
/* Disable BHs the entire time since we normally need to disable them
* at least once for the stats anyway.
@@ -327,19 +351,27 @@ begin:
return NULL;
}
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return ____nf_conntrack_find(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple, u32 __hash)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
rcu_read_lock();
begin:
- h = __nf_conntrack_find(net, zone, tuple);
+ h = ____nf_conntrack_find(net, zone, tuple, __hash);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (unlikely(nf_ct_is_dying(ct) ||
@@ -357,6 +389,14 @@ begin:
return h;
}
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return __nf_conntrack_find_get(net, zone, tuple,
+ ____hash_conntrack(tuple, zone));
+}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -409,7 +449,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_ACCEPT;
zone = nf_ct_zone(ct);
- hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ /* reuse the __hash saved before */
+ hash = hash_bucket(*(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev, net);
repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
@@ -567,25 +608,20 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
- const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_tuple *repl,
- gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp, u32 __hash)
{
struct nf_conn *ct;
- if (unlikely(!nf_conntrack_hash_rnd_initted)) {
- get_random_bytes(&nf_conntrack_hash_rnd,
- sizeof(nf_conntrack_hash_rnd));
- nf_conntrack_hash_rnd_initted = 1;
- }
-
/* We don't want any race condition at early drop stage */
atomic_inc(&net->ct.count);
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- unsigned int hash = hash_conntrack(net, zone, orig);
+ unsigned int hash = hash_bucket(__hash, net);
if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
if (net_ratelimit())
@@ -616,7 +652,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+ /* save __hash for reusing when confirming */
+ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = __hash;
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
@@ -643,6 +680,14 @@ out_free:
return ERR_PTR(-ENOMEM);
#endif
}
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *orig,
+ const struct nf_conntrack_tuple *repl,
+ gfp_t gfp)
+{
+ return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *ct)
@@ -664,7 +709,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, u32 __hash)
{
struct nf_conn *ct;
struct nf_conn_help *help;
@@ -678,7 +723,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
- ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+ ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+ __hash);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
@@ -755,6 +801,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ u32 __hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
@@ -764,10 +811,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- h = nf_conntrack_find_get(net, zone, &tuple);
+ __hash = ____hash_conntrack(&tuple, zone);
+ h = __nf_conntrack_find_get(net, zone, &tuple, __hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
- skb, dataoff);
+ skb, dataoff, __hash);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -1307,8 +1355,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize,
- nf_conntrack_hash_rnd);
+ hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-17 7:16 [PATCH] netfilter: save the hash of the tuple in the original direction for later use Changli Gao
@ 2010-08-17 8:30 ` Eric Dumazet
2010-08-17 8:46 ` Changli Gao
From: Eric Dumazet @ 2010-08-17 8:30 UTC
To: Changli Gao
Cc: Patrick McHardy, David S. Miller, netfilter-devel, linux-kernel
On Tuesday, 17 August 2010 at 15:16 +0800, Changli Gao wrote:
> Since we never change the tuple in the original direction, we can save its
> hash in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for later use by
> __nf_conntrack_confirm().
>
> __hash_conntrack() is split into two steps: ____hash_conntrack() is used
> to get the raw hash, and __hash_bucket() is used to get the bucket id.
>
> In the SYN-flood case, early_drop() doesn't need to recompute the hash.
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ---
> net/netfilter/nf_conntrack_core.c | 117 ++++++++++++++++++++++++++------------
> 1 file changed, 82 insertions(+), 35 deletions(-)
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index df3eedb..947f0a9 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -65,14 +65,23 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
> DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
> EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
>
> -static int nf_conntrack_hash_rnd_initted;
> -static unsigned int nf_conntrack_hash_rnd;
> -
> -static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
> - u16 zone, unsigned int size, unsigned int rnd)
> +static u32 ____hash_conntrack(const struct nf_conntrack_tuple *tuple, u16 zone)
> {
> unsigned int n;
> u_int32_t h;
> + static int rnd_initted;
> + static unsigned int rnd;
> + static DEFINE_SPINLOCK(rnd_lock);
> +
> + if (unlikely(!rnd_initted)) {
> + spin_lock_bh(&rnd_lock);
> + if (!rnd_initted) {
> + get_random_bytes(&rnd, sizeof(rnd));
> + wmb();
> + rnd_initted = 1;
> + }
> + spin_unlock_bh(&rnd_lock);
> + }
>
Three variables ?
static atomic_t rnd __read_mostly;

if (unlikely(!atomic_read(&rnd))) {
	unsigned int val;

	get_random_bytes(&val, sizeof(val));
	if (!val)
		val = 1;
	atomic_cmpxchg(&rnd, 0, val);
}
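Written out as a self-contained helper, that pattern reads roughly as
follows (a sketch only; the helper name nf_ct_hash_rnd() is illustrative,
not part of the proposal):

static atomic_t rnd __read_mostly;

/* Lock-free one-time init: zero is reserved as the "not yet initialized"
 * sentinel, which is why a randomly drawn zero is bumped to 1.  If several
 * CPUs race here, every atomic_cmpxchg() but the first fails, so all
 * callers end up using the same random seed without taking any lock.
 */
static inline unsigned int nf_ct_hash_rnd(void)
{
	if (unlikely(!atomic_read(&rnd))) {
		unsigned int val;

		get_random_bytes(&val, sizeof(val));
		if (!val)
			val = 1;	/* keep 0 meaning "uninitialized" */
		atomic_cmpxchg(&rnd, 0, val);
	}
	return atomic_read(&rnd);
}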
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-17 8:30 ` Eric Dumazet
@ 2010-08-17 8:46 ` Changli Gao
2010-08-17 9:01 ` Eric Dumazet
From: Changli Gao @ 2010-08-17 8:46 UTC
To: Eric Dumazet
Cc: Patrick McHardy, David S. Miller, netfilter-devel, linux-kernel
On Tue, Aug 17, 2010 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Three variables ?
>
> static atomic_t rnd __read_mostly;
>
> if (unlikely(!atomic_read(&rnd))) {
>         unsigned int val;
>
>         get_random_bytes(&val, sizeof(val));
>         if (!val)
>                 val = 1;
>         atomic_cmpxchg(&rnd, 0, val);
> }
>
Good idea. However, atomic_t is a volatile variable, and that prevents
ILP. I think it may hurt the likely case. cmpxchg() is an atomic
operation, so is the code below better?
static unsigned long rnd __read_mostly;

if (unlikely(!rnd)) {
	unsigned long val;

	get_random_bytes(&val, sizeof(val));
	if (!val)
		val = 1;
	cmpxchg(&rnd, 0, val);
}
Thanks.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-17 8:46 ` Changli Gao
@ 2010-08-17 9:01 ` Eric Dumazet
2010-08-17 9:32 ` Changli Gao
2010-08-19 3:59 ` Changli Gao
From: Eric Dumazet @ 2010-08-17 9:01 UTC
To: Changli Gao
Cc: Patrick McHardy, David S. Miller, netfilter-devel, linux-kernel
On Tuesday, 17 August 2010 at 16:46 +0800, Changli Gao wrote:
> On Tue, Aug 17, 2010 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Three variables ?
> >
> > static atomic_t rnd __read_mostly;
> >
> > if (unlikely(!atomic_read(&rnd))) {
> >         unsigned int val;
> >
> >         get_random_bytes(&val, sizeof(val));
> >         if (!val)
> >                 val = 1;
> >         atomic_cmpxchg(&rnd, 0, val);
> > }
> >
>
>
> Good idea. However, atomic_t is a volatile variable, and that prevents
> ILP. I think it may hurt the likely case. cmpxchg() is an atomic
> operation, so is the code below better?
>
I am not sure what you mean by ILP.
On x86, reading the same memory location twice is faster than reading
it once, storing it in a temporary variable (on the stack), and then
reading it back from the stack.
> static unsigned long rnd __read_mostly;
>
> if (unlikely(!rnd)) {
>         unsigned long val;
>
>         get_random_bytes(&val, sizeof(val));
>         if (!val)
>                 val = 1;
>         cmpxchg(&rnd, 0, val);
> }
>
> Thanks.
>
I am not sure we must use a long (we really need 4 bytes only), and last
time I tried to use cmpxchg(), I was told it was not available on
all arches.
But seeing it used in kernel/pid.c, maybe it's not true anymore (that is,
__HAVE_ARCH_CMPXCHG is always defined to 1).
Since it's a recent change (in kernel/pid.c), I would wait a bit and see
if an arch maintainer complains ;)
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-17 9:01 ` Eric Dumazet
@ 2010-08-17 9:32 ` Changli Gao
2010-08-19 3:59 ` Changli Gao
From: Changli Gao @ 2010-08-17 9:32 UTC
To: Eric Dumazet
Cc: Patrick McHardy, David S. Miller, netfilter-devel, linux-kernel
On Tue, Aug 17, 2010 at 5:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tuesday, 17 August 2010 at 16:46 +0800, Changli Gao wrote:
>> On Tue, Aug 17, 2010 at 4:30 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> >
>> > Three variables ?
>> >
>> > static atomic_t rnd __read_mostly;
>> >
>> > if (unlikely(!atomic_read(&rnd))) {
>> >         unsigned int val;
>> >
>> >         get_random_bytes(&val, sizeof(val));
>> >         if (!val)
>> >                 val = 1;
>> >         atomic_cmpxchg(&rnd, 0, val);
>> > }
>> >
>>
>>
>> Good idea. However, atomic_t is a volatile variable, and that prevents
>> ILP. I think it may hurt the likely case. cmpxchg() is an atomic
>> operation, so is the code below better?
>>
>
> I am not sure what you mean by ILP.
I mean instruction-level parallelism
(http://en.wikipedia.org/wiki/Instruction_level_parallelism):
volatile suppresses compiler optimizations.
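As a minimal illustration of the concern (a hypothetical sketch of mine,
not code from the patch): atomic_read() is, on most arches, a volatile
load, so the compiler must perform every read, while a plain variable can
be kept in a register and scheduled freely:

static unsigned int plain_rnd;
static atomic_t atomic_rnd;

unsigned int read_both_twice(void)
{
	/* plain loads: the compiler may load once and reuse a register */
	unsigned int a = plain_rnd + plain_rnd;

	/* volatile loads: each atomic_read() forces a memory access,
	 * limiting how the compiler can reorder the fast path */
	unsigned int b = atomic_read(&atomic_rnd) +
			 atomic_read(&atomic_rnd);

	return a + b;
}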
>
> On x86, reading the same memory location twice is faster than reading
> it once, storing it in a temporary variable (on the stack), and then
> reading it back from the stack.
Thanks for this info. But the compiler may store it in a register. In
fact, we can't control the behavior of the compiler in C.
>
>> static unsigned long rnd __read_mostly;
>>
>> if (unlikely(!rnd)) {
>>         unsigned long val;
>>
>>         get_random_bytes(&val, sizeof(val));
>>         if (!val)
>>                 val = 1;
>>         cmpxchg(&rnd, 0, val);
>> }
>>
>> Thanks.
>>
>
> I am not sure we must use a long (we really need 4 bytes only),
Yeah, 4 bytes is enough.
> and last
> time I tried to use cmpxchg(), I was told it was not available on
> all arches.
>
> But seeing it used in kernel/pid.c, maybe it's not true anymore (that is,
> __HAVE_ARCH_CMPXCHG is always defined to 1).
>
> Since it's a recent change (in kernel/pid.c), I would wait a bit and see
> if an arch maintainer complains ;)
>
Thanks, I saw it. There are two definitions of cmpxchg(), in these files:
include/asm-generic/cmpxchg.h
include/asm-generic/system.h
static inline unsigned long __cmpxchg(volatile unsigned long *m,
				      unsigned long old, unsigned long new)
{
	unsigned long retval;
	unsigned long flags;

	local_irq_save(flags);
	retval = *m;
	if (retval == old)
		*m = new;
	local_irq_restore(flags);
	return retval;
}

#define cmpxchg(ptr, o, n)					\
	((__typeof__(*(ptr))) __cmpxchg((unsigned long *)(ptr),	\
					(unsigned long)(o),	\
					(unsigned long)(n)))
This one seems buggy when used on variables that are not unsigned long.
And in kernel/pid.c, the last_pid variable is an int.
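To make the hazard concrete, here is a hypothetical, deliberately
ill-formed userspace sketch (mine, not kernel code) of what the
(unsigned long *) cast does when long is wider than the variable:

#include <stdio.h>

struct demo {
	int last_pid;	/* the variable cmpxchg() is aimed at */
	int neighbour;	/* its bits leak into the 8-byte access on LP64 */
};

int main(void)
{
	struct demo d = { .last_pid = 0, .neighbour = 42 };
	/* what the generic macro effectively does on an LP64 machine for
	 * cmpxchg(&d.last_pid, 0, 1); the cast also violates alignment
	 * and strict-aliasing rules, which is part of the point */
	unsigned long *m = (unsigned long *)&d.last_pid;

	if (*m == 0)	/* false: the load covers both fields */
		*m = 1;	/* and the store would clobber both */

	/* the exchange spuriously fails even though last_pid == 0 */
	printf("last_pid = %d\n", d.last_pid);
	return 0;
}

(In practice, asm-generic/cmpxchg.h is only used on architectures without
a native cmpxchg, which are typically 32-bit, where int and long happen to
have the same width; that is presumably why kernel/pid.c gets away with it.)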
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-17 9:01 ` Eric Dumazet
2010-08-17 9:32 ` Changli Gao
@ 2010-08-19 3:59 ` Changli Gao
2010-08-19 12:35 ` Mathieu Desnoyers
From: Changli Gao @ 2010-08-19 3:59 UTC
To: Eric Dumazet
Cc: Patrick McHardy, David S. Miller, netfilter-devel, linux-kernel,
Mathieu Desnoyers
On Tue, Aug 17, 2010 at 5:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> I am not sure we must use a long (we really need 4 bytes only), and last
> time I tried to use cmpxchg(), I was told it was not available on
> all arches.
>
> But seeing it used in kernel/pid.c, maybe it's not true anymore (that is,
> __HAVE_ARCH_CMPXCHG is always defined to 1).
>
> Since it's a recent change (in kernel/pid.c), I would wait a bit and see
> if an arch maintainer complains ;)
>
>
I searched the code and found that ext4, btrfs, lockdep, perf and trace
all use cmpxchg(). And after this patch series
(http://linux.derkeiler.com/Mailing-Lists/Kernel/2007-08/msg05032.html)
was merged, maybe cmpxchg() is generic. Is there an SMP processor that
doesn't support cmpxchg()?
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-19 3:59 ` Changli Gao
@ 2010-08-19 12:35 ` Mathieu Desnoyers
2010-08-19 15:31 ` Eric Dumazet
From: Mathieu Desnoyers @ 2010-08-19 12:35 UTC
To: Changli Gao
Cc: Eric Dumazet, Patrick McHardy, David S. Miller, netfilter-devel,
linux-kernel
* Changli Gao (xiaosuo@gmail.com) wrote:
> On Tue, Aug 17, 2010 at 5:01 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > I am not sure we must use a long (we really need 4 bytes only), and last
> > time I tried to use cmpxchg(), I was told it was not available on
> > all arches.
> >
> > But seeing it used in kernel/pid.c, maybe it's not true anymore (that is,
> > __HAVE_ARCH_CMPXCHG is always defined to 1).
> >
> > Since it's a recent change (in kernel/pid.c), I would wait a bit and see
> > if an arch maintainer complains ;)
> >
> >
>
> I searched the code and found that ext4, btrfs, lockdep, perf and trace
> all use cmpxchg(). And after this patch series
> (http://linux.derkeiler.com/Mailing-Lists/Kernel/2007-08/msg05032.html)
> was merged, maybe cmpxchg() is generic. Is there an SMP processor that
> doesn't support cmpxchg()?
I made sure it was available on all architectures with that patchset,
with the intended goal of using it (cmpxchg, cmpxchg_local for 32/64-bit
and cmpxchg64, cmpxchg64_local for 64-bit) in arch-agnostic tracer code.
I added a cmpxchg emulated with irqs off for uniprocessor-only
architectures that did not have CAS support. sparc32 moved to a
hashed-lock scheme for its atomic operations long ago, which makes the
full 32 bits available.
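(A rough sketch of the hashed-lock idea, with hypothetical helper names
and not sparc32's actual code: a small array of spinlocks is indexed by a
hash of the target address, and the compare-and-swap runs under that
lock, so no bits of the value itself need to be sacrificed:)

#define ATOMIC_HASH_SIZE	16
/* locks are assumed to be spin_lock_init()ed at boot (omitted here) */
static spinlock_t atomic_locks[ATOMIC_HASH_SIZE];

static spinlock_t *atomic_hash(volatile void *addr)
{
	/* drop the low bits so neighbouring words spread across locks */
	return &atomic_locks[((unsigned long)addr >> 4) % ATOMIC_HASH_SIZE];
}

static unsigned long hashed_lock_cmpxchg(volatile unsigned long *m,
					 unsigned long old, unsigned long new)
{
	unsigned long ret, flags;
	spinlock_t *lock = atomic_hash((volatile void *)m);

	spin_lock_irqsave(lock, flags);
	ret = *m;
	if (ret == old)
		*m = new;
	spin_unlock_irqrestore(lock, flags);
	return ret;
}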
So, all in all, I'd be surprised if any architecture lacked
cmpxchg() today. It might be possible that a new architecture that just
came in would not have taken care of deploying cmpxchg, but that should
be easily fixable.
Please see Documentation/atomic_ops.txt for details.
Thanks,
Mathieu
>
> --
> Regards,
> Changli Gao(xiaosuo@gmail.com)
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
* Re: [PATCH] netfilter: save the hash of the tuple in the original direction for later use
2010-08-19 12:35 ` Mathieu Desnoyers
@ 2010-08-19 15:31 ` Eric Dumazet
From: Eric Dumazet @ 2010-08-19 15:31 UTC
To: Mathieu Desnoyers
Cc: Changli Gao, Patrick McHardy, David S. Miller, netfilter-devel,
linux-kernel
On Thursday, 19 August 2010 at 08:35 -0400, Mathieu Desnoyers wrote:
> I made sure it was available on all architectures with that patchset,
> with the intended goal of using it (cmpxchg, cmpxchg_local for 32/64-bit
> and cmpxchg64, cmpxchg64_local for 64-bit) in arch-agnostic tracer code.
>
> I added a cmpxchg emulated with irqs off for uniprocessor-only
> architectures that did not have CAS support. sparc32 moved to a
> hashed-lock scheme for its atomic operations long ago, which makes the
> full 32 bits available.
>
> So, all in all, I'd be surprised if any architecture lacked
> cmpxchg() today. It might be possible that a new architecture that just
> came in would not have taken care of deploying cmpxchg, but that should
> be easily fixable.
That's pretty cool, thanks for the clarification!