All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
@ 2010-06-09 16:11 Eric Dumazet
  2010-06-09 16:30 ` Patrick McHardy
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2010-06-09 16:11 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

- clusterip_lock becomes a spinlock
- lockless lookups

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c |   40 ++++++++++++++++-----------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b..d83ad7f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
+	struct rcu_head rcu;
 };
 
 static LIST_HEAD(clusterip_configs);
 
 /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
 	atomic_inc(&c->refcount);
 }
 
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (atomic_dec_and_test(&c->refcount))
-		kfree(c);
+		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
@@ -84,10 +91,10 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-	write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	if (atomic_dec_and_test(&c->entries)) {
 		list_del(&c->list);
-		write_unlock_bh(&clusterip_lock);
+		spin_unlock_bh(&clusterip_lock);
 
 		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
@@ -100,7 +107,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
 		return;
 	}
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
 }
 
 static struct clusterip_config *
@@ -108,7 +115,7 @@ __clusterip_config_find(__be32 clusterip)
 {
 	struct clusterip_config *c;
 
-	list_for_each_entry(c, &clusterip_configs, list) {
+	list_for_each_entry_rcu(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
@@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
 
-	read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		atomic_inc(&c->refcount);
+		if (entry)
+			atomic_inc(&c->entries);
 	}
-	atomic_inc(&c->refcount);
-	if (entry)
-		atomic_inc(&c->entries);
-	read_unlock_bh(&clusterip_lock);
+	rcu_read_unlock_bh();
 
 	return c;
 }
@@ -181,9 +186,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	}
 #endif
 
-	write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	list_add(&c->list, &clusterip_configs);
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
 
 	return c;
 }
@@ -733,6 +738,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
 	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
+
+	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+	rcu_barrier_bh();
 }
 
 module_init(clusterip_tg_init);



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
  2010-06-09 16:11 [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion Eric Dumazet
@ 2010-06-09 16:30 ` Patrick McHardy
  2010-06-09 17:11   ` Eric Dumazet
  0 siblings, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2010-06-09 16:30 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netfilter Development Mailinglist

Eric Dumazet wrote:
> @@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
>  {
>  	struct clusterip_config *c;
>  
> -	read_lock_bh(&clusterip_lock);
> +	rcu_read_lock_bh();
>  	c = __clusterip_config_find(clusterip);
> -	if (!c) {
> -		read_unlock_bh(&clusterip_lock);
> -		return NULL;
> +	if (c) {
> +		atomic_inc(&c->refcount);
> +		if (entry)
> +			atomic_inc(&c->entries);
>   

Shouldn't this be using atomic_inc_not_zero() to avoid races with
clusterip_config_entry_put()?


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
  2010-06-09 16:30 ` Patrick McHardy
@ 2010-06-09 17:11   ` Eric Dumazet
  2010-06-14 16:29     ` Patrick McHardy
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2010-06-09 17:11 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

Le mercredi 09 juin 2010 à 18:30 +0200, Patrick McHardy a écrit :
> Eric Dumazet wrote:
> > @@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
> >  {
> >  	struct clusterip_config *c;
> >  
> > -	read_lock_bh(&clusterip_lock);
> > +	rcu_read_lock_bh();
> >  	c = __clusterip_config_find(clusterip);
> > -	if (!c) {
> > -		read_unlock_bh(&clusterip_lock);
> > -		return NULL;
> > +	if (c) {
> > +		atomic_inc(&c->refcount);
> > +		if (entry)
> > +			atomic_inc(&c->entries);
> >   
> 
> Shouldn't this be using atomic_inc_not_zero() to avoid races with
> clusterip_config_entry_put()?
> 

Ah yes, I should stop doing patches today !

Hmm, I am not sure current code is correct ?

clusterip_config_find_get() can return a struct clusterip_config pointer
to an entry just that was given to clusterip_config_entry_put()
(c->entries == 0)

Then we access c->dev, while clusterip_config_entry_put() did a
dev_put(c->dev) on it ?

So following 'standard rcu lookup' works, but race is still there ?

static inline struct clusterip_config *
clusterip_config_find_get(__be32 clusterip, int entry)
{
        struct clusterip_config *c;

        rcu_read_lock_bh();
        c = __clusterip_config_find(clusterip);
        if (c) {
                if (unlikely(!atomic_inc_not_zero(&c->refcount)))
                        c  = NULL;
                else if (entry)
                        atomic_inc(&c->entries);
        }
        rcu_read_unlock_bh();

        return c;
}


updated patch for reference :

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b..ecd77b1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
+	struct rcu_head rcu;
 };
 
 static LIST_HEAD(clusterip_configs);
 
 /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
 	atomic_inc(&c->refcount);
 }
 
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (atomic_dec_and_test(&c->refcount))
-		kfree(c);
+		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
@@ -84,10 +91,10 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-	write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	if (atomic_dec_and_test(&c->entries)) {
 		list_del(&c->list);
-		write_unlock_bh(&clusterip_lock);
+		spin_unlock_bh(&clusterip_lock);
 
 		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
@@ -100,7 +107,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
 		return;
 	}
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
 }
 
 static struct clusterip_config *
@@ -108,7 +115,7 @@ __clusterip_config_find(__be32 clusterip)
 {
 	struct clusterip_config *c;
 
-	list_for_each_entry(c, &clusterip_configs, list) {
+	list_for_each_entry_rcu(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
@@ -121,16 +128,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
 
-	read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+			c  = NULL;
+		else if (entry)
+			atomic_inc(&c->entries);
 	}
-	atomic_inc(&c->refcount);
-	if (entry)
-		atomic_inc(&c->entries);
-	read_unlock_bh(&clusterip_lock);
+	rcu_read_unlock_bh();
 
 	return c;
 }
@@ -181,9 +187,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	}
 #endif
 
-	write_lock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
 	list_add(&c->list, &clusterip_configs);
-	write_unlock_bh(&clusterip_lock);
+	spin_unlock_bh(&clusterip_lock);
 
 	return c;
 }
@@ -733,6 +739,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
 	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
+
+	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+	rcu_barrier_bh();
 }
 
 module_init(clusterip_tg_init);


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
  2010-06-09 17:11   ` Eric Dumazet
@ 2010-06-14 16:29     ` Patrick McHardy
  2010-06-14 19:59       ` Eric Dumazet
  0 siblings, 1 reply; 6+ messages in thread
From: Patrick McHardy @ 2010-06-14 16:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netfilter Development Mailinglist

Eric Dumazet wrote:
> Le mercredi 09 juin 2010 à 18:30 +0200, Patrick McHardy a écrit :
>   
>> Eric Dumazet wrote:
>>     
>>> @@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
>>>  {
>>>  	struct clusterip_config *c;
>>>  
>>> -	read_lock_bh(&clusterip_lock);
>>> +	rcu_read_lock_bh();
>>>  	c = __clusterip_config_find(clusterip);
>>> -	if (!c) {
>>> -		read_unlock_bh(&clusterip_lock);
>>> -		return NULL;
>>> +	if (c) {
>>> +		atomic_inc(&c->refcount);
>>> +		if (entry)
>>> +			atomic_inc(&c->entries);
>>>   
>>>       
>> Shouldn't this be using atomic_inc_not_zero() to avoid races with
>> clusterip_config_entry_put()?
>>
>>     
>
> Ah yes, I should stop doing patches today !
>
> Hmm, I am not sure current code is correct ?
>
> clusterip_config_find_get() can return a struct clusterip_config pointer
> to an entry just that was given to clusterip_config_entry_put()
> (c->entries == 0)
>
> Then we access c->dev, while clusterip_config_entry_put() did a
> dev_put(c->dev) on it ?
>   

In the clusterip_tg_check() case we hold a "entries" reference, so
this can't happen. In arp_mangle() the device pointer is only used
for comparison (except for the pr_debug statement), so at least it
won't crash.

I wonder why this isn't simply using the refcount for both rule
entries and other references.

> So following 'standard rcu lookup' works, but race is still there ?
>
> static inline struct clusterip_config *
> clusterip_config_find_get(__be32 clusterip, int entry)
> {
>         struct clusterip_config *c;
>
>         rcu_read_lock_bh();
>         c = __clusterip_config_find(clusterip);
>         if (c) {
>                 if (unlikely(!atomic_inc_not_zero(&c->refcount)))
>                         c  = NULL;
>                 else if (entry)
>                         atomic_inc(&c->entries);
>         }
>         rcu_read_unlock_bh();
>
>         return c;
> }
>
>
> updated patch for reference :
>
> diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> index f91c94b..ecd77b1 100644
> --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
> +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> @@ -53,12 +53,13 @@ struct clusterip_config {
>  #endif
>  	enum clusterip_hashmode hash_mode;	/* which hashing mode */
>  	u_int32_t hash_initval;			/* hash initialization */
> +	struct rcu_head rcu;
>  };
>  
>  static LIST_HEAD(clusterip_configs);
>  
>  /* clusterip_lock protects the clusterip_configs list */
> -static DEFINE_RWLOCK(clusterip_lock);
> +static DEFINE_SPINLOCK(clusterip_lock);
>  
>  #ifdef CONFIG_PROC_FS
>  static const struct file_operations clusterip_proc_fops;
> @@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
>  	atomic_inc(&c->refcount);
>  }
>  
> +
> +static void clusterip_config_rcu_free(struct rcu_head *head)
> +{
> +	kfree(container_of(head, struct clusterip_config, rcu));
> +}
> +
>  static inline void
>  clusterip_config_put(struct clusterip_config *c)
>  {
>  	if (atomic_dec_and_test(&c->refcount))
> -		kfree(c);
> +		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
>  }
>  
>  /* decrease the count of entries using/referencing this config.  If last
> @@ -84,10 +91,10 @@ clusterip_config_put(struct clusterip_config *c)
>  static inline void
>  clusterip_config_entry_put(struct clusterip_config *c)
>  {
> -	write_lock_bh(&clusterip_lock);
> +	spin_lock_bh(&clusterip_lock);
>  	if (atomic_dec_and_test(&c->entries)) {
>  		list_del(&c->list);
> -		write_unlock_bh(&clusterip_lock);
> +		spin_unlock_bh(&clusterip_lock);
>  
>  		dev_mc_del(c->dev, c->clustermac);
>  		dev_put(c->dev);
> @@ -100,7 +107,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
>  #endif
>  		return;
>  	}
> -	write_unlock_bh(&clusterip_lock);
> +	spin_unlock_bh(&clusterip_lock);
>  }
>  
>  static struct clusterip_config *
> @@ -108,7 +115,7 @@ __clusterip_config_find(__be32 clusterip)
>  {
>  	struct clusterip_config *c;
>  
> -	list_for_each_entry(c, &clusterip_configs, list) {
> +	list_for_each_entry_rcu(c, &clusterip_configs, list) {
>  		if (c->clusterip == clusterip)
>  			return c;
>  	}
> @@ -121,16 +128,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
>  {
>  	struct clusterip_config *c;
>  
> -	read_lock_bh(&clusterip_lock);
> +	rcu_read_lock_bh();
>  	c = __clusterip_config_find(clusterip);
> -	if (!c) {
> -		read_unlock_bh(&clusterip_lock);
> -		return NULL;
> +	if (c) {
> +		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
> +			c  = NULL;
> +		else if (entry)
> +			atomic_inc(&c->entries);
>  	}
> -	atomic_inc(&c->refcount);
> -	if (entry)
> -		atomic_inc(&c->entries);
> -	read_unlock_bh(&clusterip_lock);
> +	rcu_read_unlock_bh();
>  
>  	return c;
>  }
> @@ -181,9 +187,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
>  	}
>  #endif
>  
> -	write_lock_bh(&clusterip_lock);
> +	spin_lock_bh(&clusterip_lock);
>  	list_add(&c->list, &clusterip_configs);
> -	write_unlock_bh(&clusterip_lock);
> +	spin_unlock_bh(&clusterip_lock);
>  
>  	return c;
>  }
> @@ -733,6 +739,9 @@ static void __exit clusterip_tg_exit(void)
>  #endif
>  	nf_unregister_hook(&cip_arp_ops);
>  	xt_unregister_target(&clusterip_tg_reg);
> +
> +	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
> +	rcu_barrier_bh();
>  }
>  
>  module_init(clusterip_tg_init);
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>   

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
  2010-06-14 16:29     ` Patrick McHardy
@ 2010-06-14 19:59       ` Eric Dumazet
  2010-06-15 11:10         ` Patrick McHardy
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2010-06-14 19:59 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist

Le lundi 14 juin 2010 à 18:29 +0200, Patrick McHardy a écrit :

> In the clusterip_tg_check() case we hold a "entries" reference, so
> this can't happen. In arp_mangle() the device pointer is only used
> for comparison (except for the pr_debug statement), so at least it
> won't crash.
> 
> I wonder why this isn't simply using the refcount for both rule
> entries and other references.
> 

So there is no race. Since arp_mangle() runs in a non preempt section,
rcu grace period of device dismantle protects us.

Thanks !

[PATCH nf-next-2.6 v2] netfilter: CLUSTERIP: RCU conversion

- clusterip_lock becomes a spinlock
- lockless lookups
- kfree() deferred after RCU grace period
- rcu_barrier_bh() inserted in clusterip_tg_exit()

v2) 
- As Patrick pointed out, we use atomic_inc_not_zero() in
clusterip_config_find_get().
- list_add_rcu() and list_del_rcu() variants are used.
- atomic_dec_and_lock() used in clusterip_config_entry_put()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c |   48 ++++++++++++++++-----------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b..64d0875 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
+	struct rcu_head rcu;
 };
 
 static LIST_HEAD(clusterip_configs);
 
 /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
 	atomic_inc(&c->refcount);
 }
 
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (atomic_dec_and_test(&c->refcount))
-		kfree(c);
+		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-	write_lock_bh(&clusterip_lock);
-	if (atomic_dec_and_test(&c->entries)) {
-		list_del(&c->list);
-		write_unlock_bh(&clusterip_lock);
+	local_bh_disable();
+	if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
+		list_del_rcu(&c->list);
+		spin_unlock(&clusterip_lock);
+		local_bh_enable();
 
 		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
 		return;
 	}
-	write_unlock_bh(&clusterip_lock);
+	local_bh_enable();
 }
 
 static struct clusterip_config *
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
 {
 	struct clusterip_config *c;
 
-	list_for_each_entry(c, &clusterip_configs, list) {
+	list_for_each_entry_rcu(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
 
-	read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+			c = NULL;
+		else if (entry)
+			atomic_inc(&c->entries);
 	}
-	atomic_inc(&c->refcount);
-	if (entry)
-		atomic_inc(&c->entries);
-	read_unlock_bh(&clusterip_lock);
+	rcu_read_unlock_bh();
 
 	return c;
 }
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	}
 #endif
 
-	write_lock_bh(&clusterip_lock);
-	list_add(&c->list, &clusterip_configs);
-	write_unlock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
+	list_add_rcu(&c->list, &clusterip_configs);
+	spin_unlock_bh(&clusterip_lock);
 
 	return c;
 }
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
 	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
+
+	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+	rcu_barrier_bh();
 }
 
 module_init(clusterip_tg_init);


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
  2010-06-14 19:59       ` Eric Dumazet
@ 2010-06-15 11:10         ` Patrick McHardy
  0 siblings, 0 replies; 6+ messages in thread
From: Patrick McHardy @ 2010-06-15 11:10 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netfilter Development Mailinglist

Eric Dumazet wrote:
> [PATCH nf-next-2.6 v2] netfilter: CLUSTERIP: RCU conversion
>
> - clusterip_lock becomes a spinlock
> - lockless lookups
> - kfree() deferred after RCU grace period
> - rcu_barrier_bh() inserted in clusterip_tg_exit()
>
> v2) 
> - As Patrick pointed out, we use atomic_inc_not_zero() in
> clusterip_config_find_get().
> - list_add_rcu() and list_del_rcu() variants are used.
> - atomic_dec_and_lock() used in clusterip_config_entry_put()
>   

Applied, thanks Eric.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-06-15 11:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-09 16:11 [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion Eric Dumazet
2010-06-09 16:30 ` Patrick McHardy
2010-06-09 17:11   ` Eric Dumazet
2010-06-14 16:29     ` Patrick McHardy
2010-06-14 19:59       ` Eric Dumazet
2010-06-15 11:10         ` Patrick McHardy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.