From: Patrick McHardy <kaber@trash.net>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Netfilter Development Mailinglist <netfilter-devel@vger.kernel.org>
Subject: Re: [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion
Date: Mon, 14 Jun 2010 18:29:03 +0200 [thread overview]
Message-ID: <4C1658CF.4040804@trash.net> (raw)
In-Reply-To: <1276103507.2442.214.camel@edumazet-laptop>
Eric Dumazet wrote:
> Le mercredi 09 juin 2010 à 18:30 +0200, Patrick McHardy a écrit :
>
>> Eric Dumazet wrote:
>>
>>> @@ -121,16 +128,14 @@ clusterip_config_find_get(__be32 clusterip, int entry)
>>> {
>>> struct clusterip_config *c;
>>>
>>> - read_lock_bh(&clusterip_lock);
>>> + rcu_read_lock_bh();
>>> c = __clusterip_config_find(clusterip);
>>> - if (!c) {
>>> - read_unlock_bh(&clusterip_lock);
>>> - return NULL;
>>> + if (c) {
>>> + atomic_inc(&c->refcount);
>>> + if (entry)
>>> + atomic_inc(&c->entries);
>>>
>>>
>> Shouldn't this be using atomic_inc_not_zero() to avoid races with
>> clusterip_config_entry_put()?
>>
>>
>
> Ah yes, I should stop doing patches today !
>
> Hmm, I am not sure current code is correct ?
>
> clusterip_config_find_get() can return a struct clusterip_config pointer
> to an entry just that was given to clusterip_config_entry_put()
> (c->entries == 0)
>
> Then we access c->dev, while clusterip_config_entry_put() did a
> dev_put(c->dev) on it ?
>
In the clusterip_tg_check() case we hold a "entries" reference, so
this can't happen. In arp_mangle() the device pointer is only used
for comparison (except for the pr_debug statement), so at least it
won't crash.
I wonder why this isn't simply using the refcount for both rule
entries and other references.
> So following 'standard rcu lookup' works, but race is still there ?
>
> static inline struct clusterip_config *
> clusterip_config_find_get(__be32 clusterip, int entry)
> {
> struct clusterip_config *c;
>
> rcu_read_lock_bh();
> c = __clusterip_config_find(clusterip);
> if (c) {
> if (unlikely(!atomic_inc_not_zero(&c->refcount)))
> c = NULL;
> else if (entry)
> atomic_inc(&c->entries);
> }
> rcu_read_unlock_bh();
>
> return c;
> }
>
>
> updated patch for reference :
>
> diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> index f91c94b..ecd77b1 100644
> --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
> +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
> @@ -53,12 +53,13 @@ struct clusterip_config {
> #endif
> enum clusterip_hashmode hash_mode; /* which hashing mode */
> u_int32_t hash_initval; /* hash initialization */
> + struct rcu_head rcu;
> };
>
> static LIST_HEAD(clusterip_configs);
>
> /* clusterip_lock protects the clusterip_configs list */
> -static DEFINE_RWLOCK(clusterip_lock);
> +static DEFINE_SPINLOCK(clusterip_lock);
>
> #ifdef CONFIG_PROC_FS
> static const struct file_operations clusterip_proc_fops;
> @@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
> atomic_inc(&c->refcount);
> }
>
> +
> +static void clusterip_config_rcu_free(struct rcu_head *head)
> +{
> + kfree(container_of(head, struct clusterip_config, rcu));
> +}
> +
> static inline void
> clusterip_config_put(struct clusterip_config *c)
> {
> if (atomic_dec_and_test(&c->refcount))
> - kfree(c);
> + call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
> }
>
> /* decrease the count of entries using/referencing this config. If last
> @@ -84,10 +91,10 @@ clusterip_config_put(struct clusterip_config *c)
> static inline void
> clusterip_config_entry_put(struct clusterip_config *c)
> {
> - write_lock_bh(&clusterip_lock);
> + spin_lock_bh(&clusterip_lock);
> if (atomic_dec_and_test(&c->entries)) {
> list_del(&c->list);
> - write_unlock_bh(&clusterip_lock);
> + spin_unlock_bh(&clusterip_lock);
>
> dev_mc_del(c->dev, c->clustermac);
> dev_put(c->dev);
> @@ -100,7 +107,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
> #endif
> return;
> }
> - write_unlock_bh(&clusterip_lock);
> + spin_unlock_bh(&clusterip_lock);
> }
>
> static struct clusterip_config *
> @@ -108,7 +115,7 @@ __clusterip_config_find(__be32 clusterip)
> {
> struct clusterip_config *c;
>
> - list_for_each_entry(c, &clusterip_configs, list) {
> + list_for_each_entry_rcu(c, &clusterip_configs, list) {
> if (c->clusterip == clusterip)
> return c;
> }
> @@ -121,16 +128,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
> {
> struct clusterip_config *c;
>
> - read_lock_bh(&clusterip_lock);
> + rcu_read_lock_bh();
> c = __clusterip_config_find(clusterip);
> - if (!c) {
> - read_unlock_bh(&clusterip_lock);
> - return NULL;
> + if (c) {
> + if (unlikely(!atomic_inc_not_zero(&c->refcount)))
> + c = NULL;
> + else if (entry)
> + atomic_inc(&c->entries);
> }
> - atomic_inc(&c->refcount);
> - if (entry)
> - atomic_inc(&c->entries);
> - read_unlock_bh(&clusterip_lock);
> + rcu_read_unlock_bh();
>
> return c;
> }
> @@ -181,9 +187,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
> }
> #endif
>
> - write_lock_bh(&clusterip_lock);
> + spin_lock_bh(&clusterip_lock);
> list_add(&c->list, &clusterip_configs);
> - write_unlock_bh(&clusterip_lock);
> + spin_unlock_bh(&clusterip_lock);
>
> return c;
> }
> @@ -733,6 +739,9 @@ static void __exit clusterip_tg_exit(void)
> #endif
> nf_unregister_hook(&cip_arp_ops);
> xt_unregister_target(&clusterip_tg_reg);
> +
> + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
> + rcu_barrier_bh();
> }
>
> module_init(clusterip_tg_init);
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2010-06-14 16:29 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-09 16:11 [PATCH nf-next-2.6] netfilter: CLUSTERIP: RCU conversion Eric Dumazet
2010-06-09 16:30 ` Patrick McHardy
2010-06-09 17:11 ` Eric Dumazet
2010-06-14 16:29 ` Patrick McHardy [this message]
2010-06-14 19:59 ` Eric Dumazet
2010-06-15 11:10 ` Patrick McHardy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C1658CF.4040804@trash.net \
--to=kaber@trash.net \
--cc=eric.dumazet@gmail.com \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.