netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives
       [not found] ` <1265932839-25899-5-git-send-email-paulmck@linux.vnet.ibm.com>
@ 2010-02-12  4:15   ` Eric Dumazet
  2010-02-14  8:23     ` Ingo Molnar
  2010-02-14  8:34     ` Michał Mirosław
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Dumazet @ 2010-02-12  4:15 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mingo, laijs, dipankar, akpm, mathieu.desnoyers,
	josh, dvhltc, niv, tglx, peterz, rostedt, Valdis.Kletnieks,
	dhowells, David Miller, netdev

Le jeudi 11 février 2010 à 16:00 -0800, Paul E. McKenney a écrit :
> Update rcu_dereference() primitives to use new lockdep-based checking.
> The rcu_dereference() in __in6_dev_get() may be protected either by
> rcu_read_lock() or RTNL, per Eric Dumazet.  The rcu_dereference()
> in __sk_free() is protected by the fact that it is never reached if an
> update could change it.  Check for this by using rcu_dereference_check()
> to verify that the struct sock's ->sk_wmem_alloc counter is zero.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

CC to netdev and David Miller, network maintainer.

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks Paul, great work !

> ---
>  include/linux/rtnetlink.h |    3 +++
>  include/net/addrconf.h    |    4 +++-
>  net/core/dev.c            |    2 +-
>  net/core/filter.c         |    6 +++---
>  net/core/rtnetlink.c      |    8 ++++++++
>  net/core/sock.c           |    3 ++-
>  net/decnet/dn_route.c     |   14 +++++++-------
>  net/ipv4/route.c          |   14 +++++++-------
>  net/packet/af_packet.c    |    2 +-
>  9 files changed, 35 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index 05330fc..5c52fa4 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -735,6 +735,9 @@ extern void rtnl_lock(void);
>  extern void rtnl_unlock(void);
>  extern int rtnl_trylock(void);
>  extern int rtnl_is_locked(void);
> +#ifdef CONFIG_PROVE_LOCKING
> +extern int lockdep_rtnl_is_held(void);
> +#endif /* #ifdef CONFIG_PROVE_LOCKING */
>  
>  extern void rtnetlink_init(void);
>  extern void __rtnl_unlock(void);
> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> index 0f7c378..45375b4 100644
> --- a/include/net/addrconf.h
> +++ b/include/net/addrconf.h
> @@ -177,7 +177,9 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb);
>  static inline struct inet6_dev *
>  __in6_dev_get(struct net_device *dev)
>  {
> -	return rcu_dereference(dev->ip6_ptr);
> +	return rcu_dereference_check(dev->ip6_ptr,
> +				     rcu_read_lock_held() ||
> +				     lockdep_rtnl_is_held());
>  }
>  
>  static inline struct inet6_dev *
> diff --git a/net/core/dev.c b/net/core/dev.c
> index be9924f..0d0ff82 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2041,7 +2041,7 @@ gso:
>  	rcu_read_lock_bh();
>  
>  	txq = dev_pick_tx(dev, skb);
> -	q = rcu_dereference(txq->qdisc);
> +	q = rcu_dereference_bh(txq->qdisc);
>  
>  #ifdef CONFIG_NET_CLS_ACT
>  	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 08db7b9..3541aa4 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -86,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
>  		return err;
>  
>  	rcu_read_lock_bh();
> -	filter = rcu_dereference(sk->sk_filter);
> +	filter = rcu_dereference_bh(sk->sk_filter);
>  	if (filter) {
>  		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
>  				filter->len);
> @@ -521,7 +521,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
>  	}
>  
>  	rcu_read_lock_bh();
> -	old_fp = rcu_dereference(sk->sk_filter);
> +	old_fp = rcu_dereference_bh(sk->sk_filter);
>  	rcu_assign_pointer(sk->sk_filter, fp);
>  	rcu_read_unlock_bh();
>  
> @@ -536,7 +536,7 @@ int sk_detach_filter(struct sock *sk)
>  	struct sk_filter *filter;
>  
>  	rcu_read_lock_bh();
> -	filter = rcu_dereference(sk->sk_filter);
> +	filter = rcu_dereference_bh(sk->sk_filter);
>  	if (filter) {
>  		rcu_assign_pointer(sk->sk_filter, NULL);
>  		sk_filter_delayed_uncharge(sk, filter);
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 794bcb8..4c7d3f6 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -89,6 +89,14 @@ int rtnl_is_locked(void)
>  }
>  EXPORT_SYMBOL(rtnl_is_locked);
>  
> +#ifdef CONFIG_PROVE_LOCKING
> +int lockdep_rtnl_is_held(void)
> +{
> +	return lockdep_is_held(&rtnl_mutex);
> +}
> +EXPORT_SYMBOL(lockdep_rtnl_is_held);
> +#endif /* #ifdef CONFIG_PROVE_LOCKING */
> +
>  static struct rtnl_link *rtnl_msg_handlers[NPROTO];
>  
>  static inline int rtm_msgindex(int msgtype)
> diff --git a/net/core/sock.c b/net/core/sock.c
> index e1f6f22..305cba4 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1073,7 +1073,8 @@ static void __sk_free(struct sock *sk)
>  	if (sk->sk_destruct)
>  		sk->sk_destruct(sk);
>  
> -	filter = rcu_dereference(sk->sk_filter);
> +	filter = rcu_dereference_check(sk->sk_filter,
> +				       atomic_read(&sk->sk_wmem_alloc) == 0);
>  	if (filter) {
>  		sk_filter_uncharge(sk, filter);
>  		rcu_assign_pointer(sk->sk_filter, NULL);
> diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
> index a032840..a7bf03c 100644
> --- a/net/decnet/dn_route.c
> +++ b/net/decnet/dn_route.c
> @@ -1155,8 +1155,8 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
>  
>  	if (!(flags & MSG_TRYHARD)) {
>  		rcu_read_lock_bh();
> -		for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt;
> -			rt = rcu_dereference(rt->u.dst.dn_next)) {
> +		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
> +			rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
>  			if ((flp->fld_dst == rt->fl.fld_dst) &&
>  			    (flp->fld_src == rt->fl.fld_src) &&
>  			    (flp->mark == rt->fl.mark) &&
> @@ -1618,9 +1618,9 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  		if (h > s_h)
>  			s_idx = 0;
>  		rcu_read_lock_bh();
> -		for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0;
> +		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
>  			rt;
> -			rt = rcu_dereference(rt->u.dst.dn_next), idx++) {
> +			rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
>  			if (idx < s_idx)
>  				continue;
>  			skb_dst_set(skb, dst_clone(&rt->u.dst));
> @@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
>  
>  	for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
>  		rcu_read_lock_bh();
> -		rt = dn_rt_hash_table[s->bucket].chain;
> +		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
>  		if (rt)
>  			break;
>  		rcu_read_unlock_bh();
>  	}
> -	return rcu_dereference(rt);
> +	return rt;
>  }
>  
>  static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
> @@ -1674,7 +1674,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
>  		rcu_read_lock_bh();
>  		rt = dn_rt_hash_table[s->bucket].chain;
>  	}
> -	return rcu_dereference(rt);
> +	return rcu_dereference_bh(rt);
>  }
>  
>  static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index e446496..3476b3b 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
>  		if (!rt_hash_table[st->bucket].chain)
>  			continue;
>  		rcu_read_lock_bh();
> -		r = rcu_dereference(rt_hash_table[st->bucket].chain);
> +		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
>  		while (r) {
>  			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
>  			    r->rt_genid == st->genid)
>  				return r;
> -			r = rcu_dereference(r->u.dst.rt_next);
> +			r = rcu_dereference_bh(r->u.dst.rt_next);
>  		}
>  		rcu_read_unlock_bh();
>  	}
> @@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
>  		rcu_read_lock_bh();
>  		r = rt_hash_table[st->bucket].chain;
>  	}
> -	return rcu_dereference(r);
> +	return rcu_dereference_bh(r);
>  }
>  
>  static struct rtable *rt_cache_get_next(struct seq_file *seq,
> @@ -2687,8 +2687,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
>  	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
>  
>  	rcu_read_lock_bh();
> -	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
> -		rth = rcu_dereference(rth->u.dst.rt_next)) {
> +	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
> +		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
>  		if (rth->fl.fl4_dst == flp->fl4_dst &&
>  		    rth->fl.fl4_src == flp->fl4_src &&
>  		    rth->fl.iif == 0 &&
> @@ -3006,8 +3006,8 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
>  		if (!rt_hash_table[h].chain)
>  			continue;
>  		rcu_read_lock_bh();
> -		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
> -		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
> +		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
> +		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
>  			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
>  				continue;
>  			if (rt_is_expired(rt))
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index f126d18..939471e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -508,7 +508,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
>  	struct sk_filter *filter;
>  
>  	rcu_read_lock_bh();
> -	filter = rcu_dereference(sk->sk_filter);
> +	filter = rcu_dereference_bh(sk->sk_filter);
>  	if (filter != NULL)
>  		res = sk_run_filter(skb, filter->insns, filter->len);
>  	rcu_read_unlock_bh();

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives
  2010-02-12  4:15   ` [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives Eric Dumazet
@ 2010-02-14  8:23     ` Ingo Molnar
  2010-02-15  7:18       ` David Miller
  2010-02-14  8:34     ` Michał Mirosław
  1 sibling, 1 reply; 5+ messages in thread
From: Ingo Molnar @ 2010-02-14  8:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Paul E. McKenney, linux-kernel, laijs, dipankar, akpm,
	mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz, rostedt,
	Valdis.Kletnieks, dhowells, David Miller, netdev


* Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le jeudi 11 février 2010 à 16:00 -0800, Paul E. McKenney a écrit :
> > Update rcu_dereference() primitives to use new lockdep-based checking.
> > The rcu_dereference() in __in6_dev_get() may be protected either by
> > rcu_read_lock() or RTNL, per Eric Dumazet.  The rcu_dereference()
> > in __sk_free() is protected by the fact that it is never reached if an
> > update could change it.  Check for this by using rcu_dereference_check()
> > to verify that the struct sock's ->sk_wmem_alloc counter is zero.
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> CC to netdev and David Miller, network maintainer.
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Thanks Paul, great work !

Dave, does this look good to you too? Cannot pick up the rest of the patchset 
without these checks/annotations into the RCU tree as there's too many 
warnings triggering in the networking code. So it's an all-or-nothing 
patchset in that regard.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives
  2010-02-12  4:15   ` [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives Eric Dumazet
  2010-02-14  8:23     ` Ingo Molnar
@ 2010-02-14  8:34     ` Michał Mirosław
  2010-02-14  8:50       ` Eric Dumazet
  1 sibling, 1 reply; 5+ messages in thread
From: Michał Mirosław @ 2010-02-14  8:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Paul E. McKenney, linux-kernel, mingo, laijs, dipankar, akpm,
	mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz, rostedt,
	Valdis.Kletnieks, dhowells, David Miller, netdev

2010/2/12 Eric Dumazet <eric.dumazet@gmail.com>:
> Le jeudi 11 février 2010 à 16:00 -0800, Paul E. McKenney a écrit :
[...]
>> @@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
>>
>>       for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
>>               rcu_read_lock_bh();
>> -             rt = dn_rt_hash_table[s->bucket].chain;
>> +             rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
>>               if (rt)
>>                       break;
>>               rcu_read_unlock_bh();
>>       }
>> -     return rcu_dereference(rt);
>> +     return rt;
>>  }

Isn't there a bug? Looks like data pointed to by rt should be
protected by RCU, but the rcu_read_lock is withdrawn before access.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference()  primitives
  2010-02-14  8:34     ` Michał Mirosław
@ 2010-02-14  8:50       ` Eric Dumazet
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Dumazet @ 2010-02-14  8:50 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Paul E. McKenney, linux-kernel, mingo, laijs, dipankar, akpm,
	mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz, rostedt,
	Valdis.Kletnieks, dhowells, David Miller, netdev

Le dimanche 14 février 2010 à 09:34 +0100, Michał Mirosław a écrit :
> 2010/2/12 Eric Dumazet <eric.dumazet@gmail.com>:
> > Le jeudi 11 février 2010 à 16:00 -0800, Paul E. McKenney a écrit :
> [...]
> >> @@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
> >>
> >>       for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
> >>               rcu_read_lock_bh();
> >> -             rt = dn_rt_hash_table[s->bucket].chain;
> >> +             rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
> >>               if (rt)
> >>                       break;
> >>               rcu_read_unlock_bh();
> >>       }
> >> -     return rcu_dereference(rt);
> >> +     return rt;
> >>  }
> 
> Isn't there a bug? Looks like data pointed to by rt should be
> protected by RCU, but the rcu_read_lock is withdrawn before access.
> 

Not really a bug, since we exit from dn_rt_cache_get_first() with
rcu_read_lock_bh() still held.

We call the unlock only if NULL is returned, and rcu_dereference(NULL)
can be done in any context.

Paul had to move the rcu_dereference() so that no lockdep warning
triggers for rcu_dereference(NULL); it's more of a cleanup than a bug fix.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives
  2010-02-14  8:23     ` Ingo Molnar
@ 2010-02-15  7:18       ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2010-02-15  7:18 UTC (permalink / raw)
  To: mingo
  Cc: eric.dumazet, paulmck, linux-kernel, laijs, dipankar, akpm,
	mathieu.desnoyers, josh, dvhltc, niv, tglx, peterz, rostedt,
	Valdis.Kletnieks, dhowells, netdev

From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Feb 2010 09:23:14 +0100

> 
> * Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
>> Le jeudi 11 février 2010 à 16:00 -0800, Paul E. McKenney a écrit :
>> > Update rcu_dereference() primitives to use new lockdep-based checking.
>> > The rcu_dereference() in __in6_dev_get() may be protected either by
>> > rcu_read_lock() or RTNL, per Eric Dumazet.  The rcu_dereference()
>> > in __sk_free() is protected by the fact that it is never reached if an
>> > update could change it.  Check for this by using rcu_dereference_check()
>> > to verify that the struct sock's ->sk_wmem_alloc counter is zero.
>> > 
>> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>> 
>> CC to netdev and David Miller, network maintainer.
>> 
>> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
>> 
>> Thanks Paul, great work !
> 
> Dave, does this look good to you too? Cannot pick up the rest of the patchset 
> without these checks/annotations into the RCU tree as there's too many 
> warnings triggering in the networking code. So it's an all-or-nothing 
> patchset in that regard.

Looks good:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-02-15  7:18 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20100212000016.GA25781@linux.vnet.ibm.com>
     [not found] ` <1265932839-25899-5-git-send-email-paulmck@linux.vnet.ibm.com>
2010-02-12  4:15   ` [PATCH tip/core/rcu 05/13] net: add checking to rcu_dereference() primitives Eric Dumazet
2010-02-14  8:23     ` Ingo Molnar
2010-02-15  7:18       ` David Miller
2010-02-14  8:34     ` Michał Mirosław
2010-02-14  8:50       ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).