* [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer @ 2012-06-05 7:52 Gao feng [not found] ` <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> ` (2 more replies) 0 siblings, 3 replies; 13+ messages in thread From: Gao feng @ 2012-06-05 7:52 UTC (permalink / raw) To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, ebiederm-aS9lmoZGLiVWk0Htik3J/w, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w Cc: netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA now inetpeer doesn't support namespace,the information will be leaking across namespace. this patch move the global vars v4_peers and v6_peers to netns_ipv4 and netns_ipv6 as a field peers. add struct pernet_operations inetpeer_ops to initial pernet inetpeer data. and change family_to_base and inet_getpeer to support namespace. Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> --- include/net/inetpeer.h | 10 ++++--- include/net/netns/ipv4.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv4/inetpeer.c | 68 +++++++++++++++++++++++++++++++++------------ net/ipv4/route.c | 2 +- 5 files changed, 59 insertions(+), 23 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index b94765e..4a50449 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -72,7 +72,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p) } /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create); +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create); static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) { @@ -80,7 +82,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) @@ -89,14 +91,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); -extern void inetpeer_invalidate_tree(int family); +extern void inetpeer_invalidate_tree(struct net *net, int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bbd023a..0855e09 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -31,6 +31,7 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b42be53..df0a545 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -33,6 +33,7 @@ struct netns_ipv6 { struct netns_sysctl_ipv6 sysctl; struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d4d61b6..0dbf1c8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -88,18 +88,6 @@ struct inet_peer_base { int total; }; -static struct inet_peer_base v4_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), - .total = 0, -}; - -static struct inet_peer_base v6_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), - .total = 0, -}; - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -153,6 +141,45 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } +static int __net_init inetpeer_net_init(struct net *net) +{ + + net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv4.peers == NULL) + return -ENOMEM; + + net->ipv4.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv4.peers->lock); + + net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv6.peers == NULL) + goto out_ipv6; + + net->ipv6.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv6.peers->lock); + + return 0; +out_ipv6: + kfree(net->ipv4.peers); + return -ENOMEM; +} + +static void __net_exit inetpeer_net_exit(struct net *net) +{ + inetpeer_invalidate_tree(net, AF_INET); + kfree(net->ipv4.peers); + + inetpeer_invalidate_tree(net, AF_INET6); + kfree(net->ipv6.peers); +} + +static struct pernet_operations inetpeer_ops = { + .init = inetpeer_net_init, + .exit = inetpeer_net_exit, +}; + /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -177,6 +204,7 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -401,9 +429,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(int family) +static struct inet_peer_base *family_to_base(struct net *net, + int family) { - return family == AF_INET ? &v4_peers : &v6_peers; + return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; } /* perform garbage collect on all items stacked during a lookup */ @@ -443,10 +472,12 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(daddr->family); + struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; @@ -560,10 +591,10 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); -void inetpeer_invalidate_tree(int family) +void inetpeer_invalidate_tree(struct net *net, int family) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(family); + struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); @@ -586,3 +617,3 @@ out: write_sequnlock_bh(&base->lock); } EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ffcb3b0..e5b18b8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -938,7 +938,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(AF_INET); + inetpeer_invalidate_tree(net, AF_INET); } /* -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 13+ messages in thread
[parent not found: <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>]
* [PATCH net-next v2 2/2] inetpeer: add parameter net for inet_getpeer_v4, v6 [not found] ` <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> @ 2012-06-05 7:52 ` Gao feng 0 siblings, 0 replies; 13+ messages in thread From: Gao feng @ 2012-06-05 7:52 UTC (permalink / raw) To: serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, ebiederm-aS9lmoZGLiVWk0Htik3J/w, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, davem-fT/PcQaiUtIeIZ0/mPfg9Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w Cc: netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA add struct net as a parameter of inet_getpeer_v[4,6], use net to replace &init_net. and modify some places to provide net for inet_getpeer_v[4,6] Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> --- include/net/inetpeer.h | 12 ++++++++---- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 6 +++++- net/ipv4/route.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/route.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 ++++-- 7 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 4a50449..31101e0 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -76,22 +76,26 @@ struct inet_peer *inet_getpeer(struct net *net, const struct inetpeer_addr *daddr, int create); -static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) +static inline struct inet_peer *inet_getpeer_v4(struct net *net, + __be32 v4daddr, + int create) { struct inetpeer_addr daddr; daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } -static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) +static inline struct inet_peer *inet_getpeer_v6(struct net *net, + const struct in6_addr *v6daddr, + int create) { struct inetpeer_addr daddr; *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } /* can be called from BH context or outside */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5ff2a51..85190e6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, if (q == NULL) return NULL; + q->net = nf; f->constructor(q, arg); atomic_add(f->qsize, &nf->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - q->net = nf; return q; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9dbd3dd..22c6bab 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); + struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, + frags); + struct net *net = container_of(ipv4, struct net, ipv4); + struct ip4_create_arg *arg = a; qp->protocol = arg->iph->protocol; @@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e5b18b8..448e56b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,9 +1328,10 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v4(daddr, create); + peer = inet_getpeer_v4(net, daddr, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1694,7 +1695,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short est_mtu = 0; struct inet_peer *peer; - peer = inet_getpeer_v4(iph->daddr, 1); + peer = inet_getpeer_v4(net, iph->daddr, 1); if (peer) { unsigned short mtu = new_mtu; @@ -1935,6 +1936,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; int create = 0; @@ -1944,7 +1946,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); + rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create); if (peer) { rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a43b87d..50d4bee 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1822,11 +1822,12 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(inet->inet_daddr, 1); + peer = inet_getpeer_v4(net, inet->inet_daddr, 1); *release_it = true; } else { if (!rt->peer) @@ -1842,8 +1843,9 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); - return inet_getpeer_v4(tw->tw_daddr, 1); + return inet_getpeer_v4(net, tw->tw_daddr, 1); } EXPORT_SYMBOL(tcp_v4_tw_get_peer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982..4eca013 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); else diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 554d599..56aae14 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1734,11 +1734,12 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(&np->daddr, 1); + peer = inet_getpeer_v6(net, &np->daddr, 1); *release_it = true; } else { if (!rt->rt6i_peer) @@ -1754,11 +1755,12 @@ static void *tcp_v6_tw_get_peer(struct sock *sk) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); - return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); + return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1); } static struct timewait_sock_ops tcp6_timewait_sock_ops = { -- 1.7.7.6 ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 7:52 [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Gao feng [not found] ` <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> @ 2012-06-05 8:57 ` Eric Dumazet 2012-06-05 11:27 ` Steffen Klassert 2012-06-05 12:29 ` Gao feng 2012-06-06 17:43 ` David Miller 2 siblings, 2 replies; 13+ messages in thread From: Eric Dumazet @ 2012-06-05 8:57 UTC (permalink / raw) To: Gao feng Cc: serge.hallyn, ebiederm, herbert, steffen.klassert, davem, netdev, containers On Tue, 2012-06-05 at 15:52 +0800, Gao feng wrote: > +static void __net_exit inetpeer_net_exit(struct net *net) > +{ > + inetpeer_invalidate_tree(net, AF_INET); > + kfree(net->ipv4.peers); > + > + inetpeer_invalidate_tree(net, AF_INET6); > + kfree(net->ipv6.peers); > +} > + Are we 1000% sure no code ever run in inetpeer land after this call ? I would add net->ipv4.peers = NULL; net->ipv6.peers = NULL; to catch NULL deref instead of strange errors, just in case. By the way, I think we have a bug in inetpeer_gc_worker() Steffen ? We have no rcu grace period to make sure the following is safe : if (!atomic_read(&p->refcnt)) { list_del(&p->gc_list); kmem_cache_free(peer_cachep, p); } I'll post a fix like : diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d4d61b6..07731b5 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -137,7 +137,7 @@ static void inetpeer_gc_worker(struct work_struct *work) n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - if (!atomic_read(&p->refcnt)) { + if (atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { list_del(&p->gc_list); kmem_cache_free(peer_cachep, p); } ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 8:57 ` [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Eric Dumazet @ 2012-06-05 11:27 ` Steffen Klassert 2012-06-05 12:00 ` Eric Dumazet 2012-06-05 12:29 ` Gao feng 1 sibling, 1 reply; 13+ messages in thread From: Steffen Klassert @ 2012-06-05 11:27 UTC (permalink / raw) To: Eric Dumazet Cc: herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, davem-fT/PcQaiUtIeIZ0/mPfg9Q, ebiederm-aS9lmoZGLiVWk0Htik3J/w On Tue, Jun 05, 2012 at 10:57:06AM +0200, Eric Dumazet wrote: > On Tue, 2012-06-05 at 15:52 +0800, Gao feng wrote: > > > +static void __net_exit inetpeer_net_exit(struct net *net) > > +{ > > + inetpeer_invalidate_tree(net, AF_INET); > > + kfree(net->ipv4.peers); > > + > > + inetpeer_invalidate_tree(net, AF_INET6); > > + kfree(net->ipv6.peers); > > +} > > + > > Are we 1000% sure no code ever run in inetpeer land after this call ? > > I would add > net->ipv4.peers = NULL; > net->ipv6.peers = NULL; > > to catch NULL deref instead of strange errors, just in case. I thought about that too, and I'm not absolutely sure. The rest of this patch looks ok to me. > > By the way, I think we have a bug in inetpeer_gc_worker() > > Steffen ? > > We have no rcu grace period to make sure the following is safe : > > if (!atomic_read(&p->refcnt)) { > list_del(&p->gc_list); > kmem_cache_free(peer_cachep, p); > } I think this is ok as it is. inetpeer_invalidate_tree() unlinks the whole inetpeer tree from the inetpeer base and adds it to a gc_list. These intetpeer entries are stale, they can't be looked up again. So noone should increment the refcount, they just wait until the refcount get zero. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 11:27 ` Steffen Klassert @ 2012-06-05 12:00 ` Eric Dumazet 2012-06-05 12:15 ` Steffen Klassert 0 siblings, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-06-05 12:00 UTC (permalink / raw) To: Steffen Klassert Cc: Gao feng, serge.hallyn, ebiederm, herbert, davem, netdev, containers On Tue, 2012-06-05 at 13:27 +0200, Steffen Klassert wrote: > > > > By the way, I think we have a bug in inetpeer_gc_worker() > > > > Steffen ? > > > > We have no rcu grace period to make sure the following is safe : > > > > if (!atomic_read(&p->refcnt)) { > > list_del(&p->gc_list); > > kmem_cache_free(peer_cachep, p); > > } > > I think this is ok as it is. inetpeer_invalidate_tree() > unlinks the whole inetpeer tree from the inetpeer base and > adds it to a gc_list. These intetpeer entries are stale, > they can't be looked up again. So noone should increment the > refcount, they just wait until the refcount get zero. > Its not OK, lookups are done under rcu. Since there is no RCU grace period, the worker free the entries while another cpus are doing their lookups. Alternative would be to wait a RCU grace period before feeding them to worker. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 12:00 ` Eric Dumazet @ 2012-06-05 12:15 ` Steffen Klassert [not found] ` <20120605121510.GD27795-opNxpl+3fjRBDgjK7y7TUQ@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: Steffen Klassert @ 2012-06-05 12:15 UTC (permalink / raw) To: Eric Dumazet Cc: herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, davem-fT/PcQaiUtIeIZ0/mPfg9Q, ebiederm-aS9lmoZGLiVWk0Htik3J/w On Tue, Jun 05, 2012 at 02:00:30PM +0200, Eric Dumazet wrote: > On Tue, 2012-06-05 at 13:27 +0200, Steffen Klassert wrote: > > > > > > > By the way, I think we have a bug in inetpeer_gc_worker() > > > > > > Steffen ? > > > > > > We have no rcu grace period to make sure the following is safe : > > > > > > if (!atomic_read(&p->refcnt)) { > > > list_del(&p->gc_list); > > > kmem_cache_free(peer_cachep, p); > > > } > > > > I think this is ok as it is. inetpeer_invalidate_tree() > > unlinks the whole inetpeer tree from the inetpeer base and > > adds it to a gc_list. These intetpeer entries are stale, > > they can't be looked up again. So noone should increment the > > refcount, they just wait until the refcount get zero. > > > > Its not OK, lookups are done under rcu. > > Since there is no RCU grace period, the worker free the entries while > another cpus are doing their lookups. > Lookups are done under rcu, yes. But a lookup will not find these stale entries because the whole interpeer tree is removed from the inetpeer base before the worker is scheduled. A lookup would have to cerate a new inetpeer entry in this case. ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20120605121510.GD27795-opNxpl+3fjRBDgjK7y7TUQ@public.gmane.org>]
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer [not found] ` <20120605121510.GD27795-opNxpl+3fjRBDgjK7y7TUQ@public.gmane.org> @ 2012-06-05 12:18 ` Eric Dumazet 0 siblings, 0 replies; 13+ messages in thread From: Eric Dumazet @ 2012-06-05 12:18 UTC (permalink / raw) To: Steffen Klassert Cc: herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, davem-fT/PcQaiUtIeIZ0/mPfg9Q, ebiederm-aS9lmoZGLiVWk0Htik3J/w On Tue, 2012-06-05 at 14:15 +0200, Steffen Klassert wrote: > Lookups are done under rcu, yes. But a lookup will not find > these stale entries because the whole interpeer tree is removed > from the inetpeer base before the worker is scheduled. A lookup > would have to cerate a new inetpeer entry in this case. > Thats simply not true. You don't respect basic RCU rules. Please read them. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 8:57 ` [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Eric Dumazet 2012-06-05 11:27 ` Steffen Klassert @ 2012-06-05 12:29 ` Gao feng 1 sibling, 0 replies; 13+ messages in thread From: Gao feng @ 2012-06-05 12:29 UTC (permalink / raw) To: Eric Dumazet Cc: steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w, davem-fT/PcQaiUtIeIZ0/mPfg9Q 于 2012年06月05日 16:57, Eric Dumazet 写道: > On Tue, 2012-06-05 at 15:52 +0800, Gao feng wrote: > >> +static void __net_exit inetpeer_net_exit(struct net *net) >> +{ >> + inetpeer_invalidate_tree(net, AF_INET); >> + kfree(net->ipv4.peers); >> + >> + inetpeer_invalidate_tree(net, AF_INET6); >> + kfree(net->ipv6.peers); >> +} >> + > > Are we 1000% sure no code ever run in inetpeer land after this call ? I am not sure,I need more time to research it. I just do kfree peers here without set NULL pointer is beacuse there is the same code with fib6_main_tbl in fib6_net_exit and it seems work well. Anyway, I will research it. > > I would add > net->ipv4.peers = NULL; > net->ipv6.peers = NULL; > > to catch NULL deref instead of strange errors, just in case. > > By the way, I think we have a bug in inetpeer_gc_worker() > > Steffen ? > > We have no rcu grace period to make sure the following is safe : > > if (!atomic_read(&p->refcnt)) { > list_del(&p->gc_list); > kmem_cache_free(peer_cachep, p); > } > > I'll post a fix like : > > diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c > index d4d61b6..07731b5 100644 > --- a/net/ipv4/inetpeer.c > +++ b/net/ipv4/inetpeer.c > @@ -137,7 +137,7 @@ static void inetpeer_gc_worker(struct work_struct *work) > > n = list_entry(p->gc_list.next, struct inet_peer, gc_list); > > - if (!atomic_read(&p->refcnt)) { > + if (atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { > list_del(&p->gc_list); > kmem_cache_free(peer_cachep, p); > } > > > > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > _______________________________________________ Containers mailing list Containers@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-05 7:52 [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Gao feng [not found] ` <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> 2012-06-05 8:57 ` [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Eric Dumazet @ 2012-06-06 17:43 ` David Miller [not found] ` <20120606.104323.1698446129331745479.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 2 siblings, 1 reply; 13+ messages in thread From: David Miller @ 2012-06-06 17:43 UTC (permalink / raw) To: gaofeng Cc: serge.hallyn, ebiederm, herbert, steffen.klassert, eric.dumazet, netdev, containers From: Gao feng <gaofeng@cn.fujitsu.com> Date: Tue, 5 Jun 2012 15:52:17 +0800 > now inetpeer doesn't support namespace,the information will > be leaking across namespace. > > this patch move the global vars v4_peers and v6_peers to > netns_ipv4 and netns_ipv6 as a field peers. > > add struct pernet_operations inetpeer_ops to initial pernet > inetpeer data. > > and change family_to_base and inet_getpeer to support namespace. > > Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> As stated yesterday we have to move the inetpeer tree roots into the FIB rule entries to fix other bugs, and that as a result will transparently fix this problem too. So I'm dropping these two patches and will work on the mentioned approach to this fix. Thanks. ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20120606.104323.1698446129331745479.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>]
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer [not found] ` <20120606.104323.1698446129331745479.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> @ 2012-06-07 21:43 ` David Miller [not found] ` <20120607.144301.1259354794384347085.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: David Miller @ 2012-06-07 21:43 UTC (permalink / raw) To: gaofeng-BthXqXjhjHXQFUHtdCDX3A Cc: steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w From: David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> Date: Wed, 06 Jun 2012 10:43:23 -0700 (PDT) > So I'm dropping these two patches and will work on the mentioned > approach to this fix. It turns out that even if I move the inetpeer roots into the FIB rules layer, we still need your changes Gao. But your patches are corrupted. For example, in this patch, the final hunk for net/ipv4/inetpeer.c has no differences only context. That's an extremely corrupted patch. Please resolve this, and add the NULL pointer settings during network namespace shutdown that Eric Dumazet asked for. Thanks. ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20120607.144301.1259354794384347085.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>]
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer [not found] ` <20120607.144301.1259354794384347085.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> @ 2012-06-08 1:44 ` Gao feng 2012-06-08 3:53 ` David Miller 0 siblings, 1 reply; 13+ messages in thread From: Gao feng @ 2012-06-08 1:44 UTC (permalink / raw) To: David Miller Cc: steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w 于 2012年06月08日 05:43, David Miller 写道: > From: David Miller <davem@davemloft.net> > Date: Wed, 06 Jun 2012 10:43:23 -0700 (PDT) > >> So I'm dropping these two patches and will work on the mentioned >> approach to this fix. > > It turns out that even if I move the inetpeer roots into the FIB rules > layer, we still need your changes Gao. > > But your patches are corrupted. For example, in this patch, > the final hunk for net/ipv4/inetpeer.c has no differences only > context. That's an extremely corrupted patch. Sorry for my pool english, I don't understand this. Can you explain it for me? > > Please resolve this, and add the NULL pointer settings during network > namespace shutdown that Eric Dumazet asked for. OK, I will do it. Thanks. _______________________________________________ Containers mailing list Containers@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer 2012-06-08 1:44 ` Gao feng @ 2012-06-08 3:53 ` David Miller [not found] ` <20120607.205348.913373847915721607.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 0 siblings, 1 reply; 13+ messages in thread From: David Miller @ 2012-06-08 3:53 UTC (permalink / raw) To: gaofeng Cc: serge.hallyn, ebiederm, herbert, steffen.klassert, eric.dumazet, netdev, containers From: Gao feng <gaofeng@cn.fujitsu.com> Date: Fri, 08 Jun 2012 09:44:04 +0800 > Sorry for my pool english, I don't understand this. > Can you explain it for me? Look at the end of the parts of the patch that change net/ipv4/inetpeer.c: @@ -586,3 +617,3 @@ out: write_sequnlock_bh(&base->lock); } EXPORT_SYMBOL(inetpeer_invalidate_tree); That makes no sense, it's a patch hunk with no "+" or "-" lines. Your patch was corrupted by something. This is extremely irritating. I would recommend that you email patches to yourself, and try to apply the copies you receive in those emails. Because that's what the people trying to use your patch are going to do. ^ permalink raw reply [flat|nested] 13+ messages in thread
[parent not found: <20120607.205348.913373847915721607.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>]
* Re: [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer [not found] ` <20120607.205348.913373847915721607.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> @ 2012-06-08 5:27 ` Gao feng 0 siblings, 0 replies; 13+ messages in thread From: Gao feng @ 2012-06-08 5:27 UTC (permalink / raw) To: David Miller Cc: steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ, herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w, netdev-u79uwXL29TY76Z2rM5mHXA, containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, ebiederm-aS9lmoZGLiVWk0Htik3J/w 于 2012年06月08日 11:53, David Miller 写道: > From: Gao feng <gaofeng@cn.fujitsu.com> > Date: Fri, 08 Jun 2012 09:44:04 +0800 > >> Sorry for my pool english, I don't understand this. >> Can you explain it for me? > > Look at the end of the parts of the patch that change > net/ipv4/inetpeer.c: > > @@ -586,3 +617,3 @@ out: > write_sequnlock_bh(&base->lock); > } > EXPORT_SYMBOL(inetpeer_invalidate_tree); > > That makes no sense, it's a patch hunk with no "+" or "-" > lines. Thanks David,got it. > > Your patch was corrupted by something. > > This is extremely irritating. I would recommend that you email > patches to yourself, and try to apply the copies you receive > in those emails. Because that's what the people trying to use > your patch are going to do. > Sorry, I will try it myself next time. ;) _______________________________________________ Containers mailing list Containers@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2012-06-08 5:27 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-06-05 7:52 [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Gao feng [not found] ` <1338882737-11914-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> 2012-06-05 7:52 ` [PATCH net-next v2 2/2] inetpeer: add parameter net for inet_getpeer_v4, v6 Gao feng 2012-06-05 8:57 ` [PATCH net-next v2 1/2] inetpeer: add namespace support for inetpeer Eric Dumazet 2012-06-05 11:27 ` Steffen Klassert 2012-06-05 12:00 ` Eric Dumazet 2012-06-05 12:15 ` Steffen Klassert [not found] ` <20120605121510.GD27795-opNxpl+3fjRBDgjK7y7TUQ@public.gmane.org> 2012-06-05 12:18 ` Eric Dumazet 2012-06-05 12:29 ` Gao feng 2012-06-06 17:43 ` David Miller [not found] ` <20120606.104323.1698446129331745479.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 2012-06-07 21:43 ` David Miller [not found] ` <20120607.144301.1259354794384347085.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 2012-06-08 1:44 ` Gao feng 2012-06-08 3:53 ` David Miller [not found] ` <20120607.205348.913373847915721607.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org> 2012-06-08 5:27 ` Gao feng
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).