From mboxrd@z Thu Jan 1 00:00:00 1970 From: Shannon Nelson Subject: Re: [PATCH ipsec] net: xfrm_policy: fix device unregistration hang Date: Mon, 12 Feb 2018 09:55:48 -0800 Message-ID: References: <1518456093-13316-1-git-send-email-eyal.birger@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Cc: herbert@gondor.apana.org.au, davem@davemloft.net, netdev@vger.kernel.org, fw@strlen.de, shmulik@metanetworks.com To: Eyal Birger , steffen.klassert@secunet.com Return-path: Received: from aserp2120.oracle.com ([141.146.126.78]:38642 "EHLO aserp2120.oracle.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751109AbeBLR4V (ORCPT ); Mon, 12 Feb 2018 12:56:21 -0500 In-Reply-To: <1518456093-13316-1-git-send-email-eyal.birger@gmail.com> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 2/12/2018 9:21 AM, Eyal Birger wrote: > In setups like the following: > > Host A -- Host B > tun0 -- ipsec -- eth0 -- eth0 -- ipsec -- tun0 > > where tun0 are tunnel devices using dst_cache (ipip, ipip6, etc...). > > Unregistration of an underlying eth0 device leads to the following log > messages: > > unregister_netdevice: waiting for eth0 to become free. Usage count = 2 > > This is since xfrm dsts device references are not released upon device > unregistration when the xfrm dst is cached in a dst_cache. > > Several approaches for fixing this were discussed in [1]; this commit keeps > track of allocated xdsts and releases their device references on a netdev > unregister event. > > Signed-off-by: Eyal Birger > > [1] https://patchwork.ozlabs.org/patch/869025/ > --- > include/net/xfrm.h | 10 ++----- > net/xfrm/xfrm_policy.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 84 insertions(+), 7 deletions(-) > > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > index 7d20776..c1c8493 100644 > --- a/include/net/xfrm.h > +++ b/include/net/xfrm.h > @@ -994,6 +994,8 @@ struct xfrm_dst { > u32 child_mtu_cached; > u32 route_cookie; > u32 path_cookie; > + struct list_head xfrm_dst_gc; > + struct xfrm_dst_list *xfrm_dst_gc_list; > }; > > static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) > @@ -1025,13 +1027,7 @@ static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *c > xdst->child = child; > } > > -static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) > -{ > - xfrm_pols_put(xdst->pols, xdst->num_pols); > - dst_release(xdst->route); > - if (likely(xdst->u.dst.xfrm)) > - xfrm_state_put(xdst->u.dst.xfrm); > -} > +void xfrm_dst_destroy(struct xfrm_dst *xdst); > #endif > > void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); > diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c > index 7a23078..1da8a65 100644 > --- a/net/xfrm/xfrm_policy.c > +++ b/net/xfrm/xfrm_policy.c > @@ -45,6 +45,12 @@ struct xfrm_flo { > u8 flags; > }; > > +struct xfrm_dst_list { > + spinlock_t lock; /* sync writers */ > + struct list_head head; > +}; > + > +static DEFINE_PER_CPU_ALIGNED(struct xfrm_dst_list, xfrm_dst_gc_list); > static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); > static struct work_struct *xfrm_pcpu_work __read_mostly; > static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); > @@ -94,6 +100,51 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) > (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); > } > > +static void xfrm_dst_add_to_gc_list(struct xfrm_dst *xdst) > +{ > + struct xfrm_dst_list *xl = raw_cpu_ptr(&xfrm_dst_gc_list); > + > + xdst->xfrm_dst_gc_list = xl; > + > + spin_lock_bh(&xl->lock); > + list_add_tail(&xdst->xfrm_dst_gc, &xl->head); > + spin_unlock_bh(&xl->lock); > +} > + > +void xfrm_dst_destroy(struct xfrm_dst *xdst) > +{ > + xfrm_pols_put(xdst->pols, xdst->num_pols); > + dst_release(xdst->route); > + if (likely(xdst->u.dst.xfrm)) > + xfrm_state_put(xdst->u.dst.xfrm); > + if (!list_empty(&xdst->xfrm_dst_gc)) { > + struct xfrm_dst_list *xl = xdst->xfrm_dst_gc_list; > + > + spin_lock_bh(&xl->lock); > + list_del(&xdst->xfrm_dst_gc); > + spin_unlock_bh(&xl->lock); > + } > +} > +EXPORT_SYMBOL_GPL(xfrm_dst_destroy); > + > +static void xfrm_flush_dev(struct net_device *dev) > +{ > + struct xfrm_dst *xdst; > + int cpu; > + > + for_each_possible_cpu(cpu) { > + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); > + > + spin_lock_bh(&xl->lock); > + list_for_each_entry(xdst, &xl->head, xfrm_dst_gc) { > + if (xdst->u.dst.dev != dev) > + continue; > + dst_dev_put(&xdst->u.dst); > + } > + spin_unlock_bh(&xl->lock); > + } > +} > + > bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, > unsigned short family) > { > @@ -1581,6 +1632,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, > } > > bundle[i] = xdst; > + xfrm_dst_add_to_gc_list(xdst); > + > if (!xdst_prev) > xdst0 = xdst; > else > @@ -2937,6 +2990,20 @@ static void xfrm_policy_fini(struct net *net) > xfrm_hash_free(net->xfrm.policy_byidx, sz); > } > > +static int xfrm_netdev_event(struct notifier_block *this, unsigned long event, > + void *ptr) > +{ > + struct net_device *dev = netdev_notifier_info_to_dev(ptr); > + > + if (event == NETDEV_UNREGISTER) > + xfrm_flush_dev(dev); > + return NOTIFY_DONE; > +} > + > +static struct notifier_block xfrm_netdev_notifier = { > + .notifier_call = xfrm_netdev_event, > +}; > + Instead of creating yet another netdev notifier, can this be wired into xfrm_dev_unregister(), the existing notifier in xfrm_decide.c which calls into xfrm_policy_cache_flush()? sln > static int __net_init xfrm_net_init(struct net *net) > { > int rv; > @@ -2984,6 +3051,19 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { > .exit = xfrm_net_exit, > }; > > +static void __init xfrm_dst_gc_init(void) > +{ > + int cpu; > + > + for_each_possible_cpu(cpu) { > + struct xfrm_dst_list *xl = &per_cpu(xfrm_dst_gc_list, cpu); > + > + INIT_LIST_HEAD(&xl->head); > + spin_lock_init(&xl->lock); > + } > + register_netdevice_notifier(&xfrm_netdev_notifier); > +} > + > void __init xfrm_init(void) > { > int i; > @@ -2998,6 +3078,7 @@ void __init xfrm_init(void) > register_pernet_subsys(&xfrm_net_ops); > seqcount_init(&xfrm_policy_hash_generation); > xfrm_input_init(); > + xfrm_dst_gc_init(); > } > > #ifdef CONFIG_AUDITSYSCALL >