stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Cong Wang <xiyou.wangcong@gmail.com>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	Sasha Levin <sashal@kernel.org>,
	netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 4.19 34/57] xfrm: destroy xfrm_state synchronously on net exit path
Date: Fri, 29 Mar 2019 21:28:27 -0400	[thread overview]
Message-ID: <20190330012854.32212-34-sashal@kernel.org> (raw)
In-Reply-To: <20190330012854.32212-1-sashal@kernel.org>

From: Cong Wang <xiyou.wangcong@gmail.com>

[ Upstream commit f75a2804da391571563c4b6b29e7797787332673 ]

xfrm_state_put() moves struct xfrm_state to the GC list
and schedules the GC work to clean it up. On net exit call
path, xfrm_state_flush() is called to clean up and
xfrm_flush_gc() is called to wait for the GC work to complete
before exit.

However, this doesn't work because one of the ->destructor(),
ipcomp_destroy(), schedules the same GC work again inside
the GC work. It is hard to wait for such a nested async
callback. This is also why syzbot still reports the following
warning:

 WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351
 ...
  ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153
  cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551
  process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153
  worker_thread+0x143/0x14a0 kernel/workqueue.c:2296
  kthread+0x357/0x430 kernel/kthread.c:246
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

In fact, it is perfectly fine to bypass GC and destroy xfrm_state
synchronously on net exit call path, because it is in process context
and doesn't need a work struct to do any blocking work.

This patch introduces xfrm_state_put_sync() which simply bypasses
GC, and lets its callers to decide whether to use this synchronous
version. On net exit path, xfrm_state_fini() and
xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is
blocking, it can use xfrm_state_put_sync() directly too.

Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to
reflect this change.

Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.")
Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/xfrm.h      | 12 +++++++++---
 net/ipv6/xfrm6_tunnel.c |  2 +-
 net/key/af_key.c        |  2 +-
 net/xfrm/xfrm_state.c   | 30 +++++++++++++++++++-----------
 net/xfrm/xfrm_user.c    |  2 +-
 5 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index da588def3c61..5e3daf53b3d1 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -850,7 +850,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
 		xfrm_pol_put(pols[i]);
 }
 
-void __xfrm_state_destroy(struct xfrm_state *);
+void __xfrm_state_destroy(struct xfrm_state *, bool);
 
 static inline void __xfrm_state_put(struct xfrm_state *x)
 {
@@ -860,7 +860,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
 static inline void xfrm_state_put(struct xfrm_state *x)
 {
 	if (refcount_dec_and_test(&x->refcnt))
-		__xfrm_state_destroy(x);
+		__xfrm_state_destroy(x, false);
+}
+
+static inline void xfrm_state_put_sync(struct xfrm_state *x)
+{
+	if (refcount_dec_and_test(&x->refcnt))
+		__xfrm_state_destroy(x, true);
 }
 
 static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1616,7 +1622,7 @@ struct xfrmk_spdinfo {
 
 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
 int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f5b4febeaa25..bc65db782bfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
 	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	unsigned int i;
 
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	xfrm_flush_gc();
+	xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
 
 	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
 		WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7da629d59717..7d4bed955060 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1773,7 +1773,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
 	if (proto == 0)
 		return -EINVAL;
 
-	err = xfrm_state_flush(net, proto, true);
+	err = xfrm_state_flush(net, proto, true, false);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - go quietly */
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index cc0203efb584..3f729cd512af 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -432,7 +432,7 @@ void xfrm_state_free(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_free);
 
-static void xfrm_state_gc_destroy(struct xfrm_state *x)
+static void ___xfrm_state_destroy(struct xfrm_state *x)
 {
 	tasklet_hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
@@ -474,7 +474,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
 	synchronize_rcu();
 
 	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
-		xfrm_state_gc_destroy(x);
+		___xfrm_state_destroy(x);
 }
 
 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -598,14 +598,19 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 }
 EXPORT_SYMBOL(xfrm_state_alloc);
 
-void __xfrm_state_destroy(struct xfrm_state *x)
+void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
 {
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
-	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
-	spin_unlock_bh(&xfrm_state_gc_lock);
-	schedule_work(&xfrm_state_gc_work);
+	if (sync) {
+		synchronize_rcu();
+		___xfrm_state_destroy(x);
+	} else {
+		spin_lock_bh(&xfrm_state_gc_lock);
+		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+		spin_unlock_bh(&xfrm_state_gc_lock);
+		schedule_work(&xfrm_state_gc_work);
+	}
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -708,7 +713,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
 }
 #endif
 
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
 {
 	int i, err = 0, cnt = 0;
 
@@ -730,7 +735,10 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
 				err = xfrm_state_delete(x);
 				xfrm_audit_state_delete(x, err ? 0 : 1,
 							task_valid);
-				xfrm_state_put(x);
+				if (sync)
+					xfrm_state_put_sync(x);
+				else
+					xfrm_state_put(x);
 				if (!err)
 					cnt++;
 
@@ -2217,7 +2225,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
 		if (atomic_read(&t->tunnel_users) == 2)
 			xfrm_state_delete(t);
 		atomic_dec(&t->tunnel_users);
-		xfrm_state_put(t);
+		xfrm_state_put_sync(t);
 		x->tunnel = NULL;
 	}
 }
@@ -2377,8 +2385,8 @@ void xfrm_state_fini(struct net *net)
 	unsigned int sz;
 
 	flush_work(&net->xfrm.state_hash_work);
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	flush_work(&xfrm_state_gc_work);
+	xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ab557827aac0..7e4904b93004 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1932,7 +1932,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_usersa_flush *p = nlmsg_data(nlh);
 	int err;
 
-	err = xfrm_state_flush(net, p->proto, true);
+	err = xfrm_state_flush(net, p->proto, true, false);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
-- 
2.19.1


  parent reply	other threads:[~2019-03-30  1:39 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-30  1:27 [PATCH AUTOSEL 4.19 01/57] drm/cirrus: Use drm_framebuffer_put to avoid kernel oops in clean-up Sasha Levin
2019-03-30  1:27 ` [PATCH AUTOSEL 4.19 02/57] gpio: pxa: handle corner case of unprobed device Sasha Levin
2019-03-30  1:27 ` [PATCH AUTOSEL 4.19 03/57] rsi: improve kernel thread handling to fix kernel panic Sasha Levin
2019-03-30  1:27 ` [PATCH AUTOSEL 4.19 04/57] f2fs: fix to avoid NULL pointer dereference on se->discard_map Sasha Levin
2019-03-30  1:27 ` [PATCH AUTOSEL 4.19 05/57] 9p: do not trust pdu content for stat item size Sasha Levin
2019-03-30  1:27 ` [PATCH AUTOSEL 4.19 06/57] 9p locks: add mount option for lock retry interval Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 07/57] ASoC: Fix UBSAN warning at snd_soc_get/put_volsw_sx() Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 08/57] f2fs: fix to do sanity check with current segment number Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 09/57] netfilter: xt_cgroup: shrink size of v2 path Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 10/57] serial: uartps: console_setup() can't be placed to init section Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 11/57] powerpc/pseries: Remove prrn_work workqueue Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 12/57] media: au0828: cannot kfree dev before usb disconnect Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 13/57] Bluetooth: Fix debugfs NULL pointer dereference Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 14/57] HID: i2c-hid: override HID descriptors for certain devices Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 15/57] pinctrl: core: make sure strcmp() doesn't get a null parameter Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 16/57] ARM: samsung: Limit SAMSUNG_PM_CHECK config option to non-Exynos platforms Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 17/57] usbip: fix vhci_hcd controller counting Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 18/57] ACPI / SBS: Fix GPE storm on recent MacBookPro's Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 19/57] HID: usbhid: Add quirk for Redragon/Dragonrise Seymur 2 Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 20/57] KVM: nVMX: restore host state in nested_vmx_vmexit for VMFail Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 21/57] compiler.h: update definition of unreachable() Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 22/57] netfilter: nf_flow_table: remove flowtable hook flush routine in netns exit routine Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 23/57] f2fs: cleanup dirty pages if recover failed Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 24/57] net: stmmac: Set OWN bit for jumbo frames Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 25/57] cifs: fallback to older infolevels on findfirst queryinfo retry Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 26/57] kernel: hung_task.c: disable on suspend Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 27/57] platform/x86: Add Intel AtomISP2 dummy / power-management driver Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 28/57] nvme-pci: fix conflicting p2p resource adds Sasha Levin
2019-04-01 17:36   ` Heitke, Kenneth
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 29/57] drm/ttm: Fix bo_global and mem_global kfree error Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 30/57] ALSA: hda: fix front speakers on Huawei MBXP Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 31/57] ACPI: EC / PM: Disable non-wakeup GPEs for suspend-to-idle Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 32/57] net/rds: fix warn in rds_message_alloc_sgs Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 33/57] blk-mq: protect debugfs_create_files() from failures Sasha Levin
2019-03-30  5:43   ` Greg Kroah-Hartman
2019-04-03 16:17     ` Sasha Levin
2019-03-30  1:28 ` Sasha Levin [this message]
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 35/57] crypto: sha256/arm - fix crash bug in Thumb2 build Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 36/57] crypto: sha512/arm " Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 37/57] net: ip6_gre: fix possible NULL pointer dereference in ip6erspan_set_version Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 38/57] iommu/dmar: Fix buffer overflow during PCI bus notification Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 39/57] scsi: core: Avoid that system resume triggers a kernel warning Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 40/57] kvm: properly check debugfs dentry before using it Sasha Levin
2019-03-30  5:43   ` Greg Kroah-Hartman
2019-04-03 16:16     ` Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 41/57] soc/tegra: pmc: Drop locking from tegra_powergate_is_powered() Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 42/57] ext4: prohibit fstrim in norecovery mode Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 43/57] lkdtm: Print real addresses Sasha Levin
2019-03-30  1:28 ` [PATCH AUTOSEL 4.19 44/57] lkdtm: Add tests for NULL pointer dereference Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190330012854.32212-34-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).