stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	syzbot+46f513c3033d592409d2@syzkaller.appspotmail.com,
	Thomas Gleixner <tglx@linutronix.de>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Jiri Pirko <jiri@resnulli.us>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 5.4 08/41] net_sched: add a temporary refcnt for struct tcindex_data
Date: Sat, 11 Apr 2020 14:09:17 +0200	[thread overview]
Message-ID: <20200411115504.704586285@linuxfoundation.org> (raw)
In-Reply-To: <20200411115504.124035693@linuxfoundation.org>

From: Cong Wang <xiyou.wangcong@gmail.com>

[ Upstream commit 304e024216a802a7dc8ba75d36de82fa136bbf3e ]

Although we intentionally use an ordered workqueue for all tc
filter works, the ordering is not guaranteed by RCU work,
given that tcf_queue_work() is esstenially a call_rcu().

This problem is demostrated by Thomas:

  CPU 0:
    tcf_queue_work()
      tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);

  -> Migration to CPU 1

  CPU 1:
     tcf_queue_work(&p->rwork, tcindex_destroy_work);

so the 2nd work could be queued before the 1st one, which leads
to a free-after-free.

Enforcing this order in RCU work is hard as it requires to change
RCU code too. Fortunately we can workaround this problem in tcindex
filter by taking a temporary refcnt, we only refcnt it right before
we begin to destroy it. This simplifies the code a lot as a full
refcnt requires much more changes in tcindex_set_parms().

Reported-by: syzbot+46f513c3033d592409d2@syzkaller.appspotmail.com
Fixes: 3d210534cc93 ("net_sched: fix a race condition in tcindex_destroy()")
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/cls_tcindex.c |   44 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -11,6 +11,7 @@
 #include <linux/skbuff.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/refcount.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
@@ -26,9 +27,12 @@
 #define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
 
 
+struct tcindex_data;
+
 struct tcindex_filter_result {
 	struct tcf_exts		exts;
 	struct tcf_result	res;
+	struct tcindex_data	*p;
 	struct rcu_work		rwork;
 };
 
@@ -49,6 +53,7 @@ struct tcindex_data {
 	u32 hash;		/* hash table size; 0 if undefined */
 	u32 alloc_hash;		/* allocated size */
 	u32 fall_through;	/* 0: only classify if explicit match */
+	refcount_t refcnt;	/* a temporary refcnt for perfect hash */
 	struct rcu_work rwork;
 };
 
@@ -57,6 +62,20 @@ static inline int tcindex_filter_is_set(
 	return tcf_exts_has_actions(&r->exts) || r->res.classid;
 }
 
+static void tcindex_data_get(struct tcindex_data *p)
+{
+	refcount_inc(&p->refcnt);
+}
+
+static void tcindex_data_put(struct tcindex_data *p)
+{
+	if (refcount_dec_and_test(&p->refcnt)) {
+		kfree(p->perfect);
+		kfree(p->h);
+		kfree(p);
+	}
+}
+
 static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
 						    u16 key)
 {
@@ -141,6 +160,7 @@ static void __tcindex_destroy_rexts(stru
 {
 	tcf_exts_destroy(&r->exts);
 	tcf_exts_put_net(&r->exts);
+	tcindex_data_put(r->p);
 }
 
 static void tcindex_destroy_rexts_work(struct work_struct *work)
@@ -212,6 +232,8 @@ found:
 		else
 			__tcindex_destroy_fexts(f);
 	} else {
+		tcindex_data_get(p);
+
 		if (tcf_exts_get_net(&r->exts))
 			tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
 		else
@@ -228,9 +250,7 @@ static void tcindex_destroy_work(struct
 					      struct tcindex_data,
 					      rwork);
 
-	kfree(p->perfect);
-	kfree(p->h);
-	kfree(p);
+	tcindex_data_put(p);
 }
 
 static inline int
@@ -248,9 +268,11 @@ static const struct nla_policy tcindex_p
 };
 
 static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+				      struct tcindex_data *p,
 				      struct net *net)
 {
 	memset(r, 0, sizeof(*r));
+	r->p = p;
 	return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
 			     TCA_TCINDEX_POLICE);
 }
@@ -290,6 +312,7 @@ static int tcindex_alloc_perfect_hash(st
 				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 		if (err < 0)
 			goto errout;
+		cp->perfect[i].p = cp;
 	}
 
 	return 0;
@@ -334,6 +357,7 @@ tcindex_set_parms(struct net *net, struc
 	cp->alloc_hash = p->alloc_hash;
 	cp->fall_through = p->fall_through;
 	cp->tp = tp;
+	refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
 
 	if (tb[TCA_TCINDEX_HASH])
 		cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -366,7 +390,7 @@ tcindex_set_parms(struct net *net, struc
 	}
 	cp->h = p->h;
 
-	err = tcindex_filter_result_init(&new_filter_result, net);
+	err = tcindex_filter_result_init(&new_filter_result, cp, net);
 	if (err < 0)
 		goto errout_alloc;
 	if (old_r)
@@ -434,7 +458,7 @@ tcindex_set_parms(struct net *net, struc
 			goto errout_alloc;
 		f->key = handle;
 		f->next = NULL;
-		err = tcindex_filter_result_init(&f->result, net);
+		err = tcindex_filter_result_init(&f->result, cp, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
@@ -447,7 +471,7 @@ tcindex_set_parms(struct net *net, struc
 	}
 
 	if (old_r && old_r != r) {
-		err = tcindex_filter_result_init(old_r, net);
+		err = tcindex_filter_result_init(old_r, cp, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
@@ -571,6 +595,14 @@ static void tcindex_destroy(struct tcf_p
 		for (i = 0; i < p->hash; i++) {
 			struct tcindex_filter_result *r = p->perfect + i;
 
+			/* tcf_queue_work() does not guarantee the ordering we
+			 * want, so we have to take this refcnt temporarily to
+			 * ensure 'p' is freed after all tcindex_filter_result
+			 * here. Imperfect hash does not need this, because it
+			 * uses linked lists rather than an array.
+			 */
+			tcindex_data_get(p);
+
 			tcf_unbind_filter(tp, &r->res);
 			if (tcf_exts_get_net(&r->exts))
 				tcf_queue_work(&r->rwork,



  parent reply	other threads:[~2020-04-11 12:18 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-11 12:09 [PATCH 5.4 00/41] 5.4.32-rc1 review Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 01/41] net: phy: realtek: fix handling of RTL8105e-integrated PHY Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 02/41] cxgb4: fix MPS index overwrite when setting MAC address Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 03/41] ipv6: dont auto-add link-local address to lag ports Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 04/41] net: dsa: bcm_sf2: Do not register slave MDIO bus with OF Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 05/41] net: dsa: bcm_sf2: Ensure correct sub-node is parsed Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 06/41] net: dsa: mt7530: fix null pointer dereferencing in port5 setup Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 07/41] net: phy: micrel: kszphy_resume(): add delay after genphy_resume() before accessing PHY registers Greg Kroah-Hartman
2020-04-11 12:09 ` Greg Kroah-Hartman [this message]
2020-04-11 12:09 ` [PATCH 5.4 09/41] net_sched: fix a missing refcnt in tcindex_init() Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 10/41] net: stmmac: dwmac1000: fix out-of-bounds mac address reg setting Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 11/41] slcan: Dont transmit uninitialized stack data in padding Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 12/41] tun: Dont put_page() for all negative return values from XDP program Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 13/41] mlxsw: spectrum_flower: Do not stop at FLOW_ACTION_VLAN_MANGLE Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 14/41] r8169: change back SG and TSO to be disabled by default Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 15/41] s390: prevent leaking kernel address in BEAR Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 16/41] random: always use batched entropy for get_random_u{32,64} Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 17/41] usb: dwc3: gadget: Wrap around when skip TRBs Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 18/41] uapi: rename ext2_swab() to swab() and share globally in swab.h Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 19/41] slub: improve bit diffusion for freelist ptr obfuscation Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 20/41] tools/accounting/getdelays.c: fix netlink attribute length Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 21/41] hwrng: imx-rngc - fix an error path Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 22/41] ACPI: PM: Add acpi_[un]register_wakeup_handler() Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 23/41] platform/x86: intel_int0002_vgpio: Use acpi_register_wakeup_handler() Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 24/41] ASoC: jz4740-i2s: Fix divider written at incorrect offset in register Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 25/41] IB/hfi1: Call kobject_put() when kobject_init_and_add() fails Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 26/41] IB/hfi1: Fix memory leaks in sysfs registration and unregistration Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 27/41] IB/mlx5: Replace tunnel mpls capability bits for tunnel_offloads Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 28/41] ARM: imx: Enable ARM_ERRATA_814220 for i.MX6UL and i.MX7D Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 29/41] ARM: imx: only select ARM_ERRATA_814220 for ARMv7-A Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 30/41] ceph: remove the extra slashes in the server path Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 31/41] ceph: canonicalize server path in place Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 32/41] include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 33/41] RDMA/ucma: Put a lock around every call to the rdma_cm layer Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 34/41] RDMA/cma: Teach lockdep about the order of rtnl and lock Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 35/41] RDMA/siw: Fix passive connection establishment Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 36/41] Bluetooth: RFCOMM: fix ODEBUG bug in rfcomm_dev_ioctl Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 37/41] RDMA/cm: Update num_paths in cma_resolve_iboe_route error flow Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 38/41] blk-mq: Keep set->nr_hw_queues and set->map[].nr_queues in sync Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 39/41] fbcon: fix null-ptr-deref in fbcon_switch Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 40/41] drm/i915: Fix ref->mutex deadlock in i915_active_wait() Greg Kroah-Hartman
2020-04-11 12:09 ` [PATCH 5.4 41/41] iommu/vt-d: Allow devices with RMRRs to use identity domain Greg Kroah-Hartman
2020-04-11 20:42 ` [PATCH 5.4 00/41] 5.4.32-rc1 review Guenter Roeck
2020-04-12  7:28 ` Naresh Kamboju
2020-04-14 10:36 ` Jon Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200411115504.704586285@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=syzbot+46f513c3033d592409d2@syzkaller.appspotmail.com \
    --cc=tglx@linutronix.de \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).