public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Marco Elver <elver@google.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>
Subject: [PATCH 5.10 08/25] perf: Rework perf_event_exit_event()
Date: Fri,  4 Feb 2022 10:20:15 +0100	[thread overview]
Message-ID: <20220204091914.560626177@linuxfoundation.org> (raw)
In-Reply-To: <20220204091914.280602669@linuxfoundation.org>

From: Peter Zijlstra <peterz@infradead.org>

commit ef54c1a476aef7eef26fe13ea10dc090952c00f8 upstream.

Make perf_event_exit_event() more robust, such that we can use it from
other contexts. Specifically the up and coming remove_on_exec.

For this to work we need to address a few issues. Remove_on_exec will
not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to
disable event_function_call() and we thus have to use
perf_remove_from_context().

When using perf_remove_from_context(), there's two races to consider.
The first is against close(), where we can have concurrent tear-down
of the event. The second is against child_list iteration, which should
not find a half baked event.

To address this, teach perf_remove_from_context() to special case
!ctx->is_active and about DETACH_CHILD.

[ elver@google.com: fix racing parent/child exit in sync_child_event(). ]
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210408103605.1676875-2-elver@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/perf_event.h |    1 
 kernel/events/core.c       |  144 +++++++++++++++++++++++++--------------------
 2 files changed, 81 insertions(+), 64 deletions(-)

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -607,6 +607,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK_DATA	0x08
 #define PERF_ATTACH_ITRACE	0x10
 #define PERF_ATTACH_SCHED_CB	0x20
+#define PERF_ATTACH_CHILD	0x40
 
 struct perf_cgroup;
 struct perf_buffer;
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2276,6 +2276,26 @@ out:
 	perf_event__header_size(leader);
 }
 
+static void sync_child_event(struct perf_event *child_event);
+
+static void perf_child_detach(struct perf_event *event)
+{
+	struct perf_event *parent_event = event->parent;
+
+	if (!(event->attach_state & PERF_ATTACH_CHILD))
+		return;
+
+	event->attach_state &= ~PERF_ATTACH_CHILD;
+
+	if (WARN_ON_ONCE(!parent_event))
+		return;
+
+	lockdep_assert_held(&parent_event->child_mutex);
+
+	sync_child_event(event);
+	list_del_init(&event->child_list);
+}
+
 static bool is_orphaned_event(struct perf_event *event)
 {
 	return event->state == PERF_EVENT_STATE_DEAD;
@@ -2383,6 +2403,7 @@ group_sched_out(struct perf_event *group
 }
 
 #define DETACH_GROUP	0x01UL
+#define DETACH_CHILD	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2406,6 +2427,8 @@ __perf_remove_from_context(struct perf_e
 	event_sched_out(event, cpuctx, ctx);
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
+	if (flags & DETACH_CHILD)
+		perf_child_detach(event);
 	list_del_event(event, ctx);
 
 	if (!ctx->nr_events && ctx->is_active) {
@@ -2437,25 +2460,21 @@ static void perf_remove_from_context(str
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event_function_call(event, __perf_remove_from_context, (void *)flags);
-
 	/*
-	 * The above event_function_call() can NO-OP when it hits
-	 * TASK_TOMBSTONE. In that case we must already have been detached
-	 * from the context (by perf_event_exit_event()) but the grouping
-	 * might still be in-tact.
-	 */
-	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
-	if ((flags & DETACH_GROUP) &&
-	    (event->attach_state & PERF_ATTACH_GROUP)) {
-		/*
-		 * Since in that case we cannot possibly be scheduled, simply
-		 * detach now.
-		 */
-		raw_spin_lock_irq(&ctx->lock);
-		perf_group_detach(event);
+	 * Because of perf_event_exit_task(), perf_remove_from_context() ought
+	 * to work in the face of TASK_TOMBSTONE, unlike every other
+	 * event_function_call() user.
+	 */
+	raw_spin_lock_irq(&ctx->lock);
+	if (!ctx->is_active) {
+		__perf_remove_from_context(event, __get_cpu_context(ctx),
+					   ctx, (void *)flags);
 		raw_spin_unlock_irq(&ctx->lock);
+		return;
 	}
+	raw_spin_unlock_irq(&ctx->lock);
+
+	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
@@ -12330,14 +12349,17 @@ void perf_pmu_migrate_context(struct pmu
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
-static void sync_child_event(struct perf_event *child_event,
-			       struct task_struct *child)
+static void sync_child_event(struct perf_event *child_event)
 {
 	struct perf_event *parent_event = child_event->parent;
 	u64 child_val;
 
-	if (child_event->attr.inherit_stat)
-		perf_event_read_event(child_event, child);
+	if (child_event->attr.inherit_stat) {
+		struct task_struct *task = child_event->ctx->task;
+
+		if (task && task != TASK_TOMBSTONE)
+			perf_event_read_event(child_event, task);
+	}
 
 	child_val = perf_event_count(child_event);
 
@@ -12352,60 +12374,53 @@ static void sync_child_event(struct perf
 }
 
 static void
-perf_event_exit_event(struct perf_event *child_event,
-		      struct perf_event_context *child_ctx,
-		      struct task_struct *child)
+perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_event *parent_event = child_event->parent;
+	struct perf_event *parent_event = event->parent;
+	unsigned long detach_flags = 0;
 
-	/*
-	 * Do not destroy the 'original' grouping; because of the context
-	 * switch optimization the original events could've ended up in a
-	 * random child task.
-	 *
-	 * If we were to destroy the original group, all group related
-	 * operations would cease to function properly after this random
-	 * child dies.
-	 *
-	 * Do destroy all inherited groups, we don't care about those
-	 * and being thorough is better.
-	 */
-	raw_spin_lock_irq(&child_ctx->lock);
-	WARN_ON_ONCE(child_ctx->is_active);
+	if (parent_event) {
+		/*
+		 * Do not destroy the 'original' grouping; because of the
+		 * context switch optimization the original events could've
+		 * ended up in a random child task.
+		 *
+		 * If we were to destroy the original group, all group related
+		 * operations would cease to function properly after this
+		 * random child dies.
+		 *
+		 * Do destroy all inherited groups, we don't care about those
+		 * and being thorough is better.
+		 */
+		detach_flags = DETACH_GROUP | DETACH_CHILD;
+		mutex_lock(&parent_event->child_mutex);
+	}
 
-	if (parent_event)
-		perf_group_detach(child_event);
-	list_del_event(child_event, child_ctx);
-	perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
-	raw_spin_unlock_irq(&child_ctx->lock);
+	perf_remove_from_context(event, detach_flags);
+
+	raw_spin_lock_irq(&ctx->lock);
+	if (event->state > PERF_EVENT_STATE_EXIT)
+		perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
+	raw_spin_unlock_irq(&ctx->lock);
 
 	/*
-	 * Parent events are governed by their filedesc, retain them.
+	 * Child events can be freed.
 	 */
-	if (!parent_event) {
-		perf_event_wakeup(child_event);
+	if (parent_event) {
+		mutex_unlock(&parent_event->child_mutex);
+		/*
+		 * Kick perf_poll() for is_event_hup();
+		 */
+		perf_event_wakeup(parent_event);
+		free_event(event);
+		put_event(parent_event);
 		return;
 	}
-	/*
-	 * Child events can be cleaned up.
-	 */
-
-	sync_child_event(child_event, child);
 
 	/*
-	 * Remove this event from the parent's list
-	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
-	list_del_init(&child_event->child_list);
-	mutex_unlock(&parent_event->child_mutex);
-
-	/*
-	 * Kick perf_poll() for is_event_hup().
+	 * Parent events are governed by their filedesc, retain them.
 	 */
-	perf_event_wakeup(parent_event);
-	free_event(child_event);
-	put_event(parent_event);
+	perf_event_wakeup(event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -12462,7 +12477,7 @@ static void perf_event_exit_task_context
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx, child);
+		perf_event_exit_event(child_event, child_ctx);
 
 	mutex_unlock(&child_ctx->mutex);
 
@@ -12722,6 +12737,7 @@ inherit_event(struct perf_event *parent_
 	 */
 	raw_spin_lock_irqsave(&child_ctx->lock, flags);
 	add_event_to_ctx(child_event, child_ctx);
+	child_event->attach_state |= PERF_ATTACH_CHILD;
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*



  parent reply	other threads:[~2022-02-04  9:23 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-04  9:20 [PATCH 5.10 00/25] 5.10.97-rc1 review Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 01/25] PCI: pciehp: Fix infinite loop in IRQ handler upon power fault Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 02/25] net: ipa: fix atomic update in ipa_endpoint_replenish() Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 03/25] net: ipa: use a bitmap for endpoint replenish_enabled Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 04/25] net: ipa: prevent concurrent replenish Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 05/25] Revert "drivers: bus: simple-pm-bus: Add support for probing simple bus only devices" Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 06/25] KVM: x86: Forcibly leave nested virt when SMM state is toggled Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 07/25] psi: Fix uaf issue when psi trigger is destroyed while being polled Greg Kroah-Hartman
2022-02-04  9:20 ` Greg Kroah-Hartman [this message]
2022-02-04  9:37   ` [PATCH 5.10 08/25] perf: Rework perf_event_exit_event() Pavel Machek
2022-02-04  9:40     ` Greg Kroah-Hartman
2022-02-04 10:12       ` Pavel Machek
2022-02-05 10:25         ` Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 09/25] perf/core: Fix cgroup event list management Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 10/25] x86/mce: Add Xeon Sapphire Rapids to list of CPUs that support PPIN Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 11/25] x86/cpu: Add Xeon Icelake-D " Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 12/25] drm/vc4: hdmi: Make sure the device is powered with CEC Greg Kroah-Hartman
2022-02-05 11:40   ` Alexey Khoroshilov
2022-02-05 11:53     ` Greg Kroah-Hartman
2022-02-05 12:04       ` Alexey Khoroshilov
2022-02-04  9:20 ` [PATCH 5.10 13/25] cgroup-v1: Require capabilities to set release_agent Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 14/25] net/mlx5e: Fix handling of wrong devices during bond netevent Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 15/25] net/mlx5: Use del_timer_sync in fw reset flow of halting poll Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 16/25] net/mlx5: E-Switch, Fix uninitialized variable modact Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 17/25] ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 18/25] net: amd-xgbe: ensure to reset the tx_timer_active flag Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 19/25] net: amd-xgbe: Fix skb data length underflow Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 20/25] fanotify: Fix stale file descriptor in copy_event_to_user() Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 21/25] net: sched: fix use-after-free in tc_new_tfilter() Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 22/25] rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink() Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 23/25] cpuset: Fix the bug that subpart_cpus updated wrongly in update_cpumask() Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 24/25] af_packet: fix data-race in packet_setsockopt / packet_setsockopt Greg Kroah-Hartman
2022-02-04  9:20 ` [PATCH 5.10 25/25] tcp: add missing tcp_skb_can_collapse() test in tcp_shift_skb_data() Greg Kroah-Hartman
2022-02-04 11:31 ` [PATCH 5.10 00/25] 5.10.97-rc1 review Pavel Machek
2022-02-04 17:33 ` Florian Fainelli
2022-02-04 19:11 ` Fox Chen
2022-02-04 20:32 ` Shuah Khan
2022-02-04 21:08 ` Guenter Roeck
2022-02-04 23:30 ` Slade Watkins
2022-02-05  7:01 ` Naresh Kamboju
2022-02-05 14:30 ` Sudip Mukherjee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220204091914.560626177@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=elver@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox