All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 RESEND] perf/core: Fix missing read event generation on task exit
@ 2025-12-09  4:16 Thaumy Cheng
  2025-12-09  9:23 ` [tip: perf/urgent] " tip-bot2 for Thaumy Cheng
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Thaumy Cheng @ 2025-12-09  4:16 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark
  Cc: linux-perf-users, linux-kernel, Thaumy Cheng

For events with inherit_stat enabled, a "read" event will be generated
to collect per task event counts on task exit.

The call chain is as follows:

do_exit
  -> perf_event_exit_task
    -> perf_event_exit_task_context
      -> perf_event_exit_event
        -> perf_remove_from_context
          -> perf_child_detach
            -> sync_child_event
              -> perf_event_read_event

However, the child event context detaches the task too early in
perf_event_exit_task_context, which causes sync_child_event to never
generate the read event in this case, since child_event->ctx->task is
always set to TASK_TOMBSTONE. Fix that by moving context lock section
backward to ensure ctx->task is not set to TASK_TOMBSTONE before
generating the read event.

Because perf_event_free_task calls perf_event_exit_task_context with
exit = false to tear down all child events from the context, and the
task never lived, accessing the task PID can lead to a use-after-free.

To fix that, let sync_child_event read task from argument and move the
call to the only place it should be triggered to avoid the effect of
setting ctx->task to TASK_TOMESTONE, and add a task parameter to
perf_event_exit_event to trigger the sync_child_event properly when
needed.

This bug can be reproduced by running "perf record -s" and attaching to
any program that generates perf events in its child tasks. If we check
the result with "perf report -T", the last line of the report will leave
an empty table like "# PID  TID", which is expected to contain the
per-task event counts by design.

Fixes: ef54c1a476ae ("perf: Rework perf_event_exit_event()")
Signed-off-by: Thaumy Cheng <thaumy.love@gmail.com>
---
Changes in v3:
- Fix the bug in a more direct way by moving the call to
  sync_child_event and bring back the task param to
  perf_event_exit_event.
  This approach avoids the event unscheduling issue in v2.

Changes in v2:
- Only trigger read event on task exit.
- Rename perf_event_exit_event to perf_event_detach_event.
- Link to v2: https://lore.kernel.org/all/20250817132742.85154-1-thaumy.love@gmail.com/

Changes in v1:
- Set TASK_TOMBSTONE after the read event is tirggered.
- Link to v1: https://lore.kernel.org/all/20250720000424.12572-1-thaumy.love@gmail.com/

 kernel/events/core.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 177e57c1a362..618e7947c358 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2316,7 +2316,8 @@ static void perf_group_detach(struct perf_event *event)
 	perf_event__header_size(leader);
 }

-static void sync_child_event(struct perf_event *child_event);
+static void sync_child_event(struct perf_event *child_event,
+			     struct task_struct *task);

 static void perf_child_detach(struct perf_event *event)
 {
@@ -2336,7 +2337,6 @@ static void perf_child_detach(struct perf_event *event)
 	lockdep_assert_held(&parent_event->child_mutex);
 	 */

-	sync_child_event(event);
 	list_del_init(&event->child_list);
 }

@@ -4587,6 +4587,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
 static void perf_remove_from_owner(struct perf_event *event);
 static void perf_event_exit_event(struct perf_event *event,
 				  struct perf_event_context *ctx,
+				  struct task_struct *task,
 				  bool revoke);

 /*
@@ -4614,7 +4615,7 @@ static void perf_event_remove_on_exec(struct perf_event_context *ctx)

 		modified = true;

-		perf_event_exit_event(event, ctx, false);
+		perf_event_exit_event(event, ctx, ctx->task, false);
 	}

 	raw_spin_lock_irqsave(&ctx->lock, flags);
@@ -12437,7 +12438,7 @@ static void __pmu_detach_event(struct pmu *pmu, struct perf_event *event,
 	/*
 	 * De-schedule the event and mark it REVOKED.
 	 */
-	perf_event_exit_event(event, ctx, true);
+	perf_event_exit_event(event, ctx, ctx->task, true);

 	/*
 	 * All _free_event() bits that rely on event->pmu:
@@ -13994,14 +13995,13 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);

-static void sync_child_event(struct perf_event *child_event)
+static void sync_child_event(struct perf_event *child_event,
+			     struct task_struct *task)
 {
 	struct perf_event *parent_event = child_event->parent;
 	u64 child_val;

 	if (child_event->attr.inherit_stat) {
-		struct task_struct *task = child_event->ctx->task;
-
 		if (task && task != TASK_TOMBSTONE)
 			perf_event_read_event(child_event, task);
 	}
@@ -14020,7 +14020,9 @@ static void sync_child_event(struct perf_event *child_event)

 static void
 perf_event_exit_event(struct perf_event *event,
-		      struct perf_event_context *ctx, bool revoke)
+		      struct perf_event_context *ctx,
+		      struct task_struct *task,
+		      bool revoke)
 {
 	struct perf_event *parent_event = event->parent;
 	unsigned long detach_flags = DETACH_EXIT;
@@ -14043,6 +14045,9 @@ perf_event_exit_event(struct perf_event *event,
 		mutex_lock(&parent_event->child_mutex);
 		/* PERF_ATTACH_ITRACE might be set concurrently */
 		attach_state = READ_ONCE(event->attach_state);
+
+		if (attach_state & PERF_ATTACH_CHILD)
+			sync_child_event(event, task);
 	}

 	if (revoke)
@@ -14134,7 +14139,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
 		perf_event_task(task, ctx, 0);

 	list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, ctx, false);
+		perf_event_exit_event(child_event, ctx, exit ? task : NULL, false);

 	mutex_unlock(&ctx->mutex);

--
2.51.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-12-09 11:24 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-09  4:16 [PATCH v3 RESEND] perf/core: Fix missing read event generation on task exit Thaumy Cheng
2025-12-09  9:23 ` [tip: perf/urgent] " tip-bot2 for Thaumy Cheng
2025-12-09  9:38 ` [PATCH v3 RESEND] " Peter Zijlstra
2025-12-09 11:23   ` Ingo Molnar
2025-12-09 11:24 ` [tip: perf/urgent] " tip-bot2 for Thaumy Cheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.