Linux Perf Users
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: "Lai, Yi" <yi1.lai@linux.intel.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Daniel Bristot de Oliveira <bristot@kernel.org>,
	Ian Rogers <irogers@google.com>, Ingo Molnar <mingo@redhat.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Kan Liang <kan.liang@linux.intel.com>,
	Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	yi1.lai@intel.com, syzkaller-bugs@googlegroups.com
Subject: Re: [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work.
Date: Mon, 28 Oct 2024 13:21:39 +0100	[thread overview]
Message-ID: <Zx-B0wK3xqRQsCOS@localhost.localdomain> (raw)
In-Reply-To: <Zx9Losv4YcJowaP/@ly-workstation>

Le Mon, Oct 28, 2024 at 04:30:26PM +0800, Lai, Yi a écrit :
> [  300.651268] INFO: task repro:671 blocked for more than 147 seconds.
> [  300.651706]       Not tainted 6.12.0-rc4-42f7652d3eb5+ #1
> [  300.652006] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  300.652430] task:repro           state:D stack:0     pid:671   tgid:671   ppid:670    flags:0x00004002
> [  300.652939] Call Trace:
> [  300.653088]  <TASK>
> [  300.653221]  __schedule+0xe13/0x33a0
> [  300.653474]  ? __pfx___schedule+0x10/0x10
> [  300.653704]  ? lock_release+0x441/0x870
> [  300.653946]  ? __pfx_lock_release+0x10/0x10
> [  300.654184]  ? trace_lock_acquire+0x139/0x1b0
> [  300.654439]  ? lock_acquire+0x80/0xb0
> [  300.654651]  ? schedule+0x216/0x3f0
> [  300.654859]  schedule+0xf6/0x3f0
> [  300.655083]  _free_event+0x531/0x14c0
> [  300.655317]  perf_event_release_kernel+0x648/0x870
> [  300.655597]  ? __pfx_perf_event_release_kernel+0x10/0x10
> [  300.655899]  ? trace_hardirqs_on+0x51/0x60
> [  300.656176]  ? __sanitizer_cov_trace_const_cmp2+0x1c/0x30
> [  300.656474]  ? __pfx_perf_release+0x10/0x10
> [  300.656697]  perf_release+0x3a/0x50
> [  300.656916]  __fput+0x414/0xb60
> [  300.657163]  ____fput+0x22/0x30
> [  300.657335]  task_work_run+0x19c/0x2b0

Ah, the perf_pending_task work is pending but perf_pending_task_sync()
fails to cancel it there:

	/*
	 * If the task is queued to the current task's queue, we
	 * obviously can't wait for it to complete. Simply cancel it.
	 */
	if (task_work_cancel(current, head)) {
		event->pending_work = 0;
		local_dec(&event->ctx->nr_no_switch_fast);
		return;
	}

And that's because the work is no longer on the task work
list in task->task_works. Instead it's in the executing list
in task_work_run(). It's a blind spot for task_work_cancel()
if the current task is already running the task works. And it
is, since it's running the fput delayed work.

Something like this untested?

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 449dd64ed9ac..035580fa2c81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1163,6 +1163,7 @@ struct task_struct {
 	unsigned int			sas_ss_flags;
 
 	struct callback_head		*task_works;
+	struct callback_head		*task_works_running;
 
 #ifdef CONFIG_AUDIT
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index cf5e7e891a77..fdd70f09a7f0 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -33,6 +33,7 @@ struct callback_head *task_work_cancel_match(struct task_struct *task,
 	bool (*match)(struct callback_head *, void *data), void *data);
 struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
 bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
+bool task_work_cancel_current(struct callback_head *cb);
 void task_work_run(void);
 
 static inline void exit_task_work(struct task_struct *task)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e3589c4287cb..1b15f3c83595 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5305,7 +5305,7 @@ static void perf_pending_task_sync(struct perf_event *event)
 	 * If the task is queued to the current task's queue, we
 	 * obviously can't wait for it to complete. Simply cancel it.
 	 */
-	if (task_work_cancel(current, head)) {
+	if (task_work_cancel_current(head)) {
 		event->pending_work = 0;
 		local_dec(&event->ctx->nr_no_switch_fast);
 		return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..1b898701d888 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2450,6 +2450,7 @@ __latent_entropy struct task_struct *copy_process(
 
 	p->pdeath_signal = 0;
 	p->task_works = NULL;
+	p->task_works_running = NULL;
 	clear_posix_cputimers_work(p);
 
 #ifdef CONFIG_KRETPROBES
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5d14d639ac71..2efa81a6cbf6 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -184,6 +184,26 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
 	return ret == cb;
 }
 
+bool task_work_cancel_current(struct callback_head *cb)
+{
+	struct callback_head **running;
+
+	if (task_work_cancel(current, cb))
+		return true;
+
+	running = &current->task_works_running;
+	while (*running) {
+		if (*running == cb) {
+			*running = cb->next;
+			return true;
+		}
+		running = &(*running)->next;
+	}
+
+	return false;
+}
+
+
 /**
  * task_work_run - execute the works added by task_work_add()
  *
@@ -195,7 +215,7 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct callback_head *work, *head, *next;
+	struct callback_head *work, *head;
 
 	for (;;) {
 		/*
@@ -223,10 +243,11 @@ void task_work_run(void)
 		raw_spin_lock_irq(&task->pi_lock);
 		raw_spin_unlock_irq(&task->pi_lock);
 
+		WARN_ON_ONCE(task->task_works_running);
 		do {
-			next = work->next;
+			task->task_works_running = work->next;
 			work->func(work);
-			work = next;
+			work = task->task_works_running;
 			cond_resched();
 		} while (work);
 	}


  reply	other threads:[~2024-10-28 12:21 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-24 15:15 [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 1/6] perf: Move irq_work_queue() where the event is prepared Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work Sebastian Andrzej Siewior
2024-07-01 12:28   ` Peter Zijlstra
2024-07-01 13:27     ` Sebastian Andrzej Siewior
2024-07-02  9:01       ` Peter Zijlstra
2024-10-28  8:30   ` Lai, Yi
2024-10-28 12:21     ` Frederic Weisbecker [this message]
2024-10-29 17:21       ` Sebastian Andrzej Siewior
2024-10-30 14:07         ` Sebastian Andrzej Siewior
2024-10-30 15:46           ` Frederic Weisbecker
2024-11-07 14:46             ` Sebastian Andrzej Siewior
2024-11-08 13:11               ` Frederic Weisbecker
2024-11-08 19:08                 ` Oleg Nesterov
2024-11-08 22:26                   ` Frederic Weisbecker
2024-11-11 12:08                     ` Sebastian Andrzej Siewior
2024-12-04  3:02                       ` Lai, Yi
2024-12-04 13:48                       ` Oleg Nesterov
2024-12-05  0:19                         ` Frederic Weisbecker
2024-12-05  9:20                           ` Oleg Nesterov
2024-12-05 10:05                             ` Frederic Weisbecker
2024-12-05 10:28                               ` Oleg Nesterov
2024-12-13 22:52                                 ` Frederic Weisbecker
2024-12-16 19:19                                   ` Oleg Nesterov
2024-06-24 15:15 ` [PATCH v4 3/6] perf: Shrink the size of the recursion counter Sebastian Andrzej Siewior
2024-07-01 12:31   ` Peter Zijlstra
2024-07-01 12:56     ` Sebastian Andrzej Siewior
2024-07-01 13:10       ` Peter Zijlstra
2024-06-24 15:15 ` [PATCH v4 4/6] perf: Move swevent_htable::recursion into task_struct Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 5/6] perf: Don't disable preemption in perf_pending_task() Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 6/6] perf: Split __perf_pending_irq() out of perf_pending_irq() Sebastian Andrzej Siewior
2024-06-25 13:42 ` [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Marco Elver

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Zx-B0wK3xqRQsCOS@localhost.localdomain \
    --to=frederic@kernel.org \
    --cc=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bigeasy@linutronix.de \
    --cc=bristot@kernel.org \
    --cc=elver@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=syzkaller-bugs@googlegroups.com \
    --cc=tglx@linutronix.de \
    --cc=yi1.lai@intel.com \
    --cc=yi1.lai@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox