All of lore.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: "Lai, Yi" <yi1.lai@linux.intel.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Daniel Bristot de Oliveira <bristot@kernel.org>,
	Ian Rogers <irogers@google.com>, Ingo Molnar <mingo@redhat.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Kan Liang <kan.liang@linux.intel.com>,
	Marco Elver <elver@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	yi1.lai@intel.com, syzkaller-bugs@googlegroups.com
Subject: Re: [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work.
Date: Mon, 28 Oct 2024 13:21:39 +0100	[thread overview]
Message-ID: <Zx-B0wK3xqRQsCOS@localhost.localdomain> (raw)
In-Reply-To: <Zx9Losv4YcJowaP/@ly-workstation>

Le Mon, Oct 28, 2024 at 04:30:26PM +0800, Lai, Yi a écrit :
> [  300.651268] INFO: task repro:671 blocked for more than 147 seconds.
> [  300.651706]       Not tainted 6.12.0-rc4-42f7652d3eb5+ #1
> [  300.652006] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [  300.652430] task:repro           state:D stack:0     pid:671   tgid:671   ppid:670    flags:0x00004002
> [  300.652939] Call Trace:
> [  300.653088]  <TASK>
> [  300.653221]  __schedule+0xe13/0x33a0
> [  300.653474]  ? __pfx___schedule+0x10/0x10
> [  300.653704]  ? lock_release+0x441/0x870
> [  300.653946]  ? __pfx_lock_release+0x10/0x10
> [  300.654184]  ? trace_lock_acquire+0x139/0x1b0
> [  300.654439]  ? lock_acquire+0x80/0xb0
> [  300.654651]  ? schedule+0x216/0x3f0
> [  300.654859]  schedule+0xf6/0x3f0
> [  300.655083]  _free_event+0x531/0x14c0
> [  300.655317]  perf_event_release_kernel+0x648/0x870
> [  300.655597]  ? __pfx_perf_event_release_kernel+0x10/0x10
> [  300.655899]  ? trace_hardirqs_on+0x51/0x60
> [  300.656176]  ? __sanitizer_cov_trace_const_cmp2+0x1c/0x30
> [  300.656474]  ? __pfx_perf_release+0x10/0x10
> [  300.656697]  perf_release+0x3a/0x50
> [  300.656916]  __fput+0x414/0xb60
> [  300.657163]  ____fput+0x22/0x30
> [  300.657335]  task_work_run+0x19c/0x2b0

Ah the perf_pending_task work is pending but perf_pending_task_sync()
fails to cancel there:

	/*
	 * If the task is queued to the current task's queue, we
	 * obviously can't wait for it to complete. Simply cancel it.
	 */
	if (task_work_cancel(current, head)) {
		event->pending_work = 0;
		local_dec(&event->ctx->nr_no_switch_fast);
		return;
	}

And that's because the work is not anymore on the task work
list in task->task_works. Instead it's in the executing list
in task_work_run(). It's a blind spot for task_work_cancel()
if the current task is already running the task works. And it
does since it's running the fput delayed work.

Something like this untested?

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 449dd64ed9ac..035580fa2c81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1163,6 +1163,7 @@ struct task_struct {
 	unsigned int			sas_ss_flags;
 
 	struct callback_head		*task_works;
+	struct callback_head		*task_works_running;
 
 #ifdef CONFIG_AUDIT
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index cf5e7e891a77..fdd70f09a7f0 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -33,6 +33,7 @@ struct callback_head *task_work_cancel_match(struct task_struct *task,
 	bool (*match)(struct callback_head *, void *data), void *data);
 struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
 bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
+bool task_work_cancel_current(struct callback_head *cb);
 void task_work_run(void);
 
 static inline void exit_task_work(struct task_struct *task)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e3589c4287cb..1b15f3c83595 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5305,7 +5305,7 @@ static void perf_pending_task_sync(struct perf_event *event)
 	 * If the task is queued to the current task's queue, we
 	 * obviously can't wait for it to complete. Simply cancel it.
 	 */
-	if (task_work_cancel(current, head)) {
+	if (task_work_cancel_current(head)) {
 		event->pending_work = 0;
 		local_dec(&event->ctx->nr_no_switch_fast);
 		return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..1b898701d888 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2450,6 +2450,7 @@ __latent_entropy struct task_struct *copy_process(
 
 	p->pdeath_signal = 0;
 	p->task_works = NULL;
+	p->task_works_running = NULL;
 	clear_posix_cputimers_work(p);
 
 #ifdef CONFIG_KRETPROBES
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5d14d639ac71..2efa81a6cbf6 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -184,6 +184,26 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
 	return ret == cb;
 }
 
+bool task_work_cancel_current(struct callback_head *cb)
+{
+	struct callback_head **running;
+
+	if (task_work_cancel(current, cb))
+		return true;
+
+	running = &current->task_works_running;
+	while (*running) {
+		if (*running == cb) {
+			*running = cb->next;
+			return true;
+		}
+		running = &(*running)->next;
+	}
+
+	return false;
+}
+
+
 /**
  * task_work_run - execute the works added by task_work_add()
  *
@@ -195,7 +215,7 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
 void task_work_run(void)
 {
 	struct task_struct *task = current;
-	struct callback_head *work, *head, *next;
+	struct callback_head *work, *head;
 
 	for (;;) {
 		/*
@@ -223,10 +243,11 @@ void task_work_run(void)
 		raw_spin_lock_irq(&task->pi_lock);
 		raw_spin_unlock_irq(&task->pi_lock);
 
+		WARN_ON_ONCE(task->task_works_running);
 		do {
-			next = work->next;
+			task->task_works_running = work->next;
 			work->func(work);
-			work = next;
+			work = task->task_works_running;
 			cond_resched();
 		} while (work);
 	}


  reply	other threads:[~2024-10-28 12:21 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-24 15:15 [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 1/6] perf: Move irq_work_queue() where the event is prepared Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work Sebastian Andrzej Siewior
2024-07-01 12:28   ` Peter Zijlstra
2024-07-01 13:27     ` Sebastian Andrzej Siewior
2024-07-02  9:01       ` Peter Zijlstra
2024-10-28  8:30   ` Lai, Yi
2024-10-28 12:21     ` Frederic Weisbecker [this message]
2024-10-29 17:21       ` Sebastian Andrzej Siewior
2024-10-30 14:07         ` Sebastian Andrzej Siewior
2024-10-30 15:46           ` Frederic Weisbecker
2024-11-07 14:46             ` Sebastian Andrzej Siewior
2024-11-08 13:11               ` Frederic Weisbecker
2024-11-08 19:08                 ` Oleg Nesterov
2024-11-08 22:26                   ` Frederic Weisbecker
2024-11-11 12:08                     ` Sebastian Andrzej Siewior
2024-12-04  3:02                       ` Lai, Yi
2024-12-04 13:48                       ` Oleg Nesterov
2024-12-05  0:19                         ` Frederic Weisbecker
2024-12-05  9:20                           ` Oleg Nesterov
2024-12-05 10:05                             ` Frederic Weisbecker
2024-12-05 10:28                               ` Oleg Nesterov
2024-12-13 22:52                                 ` Frederic Weisbecker
2024-12-16 19:19                                   ` Oleg Nesterov
2024-06-24 15:15 ` [PATCH v4 3/6] perf: Shrink the size of the recursion counter Sebastian Andrzej Siewior
2024-07-01 12:31   ` Peter Zijlstra
2024-07-01 12:56     ` Sebastian Andrzej Siewior
2024-07-01 13:10       ` Peter Zijlstra
2024-06-24 15:15 ` [PATCH v4 4/6] perf: Move swevent_htable::recursion into task_struct Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 5/6] perf: Don't disable preemption in perf_pending_task() Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 6/6] perf: Split __perf_pending_irq() out of perf_pending_irq() Sebastian Andrzej Siewior
2024-06-25 13:42 ` [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Marco Elver

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Zx-B0wK3xqRQsCOS@localhost.localdomain \
    --to=frederic@kernel.org \
    --cc=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bigeasy@linutronix.de \
    --cc=bristot@kernel.org \
    --cc=elver@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=syzkaller-bugs@googlegroups.com \
    --cc=tglx@linutronix.de \
    --cc=yi1.lai@intel.com \
    --cc=yi1.lai@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.