From: Frederic Weisbecker <frederic@kernel.org>
To: "Lai, Yi" <yi1.lai@linux.intel.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
Adrian Hunter <adrian.hunter@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Daniel Bristot de Oliveira <bristot@kernel.org>,
Ian Rogers <irogers@google.com>, Ingo Molnar <mingo@redhat.com>,
Jiri Olsa <jolsa@kernel.org>,
Kan Liang <kan.liang@linux.intel.com>,
Marco Elver <elver@google.com>,
Mark Rutland <mark.rutland@arm.com>,
Namhyung Kim <namhyung@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
Arnaldo Carvalho de Melo <acme@redhat.com>,
yi1.lai@intel.com, syzkaller-bugs@googlegroups.com
Subject: Re: [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work.
Date: Mon, 28 Oct 2024 13:21:39 +0100 [thread overview]
Message-ID: <Zx-B0wK3xqRQsCOS@localhost.localdomain> (raw)
In-Reply-To: <Zx9Losv4YcJowaP/@ly-workstation>
Le Mon, Oct 28, 2024 at 04:30:26PM +0800, Lai, Yi a écrit :
> [ 300.651268] INFO: task repro:671 blocked for more than 147 seconds.
> [ 300.651706] Not tainted 6.12.0-rc4-42f7652d3eb5+ #1
> [ 300.652006] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> [ 300.652430] task:repro state:D stack:0 pid:671 tgid:671 ppid:670 flags:0x00004002
> [ 300.652939] Call Trace:
> [ 300.653088] <TASK>
> [ 300.653221] __schedule+0xe13/0x33a0
> [ 300.653474] ? __pfx___schedule+0x10/0x10
> [ 300.653704] ? lock_release+0x441/0x870
> [ 300.653946] ? __pfx_lock_release+0x10/0x10
> [ 300.654184] ? trace_lock_acquire+0x139/0x1b0
> [ 300.654439] ? lock_acquire+0x80/0xb0
> [ 300.654651] ? schedule+0x216/0x3f0
> [ 300.654859] schedule+0xf6/0x3f0
> [ 300.655083] _free_event+0x531/0x14c0
> [ 300.655317] perf_event_release_kernel+0x648/0x870
> [ 300.655597] ? __pfx_perf_event_release_kernel+0x10/0x10
> [ 300.655899] ? trace_hardirqs_on+0x51/0x60
> [ 300.656176] ? __sanitizer_cov_trace_const_cmp2+0x1c/0x30
> [ 300.656474] ? __pfx_perf_release+0x10/0x10
> [ 300.656697] perf_release+0x3a/0x50
> [ 300.656916] __fput+0x414/0xb60
> [ 300.657163] ____fput+0x22/0x30
> [ 300.657335] task_work_run+0x19c/0x2b0
Ah the perf_pending_task work is pending but perf_pending_task_sync()
fails to cancel there:
/*
* If the task is queued to the current task's queue, we
* obviously can't wait for it to complete. Simply cancel it.
*/
if (task_work_cancel(current, head)) {
event->pending_work = 0;
local_dec(&event->ctx->nr_no_switch_fast);
return;
}
And that's because the work is not anymore on the task work
list in task->task_works. Instead it's in the executing list
in task_work_run(). It's a blind spot for task_work_cancel()
if the current task is already running the task works. And it
does since it's running the fput delayed work.
Something like this untested?
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 449dd64ed9ac..035580fa2c81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1163,6 +1163,7 @@ struct task_struct {
unsigned int sas_ss_flags;
struct callback_head *task_works;
+ struct callback_head *task_works_running;
#ifdef CONFIG_AUDIT
#ifdef CONFIG_AUDITSYSCALL
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index cf5e7e891a77..fdd70f09a7f0 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -33,6 +33,7 @@ struct callback_head *task_work_cancel_match(struct task_struct *task,
bool (*match)(struct callback_head *, void *data), void *data);
struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
+bool task_work_cancel_current(struct callback_head *cb);
void task_work_run(void);
static inline void exit_task_work(struct task_struct *task)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e3589c4287cb..1b15f3c83595 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5305,7 +5305,7 @@ static void perf_pending_task_sync(struct perf_event *event)
* If the task is queued to the current task's queue, we
* obviously can't wait for it to complete. Simply cancel it.
*/
- if (task_work_cancel(current, head)) {
+ if (task_work_cancel_current(head)) {
event->pending_work = 0;
local_dec(&event->ctx->nr_no_switch_fast);
return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..1b898701d888 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2450,6 +2450,7 @@ __latent_entropy struct task_struct *copy_process(
p->pdeath_signal = 0;
p->task_works = NULL;
+ p->task_works_running = NULL;
clear_posix_cputimers_work(p);
#ifdef CONFIG_KRETPROBES
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 5d14d639ac71..2efa81a6cbf6 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -184,6 +184,26 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
return ret == cb;
}
+bool task_work_cancel_current(struct callback_head *cb)
+{
+ struct callback_head **running;
+
+ if (task_work_cancel(current, cb))
+ return true;
+
+ running = ¤t->task_works_running;
+ while (*running) {
+ if (*running == cb) {
+ *running = cb->next;
+ return true;
+ }
+ running = &(*running)->next;
+ }
+
+ return false;
+}
+
+
/**
* task_work_run - execute the works added by task_work_add()
*
@@ -195,7 +215,7 @@ bool task_work_cancel(struct task_struct *task, struct callback_head *cb)
void task_work_run(void)
{
struct task_struct *task = current;
- struct callback_head *work, *head, *next;
+ struct callback_head *work, *head;
for (;;) {
/*
@@ -223,10 +243,11 @@ void task_work_run(void)
raw_spin_lock_irq(&task->pi_lock);
raw_spin_unlock_irq(&task->pi_lock);
+ WARN_ON_ONCE(task->task_works_running);
do {
- next = work->next;
+ task->task_works_running = work->next;
work->func(work);
- work = next;
+ work = task->task_works_running;
cond_resched();
} while (work);
}
next prev parent reply other threads:[~2024-10-28 12:21 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-24 15:15 [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 1/6] perf: Move irq_work_queue() where the event is prepared Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 2/6] perf: Enqueue SIGTRAP always via task_work Sebastian Andrzej Siewior
2024-07-01 12:28 ` Peter Zijlstra
2024-07-01 13:27 ` Sebastian Andrzej Siewior
2024-07-02 9:01 ` Peter Zijlstra
2024-10-28 8:30 ` Lai, Yi
2024-10-28 12:21 ` Frederic Weisbecker [this message]
2024-10-29 17:21 ` Sebastian Andrzej Siewior
2024-10-30 14:07 ` Sebastian Andrzej Siewior
2024-10-30 15:46 ` Frederic Weisbecker
2024-11-07 14:46 ` Sebastian Andrzej Siewior
2024-11-08 13:11 ` Frederic Weisbecker
2024-11-08 19:08 ` Oleg Nesterov
2024-11-08 22:26 ` Frederic Weisbecker
2024-11-11 12:08 ` Sebastian Andrzej Siewior
2024-12-04 3:02 ` Lai, Yi
2024-12-04 13:48 ` Oleg Nesterov
2024-12-05 0:19 ` Frederic Weisbecker
2024-12-05 9:20 ` Oleg Nesterov
2024-12-05 10:05 ` Frederic Weisbecker
2024-12-05 10:28 ` Oleg Nesterov
2024-12-13 22:52 ` Frederic Weisbecker
2024-12-16 19:19 ` Oleg Nesterov
2024-06-24 15:15 ` [PATCH v4 3/6] perf: Shrink the size of the recursion counter Sebastian Andrzej Siewior
2024-07-01 12:31 ` Peter Zijlstra
2024-07-01 12:56 ` Sebastian Andrzej Siewior
2024-07-01 13:10 ` Peter Zijlstra
2024-06-24 15:15 ` [PATCH v4 4/6] perf: Move swevent_htable::recursion into task_struct Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 5/6] perf: Don't disable preemption in perf_pending_task() Sebastian Andrzej Siewior
2024-06-24 15:15 ` [PATCH v4 6/6] perf: Split __perf_pending_irq() out of perf_pending_irq() Sebastian Andrzej Siewior
2024-06-25 13:42 ` [PATCH v4 0/6] perf: Make SIGTRAP and __perf_pending_irq() work on RT Marco Elver
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Zx-B0wK3xqRQsCOS@localhost.localdomain \
--to=frederic@kernel.org \
--cc=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=bigeasy@linutronix.de \
--cc=bristot@kernel.org \
--cc=elver@google.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=syzkaller-bugs@googlegroups.com \
--cc=tglx@linutronix.de \
--cc=yi1.lai@intel.com \
--cc=yi1.lai@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.