stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] sched/deadline: Fix race in push_dl_task
@ 2025-04-08  4:50 Harshit Agarwal
  2025-04-09  5:32 ` Juri Lelli
  2025-09-03  8:05 ` [tip: sched/core] sched/deadline: Fix race in push_dl_task() tip-bot2 for Harshit Agarwal
  0 siblings, 2 replies; 4+ messages in thread
From: Harshit Agarwal @ 2025-04-08  4:50 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Valentin Schneider, linux-kernel
  Cc: Harshit Agarwal, stable

When a CPU chooses to call push_dl_task and picks a task to push to
another CPU's runqueue then it will call find_lock_later_rq method
which would take a double lock on both CPUs' runqueues. If one of the
locks isn't readily available, it may lead to dropping the current
runqueue lock and reacquiring both the locks at once. During this window
it is possible that the task is already migrated and is running on some
other CPU. These cases are already handled. However, if the task is
migrated and has already been executed and another CPU is now trying to
wake it up (ttwu) such that it is queued again on the runqueue
(on_rq is 1) and also if the task was run by the same CPU, then the
current checks will pass even though the task was migrated out and is no
longer in the pushable tasks list.
Please go through the original rt change for more details on the issue.

To fix this, after the lock is obtained inside the find_lock_later_rq,
it ensures that the task is still at the head of pushable tasks list.
Also removed some checks that are no longer needed with the addition of
this new check.
However, the new check of pushable tasks list only applies when
find_lock_later_rq is called by push_dl_task. For the other caller i.e.
dl_task_offline_migration, existing checks are used.

Signed-off-by: Harshit Agarwal <harshit@nutanix.com>
Cc: stable@vger.kernel.org
---
Changes in v3:
- Incorporated review comments from Juri around the commit message as
  well as around the comment regarding checks in find_lock_later_rq.
- Link to v2:
  https://lore.kernel.org/stable/20250317022325.52791-1-harshit@nutanix.com/

Changes in v2:
- As per Juri's suggestion, moved the check inside find_lock_later_rq
  similar to rt change. Here we distinguish among the push_dl_task
  caller vs dl_task_offline_migration by checking if the task is
  throttled or not.
- Fixed the commit message to refer to the rt change by title.
- Link to v1:
  https://lore.kernel.org/lkml/20250307204255.60640-1-harshit@nutanix.com/
---
 kernel/sched/deadline.c | 73 +++++++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 38e4537790af..e0c95f33e1ed 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2621,6 +2621,25 @@ static int find_later_rq(struct task_struct *task)
 	return -1;
 }
 
+static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_dl_tasks(rq))
+		return NULL;
+
+	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
+
+	WARN_ON_ONCE(rq->cpu != task_cpu(p));
+	WARN_ON_ONCE(task_current(rq, p));
+	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+	WARN_ON_ONCE(!task_on_rq_queued(p));
+	WARN_ON_ONCE(!dl_task(p));
+
+	return p;
+}
+
 /* Locks the rq it finds */
 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 {
@@ -2648,12 +2667,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
-			if (unlikely(task_rq(task) != rq ||
+			/*
+			 * double_lock_balance had to release rq->lock, in the
+			 * meantime, task may no longer be fit to be migrated.
+			 * Check the following to ensure that the task is
+			 * still suitable for migration:
+			 * 1. It is possible the task was scheduled,
+			 *    migrate_disabled was set and then got preempted,
+			 *    so we must check the task migration disable
+			 *    flag.
+			 * 2. The CPU picked is in the task's affinity.
+			 * 3. For throttled task (dl_task_offline_migration),
+			 *    check that all of the following still hold:
+			 *    - the task is still on this rq (it was not
+			 *      migrated)
+			 *    - the task is not running on a CPU
+			 *    - the task is still a dl task
+			 *    - the task is still queued on the rq
+			 * 4. For the non-throttled task (push_dl_task), the
+			 *    check to ensure that this task is still at the
+			 *    head of the pushable tasks list is enough.
+			 */
+			if (unlikely(is_migration_disabled(task) ||
 				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
-				     task_on_cpu(rq, task) ||
-				     !dl_task(task) ||
-				     is_migration_disabled(task) ||
-				     !task_on_rq_queued(task))) {
+				     (task->dl.dl_throttled &&
+				      (task_rq(task) != rq ||
+				       task_on_cpu(rq, task) ||
+				       !dl_task(task) ||
+				       !task_on_rq_queued(task))) ||
+				     (!task->dl.dl_throttled &&
+				      task != pick_next_pushable_dl_task(rq)))) {
+
 				double_unlock_balance(rq, later_rq);
 				later_rq = NULL;
 				break;
@@ -2676,25 +2720,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 	return later_rq;
 }
 
-static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
-{
-	struct task_struct *p;
-
-	if (!has_pushable_dl_tasks(rq))
-		return NULL;
-
-	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
-
-	WARN_ON_ONCE(rq->cpu != task_cpu(p));
-	WARN_ON_ONCE(task_current(rq, p));
-	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
-
-	WARN_ON_ONCE(!task_on_rq_queued(p));
-	WARN_ON_ONCE(!dl_task(p));
-
-	return p;
-}
-
 /*
  * See if the non running -deadline tasks on this rq
  * can be sent to some other CPU where they can preempt
-- 
2.49.0.111.g5b97a56fa0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] sched/deadline: Fix race in push_dl_task
  2025-04-08  4:50 [PATCH v3] sched/deadline: Fix race in push_dl_task Harshit Agarwal
@ 2025-04-09  5:32 ` Juri Lelli
  2025-08-07 20:17   ` Harshit Agarwal
  2025-09-03  8:05 ` [tip: sched/core] sched/deadline: Fix race in push_dl_task() tip-bot2 for Harshit Agarwal
  1 sibling, 1 reply; 4+ messages in thread
From: Juri Lelli @ 2025-04-09  5:32 UTC (permalink / raw)
  To: Harshit Agarwal
  Cc: Ingo Molnar, Peter Zijlstra, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman, Valentin Schneider,
	linux-kernel, stable

Hello,

On 08/04/25 04:50, Harshit Agarwal wrote:
> When a CPU chooses to call push_dl_task and picks a task to push to
> another CPU's runqueue then it will call find_lock_later_rq method
> which would take a double lock on both CPUs' runqueues. If one of the
> locks isn't readily available, it may lead to dropping the current
> runqueue lock and reacquiring both the locks at once. During this window
> it is possible that the task is already migrated and is running on some
> other CPU. These cases are already handled. However, if the task is
> migrated and has already been executed and another CPU is now trying to
> wake it up (ttwu) such that it is queued again on the runqueue
> (on_rq is 1) and also if the task was run by the same CPU, then the
> current checks will pass even though the task was migrated out and is no
> longer in the pushable tasks list.
> Please go through the original rt change for more details on the issue.
> 
> To fix this, after the lock is obtained inside the find_lock_later_rq,
> it ensures that the task is still at the head of pushable tasks list.
> Also removed some checks that are no longer needed with the addition of
> this new check.
> However, the new check of pushable tasks list only applies when
> find_lock_later_rq is called by push_dl_task. For the other caller i.e.
> dl_task_offline_migration, existing checks are used.
> 
> Signed-off-by: Harshit Agarwal <harshit@nutanix.com>
> Cc: stable@vger.kernel.org
> ---

The new version looks good to me.

Some final minor touches to changelog/comment might still be required,
but Peter maybe you can do it if/when picking up the change?

Anyway,

Acked-by: Juri Lelli <juri.lelli@redhat.com>

Thanks!
Juri


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] sched/deadline: Fix race in push_dl_task
  2025-04-09  5:32 ` Juri Lelli
@ 2025-08-07 20:17   ` Harshit Agarwal
  0 siblings, 0 replies; 4+ messages in thread
From: Harshit Agarwal @ 2025-08-07 20:17 UTC (permalink / raw)
  To: Juri Lelli
  Cc: Ingo Molnar, Peter Zijlstra, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Ben Segall, Mel Gorman, Valentin Schneider,
	linux-kernel@vger.kernel.org, stable@vger.kernel.org

Hi Peter, Juri,

Just bumping this thread so we don’t miss merging this deadline change.
Please let me know if there is anything I need to do.

Thanks,
Harshit


> 
> The new version looks good to me.
> 
> Some final minor touches to changelog/comment might still be required,
> but Peter maybe you can do it if/when picking up the change?
> 
> Anyway,
> 
> Acked-by: Juri Lelli <juri.lelli@redhat.com>
> 
> Thanks!
> Juri



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tip: sched/core] sched/deadline: Fix race in push_dl_task()
  2025-04-08  4:50 [PATCH v3] sched/deadline: Fix race in push_dl_task Harshit Agarwal
  2025-04-09  5:32 ` Juri Lelli
@ 2025-09-03  8:05 ` tip-bot2 for Harshit Agarwal
  1 sibling, 0 replies; 4+ messages in thread
From: tip-bot2 for Harshit Agarwal @ 2025-09-03  8:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Harshit Agarwal, Peter Zijlstra (Intel), Juri Lelli, stable, x86,
	linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     8fd5485fb4f3d9da3977fd783fcb8e5452463420
Gitweb:        https://git.kernel.org/tip/8fd5485fb4f3d9da3977fd783fcb8e5452463420
Author:        Harshit Agarwal <harshit@nutanix.com>
AuthorDate:    Tue, 08 Apr 2025 04:50:21 
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 03 Sep 2025 10:03:12 +02:00

sched/deadline: Fix race in push_dl_task()

When a CPU chooses to call push_dl_task and picks a task to push to
another CPU's runqueue then it will call find_lock_later_rq method
which would take a double lock on both CPUs' runqueues. If one of the
locks isn't readily available, it may lead to dropping the current
runqueue lock and reacquiring both the locks at once. During this window
it is possible that the task is already migrated and is running on some
other CPU. These cases are already handled. However, if the task is
migrated and has already been executed and another CPU is now trying to
wake it up (ttwu) such that it is queued again on the runqueue
(on_rq is 1) and also if the task was run by the same CPU, then the
current checks will pass even though the task was migrated out and is no
longer in the pushable tasks list.
Please go through the original rt change for more details on the issue.

To fix this, after the lock is obtained inside the find_lock_later_rq,
it ensures that the task is still at the head of pushable tasks list.
Also removed some checks that are no longer needed with the addition of
this new check.
However, the new check of pushable tasks list only applies when
find_lock_later_rq is called by push_dl_task. For the other caller i.e.
dl_task_offline_migration, existing checks are used.

Signed-off-by: Harshit Agarwal <harshit@nutanix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250408045021.3283624-1-harshit@nutanix.com
---
 kernel/sched/deadline.c | 73 ++++++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f253012..5b64bc6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2580,6 +2580,25 @@ static int find_later_rq(struct task_struct *task)
 	return -1;
 }
 
+static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_dl_tasks(rq))
+		return NULL;
+
+	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
+
+	WARN_ON_ONCE(rq->cpu != task_cpu(p));
+	WARN_ON_ONCE(task_current(rq, p));
+	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+	WARN_ON_ONCE(!task_on_rq_queued(p));
+	WARN_ON_ONCE(!dl_task(p));
+
+	return p;
+}
+
 /* Locks the rq it finds */
 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 {
@@ -2607,12 +2626,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
-			if (unlikely(task_rq(task) != rq ||
+			/*
+			 * double_lock_balance had to release rq->lock, in the
+			 * meantime, task may no longer be fit to be migrated.
+			 * Check the following to ensure that the task is
+			 * still suitable for migration:
+			 * 1. It is possible the task was scheduled,
+			 *    migrate_disabled was set and then got preempted,
+			 *    so we must check the task migration disable
+			 *    flag.
+			 * 2. The CPU picked is in the task's affinity.
+			 * 3. For throttled task (dl_task_offline_migration),
+			 *    check that all of the following still hold:
+			 *    - the task is still on this rq (it was not
+			 *      migrated)
+			 *    - the task is not running on a CPU
+			 *    - the task is still a dl task
+			 *    - the task is still queued on the rq
+			 * 4. For the non-throttled task (push_dl_task), the
+			 *    check to ensure that this task is still at the
+			 *    head of the pushable tasks list is enough.
+			 */
+			if (unlikely(is_migration_disabled(task) ||
 				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
-				     task_on_cpu(rq, task) ||
-				     !dl_task(task) ||
-				     is_migration_disabled(task) ||
-				     !task_on_rq_queued(task))) {
+				     (task->dl.dl_throttled &&
+				      (task_rq(task) != rq ||
+				       task_on_cpu(rq, task) ||
+				       !dl_task(task) ||
+				       !task_on_rq_queued(task))) ||
+				     (!task->dl.dl_throttled &&
+				      task != pick_next_pushable_dl_task(rq)))) {
+
 				double_unlock_balance(rq, later_rq);
 				later_rq = NULL;
 				break;
@@ -2635,25 +2679,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 	return later_rq;
 }
 
-static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
-{
-	struct task_struct *p;
-
-	if (!has_pushable_dl_tasks(rq))
-		return NULL;
-
-	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
-
-	WARN_ON_ONCE(rq->cpu != task_cpu(p));
-	WARN_ON_ONCE(task_current(rq, p));
-	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
-
-	WARN_ON_ONCE(!task_on_rq_queued(p));
-	WARN_ON_ONCE(!dl_task(p));
-
-	return p;
-}
-
 /*
  * See if the non running -deadline tasks on this rq
  * can be sent to some other CPU where they can preempt

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-09-03  8:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-04-08  4:50 [PATCH v3] sched/deadline: Fix race in push_dl_task Harshit Agarwal
2025-04-09  5:32 ` Juri Lelli
2025-08-07 20:17   ` Harshit Agarwal
2025-09-03  8:05 ` [tip: sched/core] sched/deadline: Fix race in push_dl_task() tip-bot2 for Harshit Agarwal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).