public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, tj@kernel.org, void@manifault.com
Cc: peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	vschneid@redhat.com, linux-kernel@vger.kernel.org,
	joelaf@google.com
Subject: [PATCH 7/9] sched: Rework dl_server
Date: Wed, 14 Aug 2024 00:25:55 +0200	[thread overview]
Message-ID: <20240813224016.259853414@infradead.org> (raw)
In-Reply-To: 20240813222548.049744955@infradead.org

When a task is selected through a dl_server, it will have p->dl_server
set, such that it can account runtime to the dl_server, see
update_curr_task().

Currently p->dl_server is set in pick*task() whenever it goes through
the dl_server, clearing it is a bit of a mess though. The trivial
solution is clearing it on the final put (now that we have this
location).

However, this gives a problem when:

	p = pick_task(rq);
	if (p)
		put_prev_set_next_task(rq, prev, next);

picks the same task but through a different path, notably when it goes
from picking through the dl_server to a direct pick or vice-versa. In
that case we cannot readily determine wether we should clear or
preserve p->dl_server.

An additional complication is pick_*task() setting p->dl_server for a
remote pick, it might still need to update runtime before it schedules
the core_pick.

Close all these holes and remove all the random clearing of
p->dl_server by:

 - having pick_*task() manage rq->dl_server

 - having the final put_prev_task() clear p->dl_server

 - having the first set_next_task() set p->dl_server = rq->dl_server

 - complicate the core_sched code to save/restore rq->dl_server where
   appropriate.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/core.c     |   40 +++++++++++++++-------------------------
 kernel/sched/deadline.c |    2 +-
 kernel/sched/fair.c     |   10 ++--------
 kernel/sched/sched.h    |   14 ++++++++++++++
 4 files changed, 32 insertions(+), 34 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3668,8 +3668,6 @@ ttwu_do_activate(struct rq *rq, struct t
 		rq->idle_stamp = 0;
 	}
 #endif
-
-	p->dl_server = NULL;
 }
 
 /*
@@ -5859,14 +5857,6 @@ static void prev_balance(struct rq *rq,
 			break;
 	}
 #endif
-
-	/*
-	 * We've updated @prev and no longer need the server link, clear it.
-	 * Must be done before ->pick_next_task() because that can (re)set
-	 * ->dl_server.
-	 */
-	if (prev->dl_server)
-		prev->dl_server = NULL;
 }
 
 /*
@@ -5878,6 +5868,8 @@ __pick_next_task(struct rq *rq, struct t
 	const struct sched_class *class;
 	struct task_struct *p;
 
+	rq->dl_server = NULL;
+
 	/*
 	 * Optimization: we know that if all tasks are in the fair class we can
 	 * call that function directly, but only if the @prev task wasn't of a
@@ -5897,20 +5889,6 @@ __pick_next_task(struct rq *rq, struct t
 			put_prev_set_next_task(rq, prev, p);
 		}
 
-		/*
-		 * This is a normal CFS pick, but the previous could be a DL pick.
-		 * Clear it as previous is no longer picked.
-		 */
-		if (prev->dl_server)
-			prev->dl_server = NULL;
-
-		/*
-		 * This is the fast path; it cannot be a DL server pick;
-		 * therefore even if @p == @prev, ->dl_server must be NULL.
-		 */
-		if (p->dl_server)
-			p->dl_server = NULL;
-
 		return p;
 	}
 
@@ -5958,6 +5936,8 @@ static inline struct task_struct *pick_t
 	const struct sched_class *class;
 	struct task_struct *p;
 
+	rq->dl_server = NULL;
+
 	for_each_class(class) {
 		p = class->pick_task(rq);
 		if (p)
@@ -5996,6 +5976,7 @@ pick_next_task(struct rq *rq, struct tas
 		 * another cpu during offline.
 		 */
 		rq->core_pick = NULL;
+		rq->core_dl_server = NULL;
 		return __pick_next_task(rq, prev, rf);
 	}
 
@@ -6014,7 +5995,9 @@ pick_next_task(struct rq *rq, struct tas
 		WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
 
 		next = rq->core_pick;
+		rq->dl_server = rq->core_dl_server;
 		rq->core_pick = NULL;
+		rq->core_dl_server = NULL;
 		goto out_set_next;
 	}
 
@@ -6059,6 +6042,7 @@ pick_next_task(struct rq *rq, struct tas
 		next = pick_task(rq);
 		if (!next->core_cookie) {
 			rq->core_pick = NULL;
+			rq->core_dl_server = NULL;
 			/*
 			 * For robustness, update the min_vruntime_fi for
 			 * unconstrained picks as well.
@@ -6086,7 +6070,9 @@ pick_next_task(struct rq *rq, struct tas
 		if (i != cpu && (rq_i != rq->core || !core_clock_updated))
 			update_rq_clock(rq_i);
 
-		p = rq_i->core_pick = pick_task(rq_i);
+		rq_i->core_pick = p = pick_task(rq_i);
+		rq_i->core_dl_server = rq_i->dl_server;
+
 		if (!max || prio_less(max, p, fi_before))
 			max = p;
 	}
@@ -6110,6 +6096,7 @@ pick_next_task(struct rq *rq, struct tas
 		}
 
 		rq_i->core_pick = p;
+		rq_i->core_dl_server = NULL;
 
 		if (p == rq_i->idle) {
 			if (rq_i->nr_running) {
@@ -6170,6 +6157,7 @@ pick_next_task(struct rq *rq, struct tas
 
 		if (i == cpu) {
 			rq_i->core_pick = NULL;
+			rq_i->core_dl_server = NULL;
 			continue;
 		}
 
@@ -6178,6 +6166,7 @@ pick_next_task(struct rq *rq, struct tas
 
 		if (rq_i->curr == rq_i->core_pick) {
 			rq_i->core_pick = NULL;
+			rq_i->core_dl_server = NULL;
 			continue;
 		}
 
@@ -8401,6 +8390,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SCHED_CORE
 		rq->core = rq;
 		rq->core_pick = NULL;
+		rq->core_dl_server = NULL;
 		rq->core_enabled = 0;
 		rq->core_tree = RB_ROOT;
 		rq->core_forceidle_count = 0;
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2429,7 +2429,7 @@ static struct task_struct *__pick_task_d
 			update_curr_dl_se(rq, dl_se, 0);
 			goto again;
 		}
-		p->dl_server = dl_se;
+		rq->dl_server = dl_se;
 	} else {
 		p = dl_task_of(dl_se);
 	}
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8747,14 +8747,6 @@ static struct task_struct *pick_task_fai
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
-	/*
-	 * This can be called from directly from CFS's ->pick_task() or indirectly
-	 * from DL's ->pick_task when fair server is enabled. In the indirect case,
-	 * DL will set ->dl_server just after this function is called, so its Ok to
-	 * clear. In the direct case, we are picking directly so we must clear it.
-	 */
-	task_of(se)->dl_server = NULL;
-
 	return task_of(se);
 }
 
@@ -8778,6 +8770,8 @@ pick_next_task_fair(struct rq *rq, struc
 	if (prev->sched_class != &fair_sched_class)
 		goto simple;
 
+	__put_prev_set_next_dl_server(rq, prev, p);
+
 	/*
 	 * Because of the set_next_buddy() in dequeue_task_fair() it is rather
 	 * likely that a next task is from the same cgroup as the current.
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1066,6 +1066,7 @@ struct rq {
 	unsigned int		nr_uninterruptible;
 
 	struct task_struct __rcu	*curr;
+	struct sched_dl_entity	*dl_server;
 	struct task_struct	*idle;
 	struct task_struct	*stop;
 	unsigned long		next_balance;
@@ -1193,6 +1194,7 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	struct task_struct	*core_pick;
+	struct sched_dl_entity	*core_dl_server;
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
@@ -2370,12 +2372,24 @@ static inline void set_next_task(struct
 	next->sched_class->set_next_task(rq, next, false);
 }
 
+static inline void
+__put_prev_set_next_dl_server(struct rq *rq,
+			      struct task_struct *prev,
+			      struct task_struct *next)
+{
+	prev->dl_server = NULL;
+	next->dl_server = rq->dl_server;
+	rq->dl_server = NULL;
+}
+
 static inline void put_prev_set_next_task(struct rq *rq,
 					  struct task_struct *prev,
 					  struct task_struct *next)
 {
 	WARN_ON_ONCE(rq->curr != prev);
 
+	__put_prev_set_next_dl_server(rq, prev, next);
+
 	if (next == prev)
 		return;
 



  parent reply	other threads:[~2024-08-13 22:50 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-13 22:25 [PATCH 0/9] sched: Prepare for sched_ext Peter Zijlstra
2024-08-13 22:25 ` [PATCH 1/9] sched: Use set_next_task(.first) where required Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 2/9] sched: Fixup set_next_task() implementations Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 3/9] sched: Clean up DL server vs core sched Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 4/9] sched: Split up put_prev_task_balance() Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 5/9] sched: Rework pick_next_task() Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 6/9] sched: Combine the last put_prev_task() and the first set_next_task() Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` Peter Zijlstra [this message]
2024-09-03 13:38   ` [tip: sched/core] sched: Rework dl_server tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 8/9] sched: Add put_prev_task(.next) Peter Zijlstra
2024-09-03 13:38   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2024-08-13 22:25 ` [PATCH 9/9] sched: Add pick_task(.core) Peter Zijlstra
2024-08-13 22:59   ` Peter Zijlstra
2024-08-14 10:42   ` Juri Lelli
2024-08-21 23:05   ` Tejun Heo
2024-09-03 13:31     ` Peter Zijlstra
2024-08-17 21:56 ` [PATCH 0/9] sched: Prepare for sched_ext Joel Fernandes
2024-08-21 21:41 ` Joel Fernandes
2024-08-22 12:58   ` Joel Fernandes
2024-08-22 13:15     ` Joel Fernandes
2024-08-22 13:37       ` Joel Fernandes
2024-08-22 15:48   ` Peter Zijlstra
2024-09-04 13:56   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240813224016.259853414@infradead.org \
    --to=peterz@infradead.org \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=joelaf@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=void@manifault.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox