Re: [PATCH v5 7/7] sched/fair: Fair server interface

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>,
	linux-kernel@vger.kernel.org,
	Luca Abeni <luca.abeni@santannapisa.it>,
	Tommaso Cucinotta <tommaso.cucinotta@santannapisa.it>,
	Thomas Gleixner <tglx@linutronix.de>,
	Joel Fernandes <joel@joelfernandes.org>,
	Vineeth Pillai <vineeth@bitbyteword.org>,
	Shuah Khan <skhan@linuxfoundation.org>,
	Phil Auld <pauld@redhat.com>
Subject: Re: [PATCH v5 7/7] sched/fair: Fair server interface
Date: Tue, 7 Nov 2023 15:44:24 +0100	[thread overview]
Message-ID: <20231107144424.GX8262@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <9a7222ed-88f8-4a3f-9d83-09b7fb977c27@kernel.org>

On Mon, Nov 06, 2023 at 05:29:49PM +0100, Daniel Bristot de Oliveira wrote:

> I was thinking about moving the entire throttling machinery inside CONFIG_RT_GROUP_SCHED
> for now, because GROUP_SCHED depends on it, no?

This builds and boots..

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9930,8 +9930,6 @@ void __init sched_init(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 	}
 
-	init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
-
 #ifdef CONFIG_SMP
 	init_defrootdomain();
 #endif
@@ -9986,7 +9984,6 @@ void __init sched_init(void)
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
 		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1480,6 +1480,7 @@ static void update_curr_dl_se(struct rq
 	if (dl_se == &rq->fair_server)
 		return;
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
@@ -1504,6 +1505,7 @@ static void update_curr_dl_se(struct rq
 			rt_rq->rt_time += delta_exec;
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 	}
+#endif
 }
 
 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -8,10 +8,6 @@ int sched_rr_timeslice = RR_TIMESLICE;
 /* More than 4 hours if BW_SHIFT equals 20. */
 static const u64 max_rt_runtime = MAX_BW;
 
-static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
-
-struct rt_bandwidth def_rt_bandwidth;
-
 /*
  * period over which we measure -rt task CPU usage in us.
  * default: 1s
@@ -67,6 +63,40 @@ static int __init sched_rt_sysctl_init(v
 late_initcall(sched_rt_sysctl_init);
 #endif
 
+void init_rt_rq(struct rt_rq *rt_rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
+	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
+	rt_rq->overloaded = 0;
+	plist_head_init(&rt_rq->pushable_tasks);
+#endif /* CONFIG_SMP */
+	/* We start in dequeued state, because no RT tasks are queued */
+	rt_rq->rt_queued = 0;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+#endif
+}
+
+#ifdef CONFIG_RT_GROUP_SCHED
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
+
 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 {
 	struct rt_bandwidth *rt_b =
@@ -131,35 +161,6 @@ static void start_rt_bandwidth(struct rt
 	do_start_rt_bandwidth(rt_b);
 }
 
-void init_rt_rq(struct rt_rq *rt_rq)
-{
-	struct rt_prio_array *array;
-	int i;
-
-	array = &rt_rq->active;
-	for (i = 0; i < MAX_RT_PRIO; i++) {
-		INIT_LIST_HEAD(array->queue + i);
-		__clear_bit(i, array->bitmap);
-	}
-	/* delimiter for bitsearch: */
-	__set_bit(MAX_RT_PRIO, array->bitmap);
-
-#if defined CONFIG_SMP
-	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
-	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
-	rt_rq->overloaded = 0;
-	plist_head_init(&rt_rq->pushable_tasks);
-#endif /* CONFIG_SMP */
-	/* We start is dequeued state, because no RT tasks are queued */
-	rt_rq->rt_queued = 0;
-
-	rt_rq->rt_time = 0;
-	rt_rq->rt_throttled = 0;
-	rt_rq->rt_runtime = 0;
-	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-}
-
-#ifdef CONFIG_RT_GROUP_SCHED
 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	hrtimer_cancel(&rt_b->rt_period_timer);
@@ -254,9 +255,6 @@ int alloc_rt_sched_group(struct task_gro
 	if (!tg->rt_se)
 		goto err;
 
-	init_rt_bandwidth(&tg->rt_bandwidth,
-			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
-
 	for_each_possible_cpu(i) {
 		rt_rq = kzalloc_node(sizeof(struct rt_rq),
 				     GFP_KERNEL, cpu_to_node(i));
@@ -605,70 +603,6 @@ static inline struct rt_bandwidth *sched
 	return &rt_rq->tg->rt_bandwidth;
 }
 
-#else /* !CONFIG_RT_GROUP_SCHED */
-
-static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
-{
-	return rt_rq->rt_runtime;
-}
-
-static inline u64 sched_rt_period(struct rt_rq *rt_rq)
-{
-	return ktime_to_ns(def_rt_bandwidth.rt_period);
-}
-
-typedef struct rt_rq *rt_rq_iter_t;
-
-#define for_each_rt_rq(rt_rq, iter, rq) \
-	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
-
-#define for_each_sched_rt_entity(rt_se) \
-	for (; rt_se; rt_se = NULL)
-
-static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
-{
-	return NULL;
-}
-
-static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
-{
-	struct rq *rq = rq_of_rt_rq(rt_rq);
-
-	if (!rt_rq->rt_nr_running)
-		return;
-
-	enqueue_top_rt_rq(rt_rq);
-	resched_curr(rq);
-}
-
-static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
-{
-	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
-}
-
-static inline int rt_rq_throttled(struct rt_rq *rt_rq)
-{
-	return rt_rq->rt_throttled;
-}
-
-static inline const struct cpumask *sched_rt_period_mask(void)
-{
-	return cpu_online_mask;
-}
-
-static inline
-struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
-{
-	return &cpu_rq(cpu)->rt;
-}
-
-static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
-{
-	return &def_rt_bandwidth;
-}
-
-#endif /* CONFIG_RT_GROUP_SCHED */
-
 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -860,7 +794,7 @@ static int do_sched_rt_period_timer(stru
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
-#ifdef CONFIG_RT_GROUP_SCHED
+
 	/*
 	 * FIXME: isolated CPUs should really leave the root task group,
 	 * whether they are isolcpus or were isolated via cpusets, lest
@@ -872,7 +806,7 @@ static int do_sched_rt_period_timer(stru
 	 */
 	if (rt_b == &root_task_group.rt_bandwidth)
 		span = cpu_online_mask;
-#endif
+
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
@@ -939,18 +873,6 @@ static int do_sched_rt_period_timer(stru
 	return idle;
 }
 
-static inline int rt_se_prio(struct sched_rt_entity *rt_se)
-{
-#ifdef CONFIG_RT_GROUP_SCHED
-	struct rt_rq *rt_rq = group_rt_rq(rt_se);
-
-	if (rt_rq)
-		return rt_rq->highest_prio.curr;
-#endif
-
-	return rt_task_of(rt_se)->prio;
-}
-
 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
@@ -994,6 +916,70 @@ static int sched_rt_runtime_exceeded(str
 	return 0;
 }
 
+#else /* !CONFIG_RT_GROUP_SCHED */
+
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
+#define for_each_sched_rt_entity(rt_se) \
+	for (; rt_se; rt_se = NULL)
+
+static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+{
+	return NULL;
+}
+
+static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+{
+	struct rq *rq = rq_of_rt_rq(rt_rq);
+
+	if (!rt_rq->rt_nr_running)
+		return;
+
+	enqueue_top_rt_rq(rt_rq);
+	resched_curr(rq);
+}
+
+static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
+{
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
+}
+
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return false;
+}
+
+static inline const struct cpumask *sched_rt_period_mask(void)
+{
+	return cpu_online_mask;
+}
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return &cpu_rq(cpu)->rt;
+}
+
+static void __enable_runtime(struct rq *rq) { }
+static void __disable_runtime(struct rq *rq) { }
+
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+static inline int rt_se_prio(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+	if (rt_rq)
+		return rt_rq->highest_prio.curr;
+#endif
+
+	return rt_task_of(rt_se)->prio;
+}
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -1001,7 +987,6 @@ static int sched_rt_runtime_exceeded(str
 static void update_curr_rt(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;
-	struct sched_rt_entity *rt_se = &curr->rt;
 	s64 delta_exec;
 
 	if (curr->sched_class != &rt_sched_class)
@@ -1011,6 +996,9 @@ static void update_curr_rt(struct rq *rq
 	if (unlikely(delta_exec <= 0))
 		return;
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity *rt_se = &curr->rt;
+
 	if (!rt_bandwidth_enabled())
 		return;
 
@@ -1029,6 +1017,7 @@ static void update_curr_rt(struct rq *rq
 				do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
 		}
 	}
+#endif
 }
 
 static void
@@ -1185,7 +1174,6 @@ dec_rt_group(struct sched_rt_entity *rt_
 static void
 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	start_rt_bandwidth(&def_rt_bandwidth);
 }
 
 static inline
@@ -2913,19 +2901,6 @@ int sched_rt_can_attach(struct task_grou
 #ifdef CONFIG_SYSCTL
 static int sched_rt_global_constraints(void)
 {
-	unsigned long flags;
-	int i;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	for_each_possible_cpu(i) {
-		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
-
-		raw_spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_runtime = global_rt_runtime();
-		raw_spin_unlock(&rt_rq->rt_runtime_lock);
-	}
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
-
 	return 0;
 }
 #endif /* CONFIG_SYSCTL */
@@ -2945,12 +2920,6 @@ static int sched_rt_global_validate(void
 
 static void sched_rt_do_global(void)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	def_rt_bandwidth.rt_runtime = global_rt_runtime();
-	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
-	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 }
 
 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -698,13 +698,13 @@ struct rt_rq {
 #endif /* CONFIG_SMP */
 	int			rt_queued;
 
+#ifdef CONFIG_RT_GROUP_SCHED
 	int			rt_throttled;
 	u64			rt_time;
 	u64			rt_runtime;
 	/* Nests inside the rq lock: */
 	raw_spinlock_t		rt_runtime_lock;
 
-#ifdef CONFIG_RT_GROUP_SCHED
 	unsigned int		rt_nr_boosted;
 
 	struct rq		*rq;
@@ -2460,7 +2460,6 @@ extern void reweight_task(struct task_st
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 
-extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);

next prev parent reply	other threads:[~2023-11-07 14:44 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-04 10:59 [PATCH v5 0/7] SCHED_DEADLINE server infrastructure Daniel Bristot de Oliveira
2023-11-04 10:59 ` [PATCH v5 1/7] sched: Unify runtime accounting across classes Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 2/7] sched/deadline: Collect sched_dl_entity initialization Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 3/7] sched/deadline: Move bandwidth accounting into {en,de}queue_dl_entity Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 4/7] sched/deadline: Introduce deadline servers Daniel Bristot de Oliveira
2023-11-15  9:04   ` [tip: sched/core] " tip-bot2 for Peter Zijlstra
2023-11-04 10:59 ` [PATCH v5 5/7] sched/fair: Add trivial fair server Daniel Bristot de Oliveira
2023-11-06 14:24   ` Peter Zijlstra
2023-11-06 14:26     ` Daniel Bristot de Oliveira
2023-11-04 10:59 ` [PATCH v5 6/7] sched/deadline: Deferrable dl server Daniel Bristot de Oliveira
2023-11-06 14:55   ` Peter Zijlstra
2023-11-06 17:05     ` Daniel Bristot de Oliveira
2023-11-06 19:32   ` Joel Fernandes
2023-11-06 21:32     ` Joel Fernandes
2023-11-06 21:37       ` Joel Fernandes
2023-11-07 11:58         ` Daniel Bristot de Oliveira
2023-11-08  2:42           ` Joel Fernandes
2023-11-07 16:47         ` Steven Rostedt
2023-11-07 17:35           ` Steven Rostedt
2023-11-07 17:46             ` Steven Rostedt
2023-11-07 17:54             ` Steven Rostedt
2023-11-07 19:32               ` Steven Rostedt
2023-11-07 20:07                 ` Steven Rostedt
2023-11-07 17:37           ` Daniel Bristot de Oliveira
2023-11-07 18:50             ` Daniel Bristot de Oliveira
2023-11-08  3:20               ` Joel Fernandes
2023-11-08  8:01                 ` Daniel Bristot de Oliveira
2023-11-08 18:25                   ` Joel Fernandes
2023-11-08 12:44               ` Peter Zijlstra
2023-11-08 12:50                 ` Peter Zijlstra
2023-11-08 14:52                   ` Daniel Bristot de Oliveira
2023-11-08 13:46                 ` Daniel Bristot de Oliveira
2023-11-08 13:58                 ` Daniel Bristot de Oliveira
2023-11-08 15:14                 ` Juri Lelli
2023-11-08 16:57                   ` Peter Zijlstra
2023-11-08  2:37           ` Joel Fernandes
2023-11-07  7:30     ` Daniel Bristot de Oliveira
2023-11-07 16:37   ` Steven Rostedt
2023-11-13 15:05   ` kernel test robot
2024-03-20  0:03   ` Joel Fernandes
2024-03-20 19:24     ` Daniel Bristot de Oliveira
2024-03-21 16:15       ` Joel Fernandes
2024-03-23 14:37         ` Joel Fernandes
2024-04-05 14:35         ` Daniel Bristot de Oliveira
2024-04-08 17:11           ` Steven Rostedt
2023-11-04 10:59 ` [PATCH v5 7/7] sched/fair: Fair server interface Daniel Bristot de Oliveira
2023-11-04 15:18   ` kernel test robot
2023-11-05  0:55   ` kernel test robot
2023-11-06 15:40   ` Peter Zijlstra
2023-11-06 16:29     ` Daniel Bristot de Oliveira
2023-11-07  8:16       ` Peter Zijlstra
2023-11-07 14:06         ` Daniel Bristot de Oliveira
2023-11-07 14:44       ` Peter Zijlstra [this message]
2023-11-07 12:38   ` Peter Zijlstra
2023-11-07 13:24     ` Daniel Bristot de Oliveira
2024-01-19  1:49   ` Joel Fernandes
2024-01-19  1:55   ` Joel Fernandes
2024-01-22 14:14     ` Daniel Bristot de Oliveira
2024-01-23 15:39       ` Joel Fernandes
2024-01-23 15:44       ` Joel Fernandes
2024-02-13  2:13   ` Joel Fernandes
2024-02-13  2:21     ` Joel Fernandes
2024-02-14 14:23     ` Daniel Bristot de Oliveira
2024-02-15 13:57       ` Joel Fernandes
2024-02-15 17:27         ` Daniel Bristot de Oliveira
2024-02-15 17:41           ` Joel Fernandes
2024-04-04 17:43             ` Daniel Bristot de Oliveira
2023-12-08 21:47 ` [PATCH v5 0/7] SCHED_DEADLINE server infrastructure Joel Fernandes
2024-02-19  7:33 ` Huang, Ying
2024-02-19 10:23   ` Daniel Bristot de Oliveira
2024-02-20  3:28     ` Huang, Ying
2024-02-20  8:31       ` Daniel Bristot de Oliveira
2024-02-20  8:41         ` Huang, Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231107144424.GX8262@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bristot@kernel.org \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=joel@joelfernandes.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=pauld@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=skhan@linuxfoundation.org \
    --cc=tglx@linutronix.de \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=vincent.guittot@linaro.org \
    --cc=vineeth@bitbyteword.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.