public inbox for cgroups@vger.kernel.org
 help / color / mirror / Atom feed
From: Shrikanth Hegde <sshegde@linux.ibm.com>
To: Juri Lelli <juri.lelli@redhat.com>,
	linux-kernel@vger.kernel.org, cgroups@vger.kernel.org
Cc: "Ingo Molnar" <mingo@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Vincent Guittot" <vincent.guittot@linaro.org>,
	"Dietmar Eggemann" <dietmar.eggemann@arm.com>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Ben Segall" <bsegall@google.com>, "Mel Gorman" <mgorman@suse.de>,
	"Valentin Schneider" <vschneid@redhat.com>,
	"Waiman Long" <longman@redhat.com>, "Tejun Heo" <tj@kernel.org>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Qais Yousef" <qyousef@layalina.io>,
	"Sebastian Andrzej Siewior" <bigeasy@linutronix.de>,
	"Swapnil Sapkal" <swapnil.sapkal@amd.com>,
	"Phil Auld" <pauld@redhat.com>,
	luca.abeni@santannapisa.it, tommaso.cucinotta@santannapisa.it,
	"Jon Hunter" <jonathanh@nvidia.com>
Subject: Re: [PATCH v2 4/8] sched/deadline: Rebuild root domain accounting after every update
Date: Fri, 7 Mar 2025 13:02:15 +0530	[thread overview]
Message-ID: <295680e1-ba91-4019-9b7f-e8efd75d7f13@linux.ibm.com> (raw)
In-Reply-To: <20250306141016.268313-5-juri.lelli@redhat.com>



On 3/6/25 19:40, Juri Lelli wrote:
> Rebuilding of root domains accounting information (total_bw) is
> currently broken on some cases, e.g. suspend/resume on aarch64. Problem
> is that the way we keep track of domain changes and try to add bandwidth
> back is convoluted and fragile.
> 
> Fix it by simplify things by making sure bandwidth accounting is cleared
> and completely restored after root domains changes (after root domains
> are again stable).
> 
> Reported-by: Jon Hunter <jonathanh@nvidia.com>
> Fixes: 53916d5fd3c0 ("sched/deadline: Check bandwidth overflow earlier for hotplug")
> Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
> ---
>   include/linux/sched/deadline.h |  4 ++++
>   include/linux/sched/topology.h |  2 ++
>   kernel/cgroup/cpuset.c         | 16 +++++++++-------
>   kernel/sched/deadline.c        | 16 ++++++++++------
>   kernel/sched/topology.c        |  1 +
>   5 files changed, 26 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
> index 6ec578600b24..a780068aa1a5 100644
> --- a/include/linux/sched/deadline.h
> +++ b/include/linux/sched/deadline.h
> @@ -34,6 +34,10 @@ static inline bool dl_time_before(u64 a, u64 b)
>   struct root_domain;
>   extern void dl_add_task_root_domain(struct task_struct *p);
>   extern void dl_clear_root_domain(struct root_domain *rd);
> +extern void dl_clear_root_domain_cpu(int cpu);
> +
> +extern u64 dl_cookie;
> +extern bool dl_bw_visited(int cpu, u64 gen);
>   
>   #endif /* CONFIG_SMP */
>   
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 7f3dbafe1817..1622232bd08b 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -166,6 +166,8 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
>   	return to_cpumask(sd->span);
>   }
>   
> +extern void dl_rebuild_rd_accounting(void);
> +
>   extern void partition_sched_domains_locked(int ndoms_new,
>   					   cpumask_var_t doms_new[],
>   					   struct sched_domain_attr *dattr_new);
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index f87526edb2a4..f66b2aefdc04 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -954,10 +954,12 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
>   	css_task_iter_end(&it);
>   }
>   
> -static void dl_rebuild_rd_accounting(void)
> +void dl_rebuild_rd_accounting(void)
>   {
>   	struct cpuset *cs = NULL;
>   	struct cgroup_subsys_state *pos_css;
> +	int cpu;
> +	u64 cookie = ++dl_cookie;
>   
>   	lockdep_assert_held(&cpuset_mutex);
>   	lockdep_assert_cpus_held();
> @@ -965,11 +967,12 @@ static void dl_rebuild_rd_accounting(void)
>   
>   	rcu_read_lock();
>   
> -	/*
> -	 * Clear default root domain DL accounting, it will be computed again
> -	 * if a task belongs to it.
> -	 */
> -	dl_clear_root_domain(&def_root_domain);
> +	for_each_possible_cpu(cpu) {
> +		if (dl_bw_visited(cpu, cookie))
> +			continue;
> +
> +		dl_clear_root_domain_cpu(cpu);
> +	}
>   

This will clear the bandwidth of all possible root domains and rebuild it.

For online CPUs, the fair server bandwidth is added, I think, in
rq_attach_root. But for offline CPUs the sched domains won't be
rebuilt. It may not be an issue, but the def_root_domain's bw may be
different afterwards, no?

>   	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
>   
> @@ -996,7 +999,6 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
>   {
>   	sched_domains_mutex_lock();
>   	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
> -	dl_rebuild_rd_accounting();
>   	sched_domains_mutex_unlock();
>   }
>   
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 339434271cba..17b040c92885 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -166,7 +166,7 @@ static inline unsigned long dl_bw_capacity(int i)
>   	}
>   }
>   
> -static inline bool dl_bw_visited(int cpu, u64 cookie)
> +bool dl_bw_visited(int cpu, u64 cookie)
>   {
>   	struct root_domain *rd = cpu_rq(cpu)->rd;
>   
> @@ -207,7 +207,7 @@ static inline unsigned long dl_bw_capacity(int i)
>   	return SCHED_CAPACITY_SCALE;
>   }
>   
> -static inline bool dl_bw_visited(int cpu, u64 cookie)
> +bool dl_bw_visited(int cpu, u64 cookie)
>   {
>   	return false;
>   }
> @@ -2981,18 +2981,22 @@ void dl_clear_root_domain(struct root_domain *rd)
>   	rd->dl_bw.total_bw = 0;
>   
>   	/*
> -	 * dl_server bandwidth is only restored when CPUs are attached to root
> -	 * domains (after domains are created or CPUs moved back to the
> -	 * default root doamin).
> +	 * dl_servers are not tasks. Since dl_add_task_root_domanin ignores
> +	 * them, we need to account for them here explicitly.
>   	 */
>   	for_each_cpu(i, rd->span) {
>   		struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
>   
>   		if (dl_server(dl_se) && cpu_active(i))
> -			rd->dl_bw.total_bw += dl_se->dl_bw;
> +			__dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i));
>   	}
>   }
>   
> +void dl_clear_root_domain_cpu(int cpu)
> +{
> +	dl_clear_root_domain(cpu_rq(cpu)->rd);
> +}
> +
>   #endif /* CONFIG_SMP */
>   
>   static void switched_from_dl(struct rq *rq, struct task_struct *p)
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 44093339761c..363ad268a25b 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2791,6 +2791,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
>   	ndoms_cur = ndoms_new;
>   
>   	update_sched_domain_debugfs();
> +	dl_rebuild_rd_accounting();
>   }
>   
>   /*


  parent reply	other threads:[~2025-03-07  7:33 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-06 14:10 [PATCH v2 0/8] Fix SCHED_DEADLINE bandwidth accounting during suspend Juri Lelli
2025-03-06 14:10 ` [PATCH v2 1/8] sched/deadline: Ignore special tasks when rebuilding domains Juri Lelli
2025-03-06 14:10 ` [PATCH v2 2/8] sched/topology: Wrappers for sched_domains_mutex Juri Lelli
2025-03-07  6:34   ` Shrikanth Hegde
2025-03-07  8:53     ` Juri Lelli
2025-03-07  9:02       ` Shrikanth Hegde
2025-03-07 15:11   ` Waiman Long
2025-03-07 15:15     ` Juri Lelli
2025-03-07 15:19     ` Waiman Long
2025-03-07 15:59       ` Juri Lelli
2025-03-07 16:34         ` Waiman Long
2025-03-06 14:10 ` [PATCH v2 3/8] sched/deadline: Generalize unique visiting of root domains Juri Lelli
2025-03-07  5:36   ` Shrikanth Hegde
2025-03-07  8:55     ` Juri Lelli
2025-03-06 14:10 ` [PATCH v2 4/8] sched/deadline: Rebuild root domain accounting after every update Juri Lelli
2025-03-07  6:33   ` Shrikanth Hegde
2025-03-07  9:33     ` Juri Lelli
2025-03-07  7:32   ` Shrikanth Hegde [this message]
2025-03-07  8:59     ` Juri Lelli
2025-03-06 14:10 ` [PATCH v2 5/8] sched/topology: Remove redundant dl_clear_root_domain call Juri Lelli
2025-03-06 14:10 ` [PATCH v2 6/8] cgroup/cpuset: Remove partition_and_rebuild_sched_domains Juri Lelli
2025-03-07  7:40   ` Shrikanth Hegde
2025-03-07 15:14   ` Waiman Long
2025-03-07 15:16   ` Waiman Long
2025-03-06 14:10 ` [PATCH v2 7/8] sched/topology: Stop exposing partition_sched_domains_locked Juri Lelli
2025-03-06 14:10 ` [PATCH v2 8/8] include/{topology,cpuset}: Move dl_rebuild_rd_accounting to cpuset.h Juri Lelli
2025-03-07 15:17   ` Waiman Long
2025-03-07 11:40 ` [PATCH v2 0/8] Fix SCHED_DEADLINE bandwidth accounting during suspend Jon Hunter
2025-03-07 15:16   ` Juri Lelli
2025-03-07 19:00 ` Waiman Long
2025-03-10  8:55   ` Juri Lelli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=295680e1-ba91-4019-9b7f-e8efd75d7f13@linux.ibm.com \
    --to=sshegde@linux.ibm.com \
    --cc=bigeasy@linutronix.de \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=hannes@cmpxchg.org \
    --cc=jonathanh@nvidia.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=pauld@redhat.com \
    --cc=peterz@infradead.org \
    --cc=qyousef@layalina.io \
    --cc=rostedt@goodmis.org \
    --cc=swapnil.sapkal@amd.com \
    --cc=tj@kernel.org \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox