From: Peter Zijlstra <peterz@infradead.org>
To: Barry Song <song.bao.hua@hisilicon.com>
Cc: tim.c.chen@linux.intel.com, catalin.marinas@arm.com,
will@kernel.org, rjw@rjwysocki.net, vincent.guittot@linaro.org,
bp@alien8.de, tglx@linutronix.de, mingo@redhat.com,
lenb@kernel.org, dietmar.eggemann@arm.com, rostedt@goodmis.org,
bsegall@google.com, mgorman@suse.de, msys.mizuma@gmail.com,
valentin.schneider@arm.com, gregkh@linuxfoundation.org,
jonathan.cameron@huawei.com, juri.lelli@redhat.com,
mark.rutland@arm.com, sudeep.holla@arm.com,
aubrey.li@linux.intel.com, linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
x86@kernel.org, xuwei5@huawei.com, prime.zeng@hisilicon.com,
guodong.xu@linaro.org, yangyicong@huawei.com,
liguozhu@hisilicon.com, linuxarm@openeuler.org, hpa@zytor.com
Subject: Re: [RFC PATCH v4 2/3] scheduler: add scheduler level for clusters
Date: Tue, 2 Mar 2021 11:43:14 +0100 [thread overview]
Message-ID: <YD4WwrlVNwlSj/Zn@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20210301225940.16728-3-song.bao.hua@hisilicon.com>
On Tue, Mar 02, 2021 at 11:59:39AM +1300, Barry Song wrote:
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 88a2e2b..d805e59 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7797,6 +7797,16 @@ int sched_cpu_activate(unsigned int cpu)
> if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
> static_branch_inc_cpuslocked(&sched_smt_present);
> #endif
> +
> +#ifdef CONFIG_SCHED_CLUSTER
> + /*
> + * When going up, increment the number of cluster cpus with
> + * cluster present.
> + */
> + if (cpumask_weight(cpu_cluster_mask(cpu)) > 1)
> + static_branch_inc_cpuslocked(&sched_cluster_present);
> +#endif
> +
> set_cpu_active(cpu, true);
>
> if (sched_smp_initialized) {
> @@ -7873,6 +7883,14 @@ int sched_cpu_deactivate(unsigned int cpu)
> static_branch_dec_cpuslocked(&sched_smt_present);
> #endif
>
> +#ifdef CONFIG_SCHED_CLUSTER
> + /*
> + * When going down, decrement the number of cpus with cluster present.
> + */
> + if (cpumask_weight(cpu_cluster_mask(cpu)) > 1)
> + static_branch_dec_cpuslocked(&sched_cluster_present);
> +#endif
> +
> if (!sched_smp_initialized)
> return 0;
I don't think that's correct. IIUC this will mean the
sched_cluster_present thing will be enabled on anything with SMT (very
much including x86 big cores after the next patch).
I'm thinking that at the very least you should check a CLS domain
exists, but that might be hard at this point, because the sched domains
haven't been built yet.
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8a8bd7b..3db7b07 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6009,6 +6009,11 @@ static inline int __select_idle_cpu(int cpu)
> return -1;
> }
>
> +#ifdef CONFIG_SCHED_CLUSTER
> +DEFINE_STATIC_KEY_FALSE(sched_cluster_present);
> +EXPORT_SYMBOL_GPL(sched_cluster_present);
I really rather think this shouldn't be exported
> +#endif
> +
> #ifdef CONFIG_SCHED_SMT
> DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> EXPORT_SYMBOL_GPL(sched_smt_present);
This is a KVM wart, it needs to know because mitigation crap.
> @@ -6116,6 +6121,26 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
>
> #endif /* CONFIG_SCHED_SMT */
>
> +static inline int _select_idle_cpu(bool smt, struct task_struct *p, int target, struct cpumask *cpus, int *idle_cpu, int *nr)
> +{
> + int cpu, i;
> +
> + for_each_cpu_wrap(cpu, cpus, target) {
> + if (smt) {
> + i = select_idle_core(p, cpu, cpus, idle_cpu);
> + } else {
> + if (!--*nr)
> + return -1;
> + i = __select_idle_cpu(cpu);
> + }
> +
> + if ((unsigned int)i < nr_cpumask_bits)
> + return i;
> + }
> +
> + return -1;
> +}
> +
> /*
> * Scan the LLC domain for idle CPUs; this is dynamically regulated by
> * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
> @@ -6124,7 +6149,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
> static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
> {
> struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> - int i, cpu, idle_cpu = -1, nr = INT_MAX;
> + int i, idle_cpu = -1, nr = INT_MAX;
> bool smt = test_idle_cores(target, false);
> int this = smp_processor_id();
> struct sched_domain *this_sd;
> @@ -6134,7 +6159,12 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> if (!this_sd)
> return -1;
>
> - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> + if (!sched_cluster_active())
> + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +#ifdef CONFIG_SCHED_CLUSTER
> + if (sched_cluster_active())
> + cpumask_and(cpus, cpu_cluster_mask(target), p->cpus_ptr);
> +#endif
>
> if (sched_feat(SIS_PROP) && !smt) {
> u64 avg_cost, avg_idle, span_avg;
> @@ -6155,24 +6185,32 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> time = cpu_clock(this);
> }
>
> - for_each_cpu_wrap(cpu, cpus, target) {
> - if (smt) {
> - i = select_idle_core(p, cpu, cpus, &idle_cpu);
> - if ((unsigned int)i < nr_cpumask_bits)
> - return i;
> + /* scan cluster before scanning the whole llc */
> +#ifdef CONFIG_SCHED_CLUSTER
> + if (sched_cluster_active()) {
> + i = _select_idle_cpu(smt, p, target, cpus, &idle_cpu, &nr);
> + if ((unsigned int) i < nr_cpumask_bits) {
> + idle_cpu = i;
> + goto done;
> + } else if (nr <= 0)
> + return -1;
>
> - } else {
> - if (!--nr)
> - return -1;
> - idle_cpu = __select_idle_cpu(cpu);
> - if ((unsigned int)idle_cpu < nr_cpumask_bits)
> - break;
> - }
> + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> + cpumask_andnot(cpus, cpus, cpu_cluster_mask(target));
> }
> +#endif
> +
> + i = _select_idle_cpu(smt, p, target, cpus, &idle_cpu, &nr);
> + if ((unsigned int) i < nr_cpumask_bits) {
> + idle_cpu = i;
> + goto done;
> + } else if (nr <= 0)
> + return -1;
>
> if (smt)
> set_idle_cores(this, false);
>
> +done:
> if (sched_feat(SIS_PROP) && !smt) {
> time = cpu_clock(this) - time;
> update_avg(&this_sd->avg_scan_cost, time);
And this is just horrific :-(
next prev parent reply other threads:[~2021-03-02 11:22 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-01 22:59 [RFC PATCH v4 0/3] scheduler: expose the topology of clusters and add cluster scheduler Barry Song
2021-03-01 22:59 ` [RFC PATCH v4 1/3] topology: Represent clusters of CPUs within a die Barry Song
2021-03-15 3:11 ` Song Bao Hua (Barry Song)
2021-03-15 10:52 ` Jonathan Cameron
2021-03-01 22:59 ` [RFC PATCH v4 2/3] scheduler: add scheduler level for clusters Barry Song
2021-03-02 10:43 ` Peter Zijlstra [this message]
2021-03-16 7:33 ` Song Bao Hua (Barry Song)
2021-03-08 11:25 ` Vincent Guittot
2021-03-08 22:15 ` Song Bao Hua (Barry Song)
2021-03-01 22:59 ` [RFC PATCH v4 3/3] scheduler: Add cluster scheduler level for x86 Barry Song
2021-03-02 10:30 ` Peter Zijlstra
2021-03-03 18:34 ` Tim Chen
2021-03-08 22:30 ` [Linuxarm] " Song Bao Hua (Barry Song)
2021-03-15 20:53 ` Tim Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YD4WwrlVNwlSj/Zn@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=aubrey.li@linux.intel.com \
--cc=bp@alien8.de \
--cc=bsegall@google.com \
--cc=catalin.marinas@arm.com \
--cc=dietmar.eggemann@arm.com \
--cc=gregkh@linuxfoundation.org \
--cc=guodong.xu@linaro.org \
--cc=hpa@zytor.com \
--cc=jonathan.cameron@huawei.com \
--cc=juri.lelli@redhat.com \
--cc=lenb@kernel.org \
--cc=liguozhu@hisilicon.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxarm@openeuler.org \
--cc=mark.rutland@arm.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=msys.mizuma@gmail.com \
--cc=prime.zeng@hisilicon.com \
--cc=rjw@rjwysocki.net \
--cc=rostedt@goodmis.org \
--cc=song.bao.hua@hisilicon.com \
--cc=sudeep.holla@arm.com \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=valentin.schneider@arm.com \
--cc=vincent.guittot@linaro.org \
--cc=will@kernel.org \
--cc=x86@kernel.org \
--cc=xuwei5@huawei.com \
--cc=yangyicong@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox