From: Peter Zijlstra <peterz@infradead.org>
To: "Chen, Yu C" <yu.c.chen@intel.com>
Cc: juri.lelli@redhat.com, vincent.guittot@linaro.org,
dietmar.eggemann@arm.com, rostedt@goodmis.org,
bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
linux-kernel@vger.kernel.org, tim.c.chen@linux.intel.com,
tglx@linutronix.de, len.brown@intel.com, gautham.shenoy@amd.com,
mingo@kernel.org, kprateek.nayak@amd.com,
yu.chen.surf@foxmail.com
Subject: Re: [RFC][PATCH] sched: Cache aware load-balancing
Date: Wed, 26 Mar 2025 11:25:53 +0100
Message-ID: <20250326102553.GA12071@noisy.programming.kicks-ass.net>
In-Reply-To: <20250326093841.GC25239@noisy.programming.kicks-ass.net>
On Wed, Mar 26, 2025 at 10:38:41AM +0100, Peter Zijlstra wrote:
> Nah, the saner thing to do is to preserve the topology averages and look
> at those instead of the per-cpu values.
>
> Eg. have task_cache_work() compute and store averages in the
> sched_domain structure and then use those.
A little something like so, perhaps?
This immediately also gives the information required for clusters and
finding the best LLC of a Node and things like that.
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -82,6 +82,9 @@ struct sched_domain_shared {
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
 	int		nr_idle_scan;
+
+	unsigned long	sum_occ;
+	unsigned long	avg_occ;
 };
 
 struct sched_domain {
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1286,8 +1286,8 @@ static void task_cache_work(struct callb
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
 	unsigned long m_a_occ = 0;
-	int cpu, m_a_cpu = -1;
-	cpumask_var_t cpus;
+	int m_a_cpu = -1;
+	int cpu;
 
 	WARN_ON_ONCE(work != &p->cache_work);
 
@@ -1296,46 +1296,46 @@ static void task_cache_work(struct callb
 	if (p->flags & PF_EXITING)
 		return;
 
-	if (!alloc_cpumask_var(&cpus, GFP_KERNEL))
-		return;
-
 	scoped_guard (cpus_read_lock) {
-		cpumask_copy(cpus, cpu_online_mask);
-		for_each_cpu(cpu, cpus) {
-			/* XXX sched_cluster_active */
-			struct sched_domain *sd = per_cpu(sd_llc, cpu);
-			unsigned long occ, m_occ = 0, a_occ = 0;
-			int m_cpu = -1, nr = 0, i;
+		for_each_online_cpu(cpu) {
+			struct sched_domain *sd;
+			struct sched_domain_shared *sds;
+			unsigned long occ;
+
+			for_each_domain(cpu, sd) {
+				if (!(sd->flags & SD_SHARE_LLC))
+					break;
 
-			for_each_cpu(i, sched_domain_span(sd)) {
-				occ = fraction_mm_sched(cpu_rq(i),
-							per_cpu_ptr(mm->pcpu_sched, i));
-				a_occ += occ;
-				if (occ > m_occ) {
-					m_occ = occ;
-					m_cpu = i;
-				}
-				nr++;
-				trace_printk("(%d) occ: %ld m_occ: %ld m_cpu: %d nr: %d\n",
-					     per_cpu(sd_llc_id, i), occ, m_occ, m_cpu, nr);
-			}
-
-			a_occ /= nr;
-			if (a_occ > m_a_occ) {
-				m_a_occ = a_occ;
-				m_a_cpu = m_cpu;
+				sds = sd->shared;
+				occ = fraction_mm_sched(cpu_rq(cpu),
+							per_cpu_ptr(mm->pcpu_sched, cpu));
+				sds->sum_occ += occ + 1;
 			}
+		}
 
-			trace_printk("(%d) a_occ: %ld m_a_occ: %ld\n",
-				     per_cpu(sd_llc_id, cpu), a_occ, m_a_occ);
+		for_each_online_cpu(cpu) {
+			struct sched_domain *sd;
+			struct sched_domain_shared *sds;
+
+			for_each_domain(cpu, sd) {
+				if (!(sd->flags & SD_SHARE_LLC))
+					break;
+
+				sds = sd->shared;
+				if (sds->sum_occ) {
+					sds->avg_occ = (sds->sum_occ - sd->span_weight) /
+						       sd->span_weight;
+					sds->sum_occ = 0;
+				}
 
-			for_each_cpu(i, sched_domain_span(sd)) {
-				/* XXX threshold ? */
-				per_cpu_ptr(mm->pcpu_sched, i)->occ = a_occ;
+				if (sd == per_cpu(sd_llc, cpu)) {
+					if (sds->avg_occ > m_a_occ) {
+						m_a_occ = sds->avg_occ;
+						m_a_cpu = cpu;
+					}
+				}
 			}
-
-			cpumask_andnot(cpus, cpus, sched_domain_span(sd));
 		}
 	}
 
@@ -1346,8 +1346,6 @@ static void task_cache_work(struct callb
 		m_a_cpu = -1;
 
 	mm->mm_sched_cpu = m_a_cpu;
-
-	free_cpumask_var(cpus);
 }
 
 void init_sched_mm(struct task_struct *p)
Thread overview: 23+ messages
2025-03-25 12:09 [RFC][PATCH] sched: Cache aware load-balancing Peter Zijlstra
2025-03-25 15:19 ` Chen, Yu C
2025-03-25 18:44 ` Peter Zijlstra
2025-03-26 6:18 ` K Prateek Nayak
2025-03-26 9:15 ` Chen, Yu C
2025-03-26 9:42 ` Peter Zijlstra
2025-03-27 8:10 ` Chen, Yu C
2025-03-26 9:38 ` Peter Zijlstra
2025-03-26 10:25 ` Peter Zijlstra [this message]
2025-03-26 10:42 ` Peter Zijlstra
2025-03-26 10:46 ` Peter Zijlstra
[not found] ` <20250327112059.3661-1-hdanton@sina.com>
2025-03-31 6:25 ` Chen, Yu C
2025-03-27 2:48 ` Chen, Yu C
2025-03-27 2:43 ` Madadi Vineeth Reddy
2025-03-27 11:14 ` Chen, Yu C
2025-03-31 20:17 ` Madadi Vineeth Reddy
2025-03-28 13:57 ` Abel Wu
2025-03-29 15:06 ` Chen, Yu C
2025-03-30 8:46 ` Abel Wu
2025-03-31 5:25 ` Chen, Yu C
2025-03-31 8:04 ` Abel Wu
2025-03-31 21:06 ` Tim Chen
2025-04-02 1:52 ` Libo Chen