From: Chen Yu <yu.c.chen@intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	K Prateek Nayak <kprateek.nayak@amd.com>,
	"Gautham R . Shenoy" <gautham.shenoy@amd.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>,
	Libo Chen <libo.chen@oracle.com>,
	Madadi Vineeth Reddy <vineethr@linux.ibm.com>,
	Hillf Danton <hdanton@sina.com>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	Jianyong Wu <jianyong.wu@outlook.com>,
	Yangyu Chen <cyy@cyyself.name>,
	Tingyin Duan <tingyin.duan@gmail.com>,
	Vern Hao <vernhao@tencent.com>, Len Brown <len.brown@intel.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	Aubrey Li <aubrey.li@intel.com>, Zhao Liu <zhao1.liu@intel.com>,
	Chen Yu <yu.chen.surf@gmail.com>, Chen Yu <yu.c.chen@intel.com>,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v4 06/28] sched: Save the per LLC utilization for better cache aware scheduling
Date: Sat,  9 Aug 2025 13:02:54 +0800
Message-ID: <d77d4db175adc09cd01fdee097c16bc3e52c8be2.1754712565.git.tim.c.chen@linux.intel.com>
In-Reply-To: <cover.1754712565.git.tim.c.chen@linux.intel.com>

When a system becomes busy and a process's preferred LLC is
saturated by too many threads of that process, significant task
migrations occur within that LLC. These migrations introduce latency
and degrade performance. Ideally, task aggregation should be
inhibited when the preferred LLC is overloaded, which requires a
metric indicating whether the LLC is busy.

Store the per-LLC utilization calculated during periodic load
balancing. This statistic will be used in subsequent patches to
decide whether tasks should be aggregated to their preferred LLC.
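
For illustration, a later consumer of the stored value might look
roughly like the sketch below (hypothetical: the helper name and the
threshold are placeholders, the real policy is introduced in
subsequent patches; get_llc_stats() is the helper added by this
patch):

	/*
	 * Hypothetical check: is the LLC that 'cpu' belongs to too
	 * busy to aggregate more tasks onto it?
	 */
	static bool llc_overloaded(int cpu)
	{
		unsigned long util, cap;
		bool overloaded = false;

		rcu_read_lock();
		/* util/cap are only valid if get_llc_stats() succeeds */
		if (get_llc_stats(cpu, &util, &cap))
			/* placeholder threshold: >85% utilization */
			overloaded = util * 100 > cap * 85;
		rcu_read_unlock();

		return overloaded;
	}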

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 include/linux/sched/topology.h |  3 ++
 kernel/sched/fair.c            | 53 ++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
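
Note: get_llc_stats() below approximates the LLC's capacity as
sd_llc_size * SCHED_CAPACITY_SCALE, i.e. it assumes every CPU in the
LLC contributes full capacity (SCHED_CAPACITY_SCALE == 1024). For
example, on an LLC spanning 16 CPUs:

	cap = 16 * 1024 = 16384

so a util_avg approaching 16384 indicates the LLC is fully busy.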

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 198bb5cc1774..692f8a703b93 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -78,6 +78,9 @@ struct sched_domain_shared {
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
 	int		nr_idle_scan;
+#ifdef CONFIG_SCHED_CACHE
+	unsigned long	util_avg;
+#endif
 };
 
 struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9e3c6f0eb934..4f79b7652642 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8828,6 +8828,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 #ifdef CONFIG_SCHED_CACHE
 static long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle);
 
+/* Expected to be called with rcu_read_lock() held. */
+static bool get_llc_stats(int cpu, unsigned long *util,
+			  unsigned long *cap)
+{
+	struct sched_domain_shared *sd_share;
+
+	sd_share = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (!sd_share)
+		return false;
+
+	*util = READ_ONCE(sd_share->util_avg);
+	*cap = per_cpu(sd_llc_size, cpu) * SCHED_CAPACITY_SCALE;
+
+	return true;
+}
+
 static int select_cache_cpu(struct task_struct *p, int prev_cpu)
 {
 	struct mm_struct *mm = p->mm;
@@ -10670,6 +10686,42 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 	return check_cpu_capacity(rq, sd);
 }
 
+#ifdef CONFIG_SCHED_CACHE
+/*
+ * Save this sched group's statistics for later use:
+ * task wakeup and load balancing can make better
+ * decisions based on them.
+ */
+static void update_sg_if_llc(struct lb_env *env, struct sg_lb_stats *sgs,
+			     struct sched_group *group)
+{
+	/* Find the sched domain that spans this group. */
+	struct sched_domain *sd = env->sd->child;
+	struct sched_domain_shared *sd_share;
+
+	if (!sched_feat(SCHED_CACHE) || env->idle == CPU_NEWLY_IDLE)
+		return;
+
+	/* Only care about the sched domain that spans exactly one LLC. */
+	if (!sd || !(sd->flags & SD_SHARE_LLC) ||
+	    !sd->parent || (sd->parent->flags & SD_SHARE_LLC))
+		return;
+
+	sd_share = rcu_dereference(per_cpu(sd_llc_shared,
+				  cpumask_first(sched_group_span(group))));
+	if (!sd_share)
+		return;
+
+	if (likely(READ_ONCE(sd_share->util_avg) != sgs->group_util))
+		WRITE_ONCE(sd_share->util_avg, sgs->group_util);
+}
+#else
+static inline void update_sg_if_llc(struct lb_env *env, struct sg_lb_stats *sgs,
+				    struct sched_group *group)
+{
+}
+#endif
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
@@ -10759,6 +10811,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
 
+	update_sg_if_llc(env, sgs, group);
 	/* Computing avg_load makes sense only when group is overloaded */
 	if (sgs->group_type == group_overloaded)
 		sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
-- 
2.25.1

