From: Parth Shah <parth@linux.ibm.com>
To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: peterz@infradead.org, mingo@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
patrick.bellasi@matbug.net, valentin.schneider@arm.com,
pavel@ucw.cz, dsmythies@telus.net, qperret@google.com,
tim.c.chen@linux.intel.com
Subject: [RFC v6 3/5] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores
Date: Tue, 21 Jan 2020 12:03:05 +0530 [thread overview]
Message-ID: <20200121063307.17221-4-parth@linux.ibm.com> (raw)
In-Reply-To: <20200121063307.17221-1-parth@linux.ibm.com>
The algorithm finds the first non-idle core in the system and tries to
place the task on an idle CPU of the chosen core. To maintain cache hotness,
the search for a non-idle core starts from prev_cpu, which also reduces
task ping-pong behaviour inside the core.
Define a new method, select_non_idle_core(), which keeps track of the idle
and non-idle CPUs in a core and, based on heuristics, determines whether the
core is sufficiently busy to place the waking background task on it. The
heuristic further classifies non-idle CPUs as busy (>12.5% util) or
overutilized (>80% util).
- A core with more idle CPUs than non-idle CPUs and no busy CPU is not
selected for packing
- A core containing more than one overutilized CPU is exempted from
task packing
- Pack if there is at least one busy CPU and the overutilized CPU count is < 2
A threshold of 12.5% utilization for a busy CPU is a sufficient heuristic for
a CPU that is doing enough work and will not become idle in the near future.
Signed-off-by: Parth Shah <parth@linux.ibm.com>
---
kernel/sched/core.c | 3 ++
kernel/sched/fair.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 89 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 629c2589d727..a34a5589ae16 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6617,6 +6617,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+DECLARE_PER_CPU(cpumask_var_t, turbo_sched_mask);
void __init sched_init(void)
{
@@ -6657,6 +6658,8 @@ void __init sched_init(void)
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+ per_cpu(turbo_sched_mask, i) = (cpumask_var_t)kzalloc_node(
+ cpumask_size(), GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d170b5da0e3..8643e6309451 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5379,6 +5379,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
/* Working cpumask for: load_balance, load_balance_newidle. */
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+/* A cpumask to find active cores in the system. */
+DEFINE_PER_CPU(cpumask_var_t, turbo_sched_mask);
#ifdef CONFIG_NO_HZ_COMMON
@@ -5883,6 +5885,81 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
return cpu;
}
+#ifdef CONFIG_SCHED_SMT
+
+/*
+ * A CPU counts as busy when its utilization exceeds 12.5% of
+ * SCHED_CAPACITY_SCALE. cpu_util() reports on the capacity scale, not in
+ * percent, so the threshold must be derived from SCHED_CAPACITY_SCALE.
+ */
+#define merely_used_cpu(cpu)	(cpu_util(cpu) > (SCHED_CAPACITY_SCALE >> 3))
+
+/*
+ * Classify small background tasks for task packing.
+ *
+ * @p qualifies when is_bg_task() reports it as a background task (presumably
+ * keyed on a high latency_nice value; is_bg_task() is defined elsewhere) and
+ * its utilization is below 12.5% of SCHED_CAPACITY_SCALE. Larger tasks are
+ * not packed.
+ */
+static inline bool is_small_bg_task(struct task_struct *p)
+{
+	return is_bg_task(p) && task_util(p) < (SCHED_CAPACITY_SCALE >> 3);
+}
+
+/*
+ * Find a core that is already busy enough to take a small background task,
+ * so that the task does not have to wake up an idle core.
+ *
+ * Cores are visited once each, starting at @prev_cpu and wrapping around the
+ * online CPUs that @p is allowed to run on. For every core its SMT siblings
+ * are counted as idle or non-idle; non-idle siblings are additionally counted
+ * as overutilized (cpu_overutilized(), >80% util) and/or busy
+ * (merely_used_cpu(), >12.5% util).
+ *
+ * A core is chosen when either:
+ *  - idle siblings outnumber non-idle ones and at least one sibling is busy
+ *  - idle siblings do not outnumber non-idle ones and fewer than two
+ *    siblings are overutilized
+ *
+ * Within the chosen core an idle sibling that @p may run on is preferred; if
+ * there is none, the CPU through which the core was reached is used.
+ *
+ * Returns the selected CPU, or -1 if no core qualifies.
+ */
+static int select_non_idle_core(struct task_struct *p, int prev_cpu)
+{
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(turbo_sched_mask);
+	int iter_cpu, sibling;
+
+	cpumask_and(cpus, cpu_online_mask, p->cpus_ptr);
+
+	for_each_cpu_wrap(iter_cpu, cpus, prev_cpu) {
+		int idle_cpu_count = 0, non_idle_cpu_count = 0;
+		int overutil_cpu_count = 0;
+		int busy_cpu_count = 0;
+		/* iter_cpu comes from @cpus, so it is always a valid fallback */
+		int best_cpu = iter_cpu;
+
+		for_each_cpu(sibling, cpu_smt_mask(iter_cpu)) {
+			/*
+			 * Siblings outside @cpus still contribute to how busy
+			 * the core is, but must never be returned as the
+			 * target: @p is not allowed to run there.
+			 */
+			bool allowed = cpumask_test_cpu(sibling, cpus);
+
+			/* Make sure each core is evaluated only once */
+			__cpumask_clear_cpu(sibling, cpus);
+			if (idle_cpu(sibling)) {
+				idle_cpu_count++;
+				if (allowed)
+					best_cpu = sibling;
+			} else {
+				non_idle_cpu_count++;
+				if (cpu_overutilized(sibling))
+					overutil_cpu_count++;
+				if (merely_used_cpu(sibling))
+					busy_cpu_count++;
+			}
+		}
+
+		/*
+		 * Pack tasks to this core if
+		 * 1. Idle CPUs are in the majority and at least one CPU is
+		 *    busy, or
+		 * 2. Idle CPUs are not in the majority and at most one CPU is
+		 *    overutilized, so that the core does not get overloaded.
+		 */
+		if (idle_cpu_count > non_idle_cpu_count) {
+			if (busy_cpu_count)
+				return best_cpu;
+		} else {
+			/*
+			 * Pack tasks if at max 1 CPU is overutilized
+			 */
+			if (overutil_cpu_count < 2)
+				return best_cpu;
+		}
+	}
+
+	return -1;
+}
+#endif /* CONFIG_SCHED_SMT */
+
/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
@@ -6367,6 +6444,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
new_cpu = prev_cpu;
}
+#ifdef CONFIG_SCHED_SMT
+ if (is_turbosched_enabled() && unlikely(is_small_bg_task(p))) {
+ new_cpu = select_non_idle_core(p, prev_cpu);
+ if (new_cpu >= 0)
+ return new_cpu;
+ new_cpu = prev_cpu;
+ }
+#endif
+
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
cpumask_test_cpu(cpu, p->cpus_ptr);
}
@@ -6400,7 +6486,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
/* Fast path */
-
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
if (want_affine)
--
2.17.2
next prev parent reply other threads:[~2020-01-21 6:33 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-21 6:33 [RFC v6 0/5] TurboSched: A scheduler for sustaining Turbo Frequencies for longer durations Parth Shah
2020-01-21 6:33 ` [RFC v6 1/5] sched: Introduce switch to enable TurboSched for task packing Parth Shah
2020-01-22 21:37 ` Tim Chen
2020-01-23 6:35 ` Parth Shah
2020-01-24 2:14 ` kbuild test robot
2020-01-21 6:33 ` [RFC v6 2/5] sched/core: Update turbo_sched count only when required Parth Shah
2020-01-24 2:28 ` kbuild test robot
2020-01-24 2:36 ` kbuild test robot
2020-01-21 6:33 ` Parth Shah [this message]
2020-01-24 0:30 ` [RFC v6 3/5] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores kbuild test robot
2020-01-24 3:53 ` kbuild test robot
2020-01-27 9:43 ` [RFC PATCH] sched/fair: __pcpu_scope_turbo_sched_mask can be static kbuild test robot
2020-01-21 6:33 ` [RFC v6 4/5] sched/fair: Provide arch hook to find domain for non idle core search scan Parth Shah
2020-01-21 6:33 ` [RFC v6 5/5] powerpc: Set turbo domain to NUMA node for task packing Parth Shah
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200121063307.17221-4-parth@linux.ibm.com \
--to=parth@linux.ibm.com \
--cc=dietmar.eggemann@arm.com \
--cc=dsmythies@telus.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pm@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=patrick.bellasi@matbug.net \
--cc=pavel@ucw.cz \
--cc=peterz@infradead.org \
--cc=qperret@google.com \
--cc=tim.c.chen@linux.intel.com \
--cc=valentin.schneider@arm.com \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.