public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Parth Shah <parth@linux.ibm.com>
To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: peterz@infradead.org, mingo@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	patrick.bellasi@matbug.net, valentin.schneider@arm.com,
	pavel@ucw.cz, dsmythies@telus.net, qperret@google.com,
	tim.c.chen@linux.intel.com
Subject: [RFC v6 3/5] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores
Date: Tue, 21 Jan 2020 12:03:05 +0530	[thread overview]
Message-ID: <20200121063307.17221-4-parth@linux.ibm.com> (raw)
In-Reply-To: <20200121063307.17221-1-parth@linux.ibm.com>

The algorithm finds the first non idle core in the system and tries to
place a task in the idle CPU of the chosen core. To maintain cache hotness,
work of finding non idle core starts from the prev_cpu, which also reduces
task ping-pong behaviour inside of the core.

Define a new method to select_non_idle_core which keep tracks of the idle
and non-idle CPUs in the core and based on the heuristics determines if the
core is sufficiently busy to place the waking up background task. The
heuristic further defines the non-idle CPU into either busy (>12.5% util)
CPU and overutilized (>80% util) CPU.
- The core containing more idle CPUs and no busy CPUs is not selected for
  packing
- The core if contains more than 1 overutilized CPUs are exempted from
  task packing
- Pack if there is atleast one busy CPU and overutilized CPUs count is <2

Value of 12.5% utilization for busy CPU gives sufficient heuristics for CPU
doing enough work and not become idle in nearby time frame.

Signed-off-by: Parth Shah <parth@linux.ibm.com>
---
 kernel/sched/core.c |  3 ++
 kernel/sched/fair.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 629c2589d727..a34a5589ae16 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6617,6 +6617,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+DECLARE_PER_CPU(cpumask_var_t, turbo_sched_mask);
 
 void __init sched_init(void)
 {
@@ -6657,6 +6658,8 @@ void __init sched_init(void)
 			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
 		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
 			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+		per_cpu(turbo_sched_mask, i) = (cpumask_var_t)kzalloc_node(
+			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
 	}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d170b5da0e3..8643e6309451 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5379,6 +5379,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 /* Working cpumask for: load_balance, load_balance_newidle. */
 DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+/* A cpumask to find active cores in the system. */
+DEFINE_PER_CPU(cpumask_var_t, turbo_sched_mask);
 
 #ifdef CONFIG_NO_HZ_COMMON
 
@@ -5883,6 +5885,81 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	return cpu;
 }
 
+#ifdef CONFIG_SCHED_SMT
+
+/* Define non-idle CPU as the one with the utilization >= 12.5% */
+#define merely_used_cpu(util) ((cpu_util(util)) > (100 >> 3))
+
+/*
+ * Classify small background tasks with higher latency_nice value for task
+ * packing.
+ */
+static inline bool is_small_bg_task(struct task_struct *p)
+{
+	if (is_bg_task(p) && (task_util(p) > (SCHED_CAPACITY_SCALE >> 3)))
+		return true;
+
+	return false;
+}
+
+/*
+ * Try to find a non idle core in the system  based on few heuristics:
+ * - Keep track of overutilized (>80% util) and busy (>12.5% util) CPUs
+ * - If none CPUs are busy then do not select the core for task packing
+ * - If atleast one CPU is busy then do task packing unless overutilized CPUs
+ *   count is < busy/2 CPU count
+ * - Always select idle CPU for task packing
+ */
+static int select_non_idle_core(struct task_struct *p, int prev_cpu)
+{
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(turbo_sched_mask);
+	int iter_cpu, sibling;
+
+	cpumask_and(cpus, cpu_online_mask, p->cpus_ptr);
+
+	for_each_cpu_wrap(iter_cpu, cpus, prev_cpu) {
+		int idle_cpu_count = 0, non_idle_cpu_count = 0;
+		int overutil_cpu_count = 0;
+		int busy_cpu_count = 0;
+		int best_cpu = iter_cpu;
+
+		for_each_cpu(sibling, cpu_smt_mask(iter_cpu)) {
+			__cpumask_clear_cpu(sibling, cpus);
+			if (idle_cpu(sibling)) {
+				idle_cpu_count++;
+				best_cpu = sibling;
+			} else {
+				non_idle_cpu_count++;
+				if (cpu_overutilized(sibling))
+					overutil_cpu_count++;
+				if (merely_used_cpu(sibling))
+					busy_cpu_count++;
+			}
+		}
+
+		/*
+		 * Pack tasks to this core if
+		 * 1. Idle CPU count is higher and atleast one is busy
+		 * 2. If idle_cpu_count < non_idle_cpu_count then ideally do
+		 * packing but if there are more CPUs overutilized then don't
+		 * overload it.
+		 */
+		if (idle_cpu_count > non_idle_cpu_count) {
+			if (busy_cpu_count)
+				return best_cpu;
+		} else {
+			/*
+			 * Pack tasks if at max 1 CPU is overutilized
+			 */
+			if (overutil_cpu_count < 2)
+				return best_cpu;
+		}
+	}
+
+	return -1;
+}
+#endif /* CONFIG_SCHED_SMT */
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
@@ -6367,6 +6444,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			new_cpu = prev_cpu;
 		}
 
+#ifdef CONFIG_SCHED_SMT
+		if (is_turbosched_enabled() && unlikely(is_small_bg_task(p))) {
+			new_cpu = select_non_idle_core(p, prev_cpu);
+			if (new_cpu >= 0)
+				return new_cpu;
+			new_cpu = prev_cpu;
+		}
+#endif
+
 		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
 			      cpumask_test_cpu(cpu, p->cpus_ptr);
 	}
@@ -6400,7 +6486,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
 		/* Fast path */
-
 		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
 		if (want_affine)
-- 
2.17.2


  parent reply	other threads:[~2020-01-21  6:33 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-21  6:33 [RFC v6 0/5] TurboSched: A scheduler for sustaining Turbo Frequencies for longer durations Parth Shah
2020-01-21  6:33 ` [RFC v6 1/5] sched: Introduce switch to enable TurboSched for task packing Parth Shah
2020-01-22 21:37   ` Tim Chen
2020-01-23  6:35     ` Parth Shah
2020-01-21  6:33 ` [RFC v6 2/5] sched/core: Update turbo_sched count only when required Parth Shah
2020-01-21  6:33 ` Parth Shah [this message]
2020-01-21  6:33 ` [RFC v6 4/5] sched/fair: Provide arch hook to find domain for non idle core search scan Parth Shah
2020-01-21  6:33 ` [RFC v6 5/5] powerpc: Set turbo domain to NUMA node for task packing Parth Shah

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200121063307.17221-4-parth@linux.ibm.com \
    --to=parth@linux.ibm.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=dsmythies@telus.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=patrick.bellasi@matbug.net \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=qperret@google.com \
    --cc=tim.c.chen@linux.intel.com \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox