All of lore.kernel.org
 help / color / mirror / Atom feed
From: Parth Shah <parth@linux.ibm.com>
To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org
Cc: peterz@infradead.org, mingo@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	patrick.bellasi@matbug.net, valentin.schneider@arm.com,
	pavel@ucw.cz, dsmythies@telus.net, quentin.perret@arm.com,
	rafael.j.wysocki@intel.com, tim.c.chen@linux.intel.com,
	daniel.lezcano@linaro.org
Subject: [RFC v5 4/6] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores
Date: Mon,  7 Oct 2019 14:00:49 +0530	[thread overview]
Message-ID: <20191007083051.4820-5-parth@linux.ibm.com> (raw)
In-Reply-To: <20191007083051.4820-1-parth@linux.ibm.com>

The algorithm finds the first non idle core in the system and tries to
place a task in the idle CPU in the chosen core. To maintain
cache hotness, work of finding non idle core starts from the prev_cpu,
which also reduces task ping-pong behaviour inside of the core.

Define a new method, select_non_idle_core(), which keeps track of the idle
and non-idle CPUs in a core and, based on heuristics, determines whether the
core is sufficiently busy to place the incoming background task. The
heuristic further classifies each non-idle CPU as either a busy (>12.5% util)
CPU or an overutilized (>80% util) CPU.
- A core containing more idle CPUs and no busy CPUs is not selected for
  packing
- A core containing more than 1 overutilized CPU is exempted from
  task packing
- Pack if there is at least one busy CPU and the overutilized CPU count is <2

A threshold of 12.5% utilization for a busy CPU is a sufficient heuristic that
the CPU is doing enough work and will not become idle in the near future.

Signed-off-by: Parth Shah <parth@linux.ibm.com>
---
 kernel/sched/core.c |  3 ++
 kernel/sched/fair.c | 95 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6e1ae8046fe0..7e3aff59540a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6402,6 +6402,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+DECLARE_PER_CPU(cpumask_var_t, turbo_sched_mask);
 
 void __init sched_init(void)
 {
@@ -6442,6 +6443,8 @@ void __init sched_init(void)
 			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
 		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
 			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+		per_cpu(turbo_sched_mask, i) = (cpumask_var_t)kzalloc_node(
+			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
 	}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b798fe7ff7cd..d4a1b6474338 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5353,6 +5353,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 /* Working cpumask for: load_balance, load_balance_newidle. */
 DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+/* A cpumask to find active cores in the system. */
+DEFINE_PER_CPU(cpumask_var_t, turbo_sched_mask);
 
 #ifdef CONFIG_NO_HZ_COMMON
 
@@ -5964,6 +5966,76 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	return cpu;
 }
 
+#ifdef CONFIG_SCHED_SMT
+/*
+ * A task counts as a background task when PF_CAN_BE_PACKED is set in p->flags.
+ */
+static inline bool is_background_task(struct task_struct *p)
+{
+	return !!(p->flags & PF_CAN_BE_PACKED);
+}
+
+#define busyness_threshold	(100 >> 3)
+#define is_cpu_busy(util) ((util) > busyness_threshold)
+
+/*
+ * Try to find a non idle core to pack background task @p on, scanning the
+ * cores from @prev_cpu onwards. The SMT siblings of each core are counted
+ * as idle, busy (is_cpu_busy()) and overutilized (cpu_overutilized()):
+ * - If idle CPUs outnumber non-idle ones, pack only if some CPU is busy
+ * - Otherwise pack unless two or more CPUs are overutilized
+ * Returns the last idle sibling of the chosen core that @p may run on, or
+ * the CPU the core was reached through when it has none. When no core
+ * qualifies, falls back to select_idle_sibling(@p, @prev_cpu, @target).
+ */
+static int select_non_idle_core(struct task_struct *p, int prev_cpu, int target)
+{
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(turbo_sched_mask);
+	int iter_cpu, sibling;
+
+	cpumask_and(cpus, cpu_online_mask, p->cpus_ptr);
+
+	for_each_cpu_wrap(iter_cpu, cpus, prev_cpu) {
+		int idle_cpu_count = 0, non_idle_cpu_count = 0;
+		int overutil_cpu_count = 0;
+		int busy_cpu_count = 0;
+		int best_cpu = iter_cpu;
+
+		for_each_cpu(sibling, cpu_smt_mask(iter_cpu)) {
+			/* Visit each core only once */
+			__cpumask_clear_cpu(sibling, cpus);
+			if (idle_cpu(sibling)) {
+				idle_cpu_count++;
+				/* Only pick a CPU the task may run on */
+				if (cpumask_test_cpu(sibling, p->cpus_ptr))
+					best_cpu = sibling;
+			} else {
+				non_idle_cpu_count++;
+				if (cpu_overutilized(sibling))
+					overutil_cpu_count++;
+				if (is_cpu_busy(cpu_util(sibling)))
+					busy_cpu_count++;
+			}
+		}
+
+		/*
+		 * Mostly idle core: pack only if at least one sibling is busy,
+		 * else leave the core alone. Mostly non-idle core: pack
+		 * unless two or more siblings are already overutilized.
+		 */
+		if (idle_cpu_count > non_idle_cpu_count) {
+			if (busy_cpu_count)
+				return best_cpu;
+		} else {
+			if (overutil_cpu_count < 2)
+				return best_cpu;
+		}
+	}
+
+	return select_idle_sibling(p, prev_cpu, target);
+}
+#endif /* CONFIG_SCHED_SMT */
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
@@ -6418,6 +6490,23 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	return -1;
 }
 
+#ifdef CONFIG_SCHED_SMT
+/*
+ * Pack classified background tasks via select_non_idle_core() (SMT builds)
+ */
+static inline int turbosched_select_non_idle_core(struct task_struct *p,
+						  int prev_cpu, int target)
+{
+	return select_non_idle_core(p, prev_cpu, target);
+}
+#else
+static inline int turbosched_select_non_idle_core(struct task_struct *p,
+						  int prev_cpu, int target)
+{
+	return select_idle_sibling(p, prev_cpu, target);
+}
+#endif
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6483,7 +6572,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
 		/* Fast path */
 
-		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+		if (is_turbosched_enabled() && unlikely(is_background_task(p)))
+			new_cpu = turbosched_select_non_idle_core(p, prev_cpu,
+								  new_cpu);
+		else
+			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
 		if (want_affine)
 			current->recent_used_cpu = cpu;
-- 
2.17.1


  parent reply	other threads:[~2019-10-07  8:31 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07  8:30 [RFC v5 0/6] TurboSched: A scheduler for sustaining Turbo Frequencies for longer durations Parth Shah
2019-10-07  8:30 ` [RFC v5 1/6] sched/core: Add manual background task classification using sched_setattr syscall Parth Shah
2019-10-07  8:30 ` [RFC v5 2/6] sched: Introduce switch to enable TurboSched for task packing Parth Shah
2019-10-07  8:30 ` [RFC v5 3/6] sched/core: Update turbo_sched count only when required Parth Shah
2019-10-07  8:30 ` Parth Shah [this message]
2019-10-07 12:19   ` [RFC v5 4/6] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores Vincent Guittot
2019-10-07 16:53     ` Parth Shah
2019-10-08 16:20       ` Vincent Guittot
2019-10-09  8:46         ` Parth Shah
2019-10-08 16:52       ` Dietmar Eggemann
2019-10-09  8:57         ` Parth Shah
2019-10-09 14:26           ` Dietmar Eggemann
2019-10-09 17:02             ` Parth Shah
2019-10-10 14:53               ` Dietmar Eggemann
2019-10-07  8:30 ` [RFC v5 5/6] sched/fair: Provide arch hook to find domain for non idle core search scan Parth Shah
2019-10-07  8:30 ` [RFC v5 6/6] powerpc: Set turbo domain to NUMA node for task packing Parth Shah
     [not found] ` <20191008132842.6612-1-hdanton@sina.com>
2019-10-09  9:22   ` [RFC v5 4/6] sched/fair: Tune task wake-up logic to pack small background tasks on fewer cores Parth Shah
2019-10-09 11:34     ` Vincent Guittot
2019-10-09 16:55       ` Parth Shah

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191007083051.4820-5-parth@linux.ibm.com \
    --to=parth@linux.ibm.com \
    --cc=daniel.lezcano@linaro.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=dsmythies@telus.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=patrick.bellasi@matbug.net \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=quentin.perret@arm.com \
    --cc=rafael.j.wysocki@intel.com \
    --cc=tim.c.chen@linux.intel.com \
    --cc=valentin.schneider@arm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.