From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yao Dongdong Subject: Re: [PATCH v2 2/2] sched/fair: leverage the idle state info when choosing the "idlest" cpu Date: Fri, 19 Sep 2014 12:49:20 +0800 Message-ID: <541BB5D0.8080509@huawei.com> References: <1409844730-12273-1-git-send-email-nicolas.pitre@linaro.org> <1409844730-12273-3-git-send-email-nicolas.pitre@linaro.org> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1409844730-12273-3-git-send-email-nicolas.pitre@linaro.org> Sender: linux-kernel-owner@vger.kernel.org To: Nicolas Pitre Cc: Peter Zijlstra , Ingo Molnar , Daniel Lezcano , "Rafael J. Wysocki" , linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org, linaro-kernel@lists.linaro.org List-Id: linux-pm@vger.kernel.org On 2014/9/4 23:32, Nicolas Pitre wrote: > The code in find_idlest_cpu() looks for the CPU with the smallest load. > However, if multiple CPUs are idle, the first idle CPU is selected > irrespective of the depth of its idle state. > > Among the idle CPUs we should pick the one with with the shallowest idle > state, or the latest to have gone idle if all idle CPUs are in the same > state. The later applies even when cpuidle is configured out. > > This patch doesn't cover the following issues: > > - The idle exit latency of a CPU might be larger than the time needed > to migrate the waking task to an already running CPU with sufficient > capacity, and therefore performance would benefit from task packing > in such case (in most cases task packing is about power saving). > > - Some idle states have a non negligible and non abortable entry latency > which needs to run to completion before the exit latency can start. > A concurrent patch series is making this info available to the cpuidle > core. Once available, the entry latency with the idle timestamp could > determine when the exit latency may be effective. > > Those issues will be handled in due course. 
In the mean time, what > is implemented here should improve things already compared to the current > state of affairs. > > Based on an initial patch from Daniel Lezcano. > > Signed-off-by: Nicolas Pitre > --- > kernel/sched/fair.c | 43 ++++++++++++++++++++++++++++++++++++------- > 1 file changed, 36 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index bfa3c86d0d..416329e1a6 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -23,6 +23,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -4428,20 +4429,48 @@ static int > find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) > { > unsigned long load, min_load = ULONG_MAX; > - int idlest = -1; > + unsigned int min_exit_latency = UINT_MAX; > + u64 latest_idle_timestamp = 0; > + int least_loaded_cpu = this_cpu; > + int shallowest_idle_cpu = -1; > int i; > > /* Traverse only the allowed CPUs */ > for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { > - load = weighted_cpuload(i); > - > - if (load < min_load || (load == min_load && i == this_cpu)) { > - min_load = load; > - idlest = i; > + if (idle_cpu(i)) { > + struct rq *rq = cpu_rq(i); > + struct cpuidle_state *idle = idle_get_state(rq); > + if (idle && idle->exit_latency < min_exit_latency) { > + /* > + * We give priority to a CPU whose idle state > + * has the smallest exit latency irrespective > + * of any idle timestamp. > + */ > + min_exit_latency = idle->exit_latency; > + latest_idle_timestamp = rq->idle_stamp; > + shallowest_idle_cpu = i; > + } else if ((!idle || idle->exit_latency == min_exit_latency) && > + rq->idle_stamp > latest_idle_timestamp) { > + /* > + * If equal or no active idle state, then > + * the most recently idled CPU might have > + * a warmer cache. 
> + */ > + latest_idle_timestamp = rq->idle_stamp; > + shallowest_idle_cpu = i; > + } > + cpuidle_put_state(rq); > + } else { I think we needn't check non-idle CPUs once an idle CPU has been found. What about this? } else if (shallowest_idle_cpu == -1) { > + load = weighted_cpuload(i); > + if (load < min_load || > + (load == min_load && i == this_cpu)) { > + min_load = load; > + least_loaded_cpu = i; > + } > } > } > > - return idlest; > + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; > } > > /* From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756085AbaISEtc (ORCPT ); Fri, 19 Sep 2014 00:49:32 -0400 Received: from szxga02-in.huawei.com ([119.145.14.65]:51196 "EHLO szxga02-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751294AbaISEta (ORCPT ); Fri, 19 Sep 2014 00:49:30 -0400 Message-ID: <541BB5D0.8080509@huawei.com> Date: Fri, 19 Sep 2014 12:49:20 +0800 From: Yao Dongdong User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130509 Thunderbird/17.0.6 MIME-Version: 1.0 To: Nicolas Pitre CC: Peter Zijlstra , Ingo Molnar , Daniel Lezcano , "Rafael J. Wysocki" , , , Subject: Re: [PATCH v2 2/2] sched/fair: leverage the idle state info when choosing the "idlest" cpu References: <1409844730-12273-1-git-send-email-nicolas.pitre@linaro.org> <1409844730-12273-3-git-send-email-nicolas.pitre@linaro.org> In-Reply-To: <1409844730-12273-3-git-send-email-nicolas.pitre@linaro.org> Content-Type: text/plain; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit X-Originating-IP: [10.110.51.36] X-CFilter-Loop: Reflected Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 2014/9/4 23:32, Nicolas Pitre wrote: > The code in find_idlest_cpu() looks for the CPU with the smallest load. > However, if multiple CPUs are idle, the first idle CPU is selected > irrespective of the depth of its idle state.
> > Among the idle CPUs we should pick the one with with the shallowest idle > state, or the latest to have gone idle if all idle CPUs are in the same > state. The later applies even when cpuidle is configured out. > > This patch doesn't cover the following issues: > > - The idle exit latency of a CPU might be larger than the time needed > to migrate the waking task to an already running CPU with sufficient > capacity, and therefore performance would benefit from task packing > in such case (in most cases task packing is about power saving). > > - Some idle states have a non negligible and non abortable entry latency > which needs to run to completion before the exit latency can start. > A concurrent patch series is making this info available to the cpuidle > core. Once available, the entry latency with the idle timestamp could > determine when the exit latency may be effective. > > Those issues will be handled in due course. In the mean time, what > is implemented here should improve things already compared to the current > state of affairs. > > Based on an initial patch from Daniel Lezcano. 
> > Signed-off-by: Nicolas Pitre > --- > kernel/sched/fair.c | 43 ++++++++++++++++++++++++++++++++++++------- > 1 file changed, 36 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index bfa3c86d0d..416329e1a6 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -23,6 +23,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -4428,20 +4429,48 @@ static int > find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) > { > unsigned long load, min_load = ULONG_MAX; > - int idlest = -1; > + unsigned int min_exit_latency = UINT_MAX; > + u64 latest_idle_timestamp = 0; > + int least_loaded_cpu = this_cpu; > + int shallowest_idle_cpu = -1; > int i; > > /* Traverse only the allowed CPUs */ > for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { > - load = weighted_cpuload(i); > - > - if (load < min_load || (load == min_load && i == this_cpu)) { > - min_load = load; > - idlest = i; > + if (idle_cpu(i)) { > + struct rq *rq = cpu_rq(i); > + struct cpuidle_state *idle = idle_get_state(rq); > + if (idle && idle->exit_latency < min_exit_latency) { > + /* > + * We give priority to a CPU whose idle state > + * has the smallest exit latency irrespective > + * of any idle timestamp. > + */ > + min_exit_latency = idle->exit_latency; > + latest_idle_timestamp = rq->idle_stamp; > + shallowest_idle_cpu = i; > + } else if ((!idle || idle->exit_latency == min_exit_latency) && > + rq->idle_stamp > latest_idle_timestamp) { > + /* > + * If equal or no active idle state, then > + * the most recently idled CPU might have > + * a warmer cache. > + */ > + latest_idle_timestamp = rq->idle_stamp; > + shallowest_idle_cpu = i; > + } > + cpuidle_put_state(rq); > + } else { I think we needn't check non-idle CPUs once an idle CPU has been found. What about this?
} else if (shallowest_idle_cpu == -1) { > + load = weighted_cpuload(i); > + if (load < min_load || > + (load == min_load && i == this_cpu)) { > + min_load = load; > + least_loaded_cpu = i; > + } > } > } > > - return idlest; > + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; > } > > /*