All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Li, Aubrey" <aubrey.li@linux.intel.com>
To: "benbjiang(蒋彪)" <benbjiang@tencent.com>
Cc: Vineeth Remanan Pillai <vpillai@digitalocean.com>,
	Nishanth Aravamudan <naravamudan@digitalocean.com>,
	Julien Desfossez <jdesfossez@digitalocean.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	"mingo@kernel.org" <mingo@kernel.org>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"pjt@google.com" <pjt@google.com>,
	"torvalds@linux-foundation.org" <torvalds@linux-foundation.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"subhra.mazumdar@oracle.com" <subhra.mazumdar@oracle.com>,
	"fweisbec@gmail.com" <fweisbec@gmail.com>,
	"keescook@chromium.org" <keescook@chromium.org>,
	"kerrnel@google.com" <kerrnel@google.com>,
	Phil Auld <pauld@redhat.com>, Aaron Lu <aaron.lwe@gmail.com>,
	Aubrey Li <aubrey.intel@gmail.com>,
	Valentin Schneider <valentin.schneider@arm.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Joel Fernandes <joelaf@google.com>,
	"Joel Fernandes (Google)" <joel@joelfernandes.org>,
	"vineethrp@gmail.com" <vineethrp@gmail.com>,
	Chen Yu <yu.c.chen@intel.com>,
	Christian Brauner <christian.brauner@ubuntu.com>
Subject: Re: [RFC PATCH 10/16] sched: Trivial forced-newidle balancer(Internet mail)
Date: Mon, 20 Jul 2020 16:03:06 +0800	[thread overview]
Message-ID: <5ac7b50a-b422-040c-81f4-eab5bdda477b@linux.intel.com> (raw)
In-Reply-To: <8082F052-2F52-42D3-B396-18A35A94F26F@tencent.com>

On 2020/7/20 15:23, benbjiang(蒋彪) wrote:
> Hi, 
> 
>> On Jul 20, 2020, at 2:06 PM, Li, Aubrey <aubrey.li@linux.intel.com <mailto:aubrey.li@linux.intel.com>> wrote:
>>
>> On 2020/7/20 12:06, benbjiang(蒋彪) wrote:
>>> Hi,
>>>
>>>> On Jul 1, 2020, at 5:32 AM, Vineeth Remanan Pillai <vpillai@digitalocean.com <mailto:vpillai@digitalocean.com>> wrote:
>>>>
>>>> From: Peter Zijlstra <peterz@infradead.org <mailto:peterz@infradead.org>>
>>>>
>>>> When a sibling is forced-idle to match the core-cookie; search for
>>>> matching tasks to fill the core.
>>>>
>>>> rcu_read_unlock() can incur an infrequent deadlock in
>>>> sched_core_balance(). Fix this by using the RCU-sched flavor instead.
>>>>
>>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org <mailto:peterz@infradead.org>>
>>>> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org <mailto:joel@joelfernandes.org>>
>>>> Acked-by: Paul E. McKenney <paulmck@kernel.org <mailto:paulmck@kernel.org>>
>>>> ---
>>>> include/linux/sched.h |   1 +
>>>> kernel/sched/core.c   | 131 +++++++++++++++++++++++++++++++++++++++++-
>>>> kernel/sched/idle.c   |   1 +
>>>> kernel/sched/sched.h  |   6 ++
>>>> 4 files changed, 138 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>>>> index 3c8dcc5ff039..4f9edf013df3 100644
>>>> --- a/include/linux/sched.h
>>>> +++ b/include/linux/sched.h
>>>> @@ -688,6 +688,7 @@ struct task_struct {
>>>> #ifdef CONFIG_SCHED_CORE
>>>> struct rb_nodecore_node;
>>>> unsigned longcore_cookie;
>>>> +unsigned intcore_occupation;
>>>> #endif
>>>>
>>>> #ifdef CONFIG_CGROUP_SCHED
>>>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>>>> index 4d6d6a678013..fb9edb09ead7 100644
>>>> --- a/kernel/sched/core.c
>>>> +++ b/kernel/sched/core.c
>>>> @@ -201,6 +201,21 @@ static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
>>>> return match;
>>>> }
>>>>
>>>> +static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
>>>> +{
>>>> +struct rb_node *node = &p->core_node;
>>>> +
>>>> +node = rb_next(node);
>>>> +if (!node)
>>>> +return NULL;
>>>> +
>>>> +p = container_of(node, struct task_struct, core_node);
>>>> +if (p->core_cookie != cookie)
>>>> +return NULL;
>>>> +
>>>> +return p;
>>>> +}
>>>> +
>>>> /*
>>>> * The static-key + stop-machine variable are needed such that:
>>>> *
>>>> @@ -4233,7 +4248,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>>>> struct task_struct *next, *max = NULL;
>>>> const struct sched_class *class;
>>>> const struct cpumask *smt_mask;
>>>> -int i, j, cpu;
>>>> +int i, j, cpu, occ = 0;
>>>> bool need_sync;
>>>>
>>>> if (!sched_core_enabled(rq))
>>>> @@ -4332,6 +4347,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>>>> goto done;
>>>> }
>>>>
>>>> +if (!is_idle_task(p))
>>>> +occ++;
>>>> +
>>>> rq_i->core_pick = p;
>>>>
>>>> /*
>>>> @@ -4357,6 +4375,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>>>>
>>>> cpu_rq(j)->core_pick = NULL;
>>>> }
>>>> +occ = 1;
>>>> goto again;
>>>> } else {
>>>> /*
>>>> @@ -4393,6 +4412,8 @@ next_class:;
>>>> if (is_idle_task(rq_i->core_pick) && rq_i->nr_running)
>>>> rq_i->core_forceidle = true;
>>>>
>>>> +rq_i->core_pick->core_occupation = occ;
>>>> +
>>>> if (i == cpu)
>>>> continue;
>>>>
>>>> @@ -4408,6 +4429,114 @@ next_class:;
>>>> return next;
>>>> }
>>>>
>>>> +static bool try_steal_cookie(int this, int that)
>>>> +{
>>>> +struct rq *dst = cpu_rq(this), *src = cpu_rq(that);
>>>> +struct task_struct *p;
>>>> +unsigned long cookie;
>>>> +bool success = false;
>>>> +
>>>> +local_irq_disable();
>>>> +double_rq_lock(dst, src);
>>>> +
>>>> +cookie = dst->core->core_cookie;
>>>> +if (!cookie)
>>>> +goto unlock;
>>>> +
>>>> +if (dst->curr != dst->idle)
>>>> +goto unlock;
>>>> +
>>>> +p = sched_core_find(src, cookie);
>>>> +if (p == src->idle)
>>>> +goto unlock;
>>>> +
>>>> +do {
>>>> +if (p == src->core_pick || p == src->curr)
>>>> +goto next;
>>>> +
>>>> +if (!cpumask_test_cpu(this, &p->cpus_mask))
>>>> +goto next;
>>>> +
>>>> +if (p->core_occupation > dst->idle->core_occupation)
>>>> +goto next;
>>>> +
>>>> +p->on_rq = TASK_ON_RQ_MIGRATING;
>>>> +deactivate_task(src, p, 0);
>>>> +set_task_cpu(p, this);
>>>> +activate_task(dst, p, 0);
>>>> +p->on_rq = TASK_ON_RQ_QUEUED;
>>>> +
>>>> +resched_curr(dst);
>>>> +
>>>> +success = true;
>>>> +break;
>>>> +
>>>> +next:
>>>> +p = sched_core_next(p, cookie);
>>>> +} while (p);
>>>> +
>>>> +unlock:
>>>> +double_rq_unlock(dst, src);
>>>> +local_irq_enable();
>>>> +
>>>> +return success;
>>>> +}
>>>> +
>>>> +static bool steal_cookie_task(int cpu, struct sched_domain *sd)
>>>> +{
>>>> +int i;
>>>> +
>>>> +for_each_cpu_wrap(i, sched_domain_span(sd), cpu) {
>>> Since (i == cpu) should be skipped, should we start iteration at cpu+1? like,
>>> for_each_cpu_wrap(i, sched_domain_span(sd), cpu+1) {
>>> …
>>> }
>>> In that way, we could avoid hitting following if(i == cpu) always.
>>
>> IMHO, this won't work, as cpuid is not continuous.
> Cpuid may be not continuous, but for_each_cpu_wrap() could cover the case, I think. :)

And for_each_cpu_wrap() will still wrap around and pick i == cpu, even though it starts
from (cpu+1)...

Thanks,
-Aubrey

> 
>>
>>>> +if (i == cpu)
>>>> +continue;
>>>> +
>>>> +if (need_resched())
>>>> +break;
>>> Should we return true here to accelerate the breaking of sched_core_balance? 
>>> Otherwise the breaking would be delayed to the next level sd iteration.
>>>> +
>>>> +if (try_steal_cookie(cpu, i))
>>>> +return true;
>>>> +}
>>>> +
>>>> +return false;
>>>> +}
>>>> +
>>>> +static void sched_core_balance(struct rq *rq)
>>>> +{
>>>> +struct sched_domain *sd;
>>>> +int cpu = cpu_of(rq);
>>>> +
>>>> +rcu_read_lock_sched();
>>>> +raw_spin_unlock_irq(rq_lockp(rq));
>>>> +for_each_domain(cpu, sd) {
>>>> +if (!(sd->flags & SD_LOAD_BALANCE))
>>>> +break;
>>>> +
>>>> +if (need_resched())
>>>> +break;
>>> If rescheded here, we missed the chance to do further forced-newidle balance, 
>>> and the idle-core could be idle for a long time, because lacking of pulling chance.
>>> Could it be possible to add a new forced-newidle balance chance in task_tick_idle?
>>> which could make it more efficient.
>>
>> This flag indicates there is another thread deserves to run, So I guess the core won't
>> be idle for a long time.
>>
>> Thanks,
>> -Aubrey
> Indeed, thanks for the explanation. :)
> 
>>>
>>>> +if (steal_cookie_task(cpu, sd))
>>>> +break;
>>>> +}
>>>> +raw_spin_lock_irq(rq_lockp(rq));
>>>> +rcu_read_unlock_sched();
>>>> +}
>>>> +
>>>> +static DEFINE_PER_CPU(struct callback_head, core_balance_head);
>>>> +
>>>> +void queue_core_balance(struct rq *rq)
>>>> +{
>>>> +if (!sched_core_enabled(rq))
>>>> +return;
>>>> +
>>>> +if (!rq->core->core_cookie)
>>>> +return;
>>>> +
>>>> +if (!rq->nr_running) /* not forced idle */
>>>> +return;
>>>> +
>>>> +queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
>>>> +}
>>>> +
>>>> #else /* !CONFIG_SCHED_CORE */
>>>>
>>>> static struct task_struct *
>>>> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
>>>> index a8d40ffab097..dff6ba220ed7 100644
>>>> --- a/kernel/sched/idle.c
>>>> +++ b/kernel/sched/idle.c
>>>> @@ -395,6 +395,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
>>>> {
>>>> update_idle_core(rq);
>>>> schedstat_inc(rq->sched_goidle);
>>>> +queue_core_balance(rq);
>>>> }
>>>>
>>>> #ifdef CONFIG_SMP
>>>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>>>> index 293aa1ae0308..464559676fd2 100644
>>>> --- a/kernel/sched/sched.h
>>>> +++ b/kernel/sched/sched.h
>>>> @@ -1089,6 +1089,8 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>>>> bool cfs_prio_less(struct task_struct *a, struct task_struct *b);
>>>> void sched_core_adjust_sibling_vruntime(int cpu, bool coresched_enabled);
>>>>
>>>> +extern void queue_core_balance(struct rq *rq);
>>>> +
>>>> #else /* !CONFIG_SCHED_CORE */
>>>>
>>>> static inline bool sched_core_enabled(struct rq *rq)
>>>> @@ -1101,6 +1103,10 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>>>> return &rq->__lock;
>>>> }
>>>>
>>>> +static inline void queue_core_balance(struct rq *rq)
>>>> +{
>>>> +}
>>>> +
>>>> #endif /* CONFIG_SCHED_CORE */
>>>>
>>>> #ifdef CONFIG_SCHED_SMT
>>>> -- 
>>>> 2.17.1
> 


  parent reply	other threads:[~2020-07-20  8:03 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-30 21:32 [RFC PATCH 00/16] Core scheduling v6 Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 01/16] sched: Wrap rq::lock access Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 02/16] sched: Introduce sched_class::pick_task() Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 03/16] sched: Core-wide rq->lock Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 04/16] sched/fair: Add a few assertions Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 05/16] sched: Basic tracking of matching tasks Vineeth Remanan Pillai
2020-07-21 14:02   ` [RFC PATCH 05/16] sched: Basic tracking of matching tasks(Internet mail) benbjiang(蒋彪)
2020-06-30 21:32 ` [RFC PATCH 06/16] sched: Add core wide task selection and scheduling Vineeth Remanan Pillai
2020-07-01 23:28   ` Joel Fernandes
2020-07-02  0:54     ` Tim Chen
2020-07-02 12:57       ` Joel Fernandes
2020-07-02 13:23         ` Joel Fernandes
2020-07-05 23:44         ` Tim Chen
2020-07-03 20:21     ` Vineeth Remanan Pillai
2020-07-06 14:09       ` Joel Fernandes
2020-07-06 14:38         ` Vineeth Remanan Pillai
2020-07-06 17:37           ` Joel Fernandes
2020-06-30 21:32 ` [RFC PATCH 07/16] sched/fair: Fix forced idle sibling starvation corner case Vineeth Remanan Pillai
2020-07-21  7:35   ` [RFC PATCH 07/16] sched/fair: Fix forced idle sibling starvation corner case(Internet mail) benbjiang(蒋彪)
2020-07-22  7:20   ` benbjiang(蒋彪)
2020-06-30 21:32 ` [RFC PATCH 08/16] sched/fair: wrapper for cfs_rq->min_vruntime Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 09/16] sched/fair: core wide cfs task priority comparison Vineeth Remanan Pillai
2020-07-22  0:23   ` [RFC PATCH 09/16] sched/fair: core wide cfs task priority comparison(Internet mail) benbjiang(蒋彪)
2020-07-24  7:14     ` Aaron Lu
2020-07-24 12:08       ` Jiang Biao
2020-06-30 21:32 ` [RFC PATCH 10/16] sched: Trivial forced-newidle balancer Vineeth Remanan Pillai
2020-07-20  4:06   ` [RFC PATCH 10/16] sched: Trivial forced-newidle balancer(Internet mail) benbjiang(蒋彪)
2020-07-20  6:06     ` Li, Aubrey
     [not found]       ` <8082F052-2F52-42D3-B396-18A35A94F26F@tencent.com>
2020-07-20  8:03         ` Li, Aubrey [this message]
2020-07-20  8:22           ` benbjiang(蒋彪)
2020-07-20 14:34   ` benbjiang(蒋彪)
2020-06-30 21:32 ` [RFC PATCH 11/16] sched: migration changes for core scheduling Vineeth Remanan Pillai
2020-07-22  8:54   ` [RFC PATCH 11/16] sched: migration changes for core scheduling(Internet mail) benbjiang(蒋彪)
2020-07-22 12:13     ` Li, Aubrey
2020-07-22 14:32       ` benbjiang(蒋彪)
2020-07-23  1:57         ` Li, Aubrey
2020-07-23  2:42           ` benbjiang(蒋彪)
2020-07-23  3:35             ` Li, Aubrey
2020-07-23  4:23               ` benbjiang(蒋彪)
2020-07-23  5:39                 ` Li, Aubrey
2020-07-23  7:47                   ` benbjiang(蒋彪)
2020-07-23  8:06                     ` Li, Aubrey
2020-07-23  8:28                       ` benbjiang(蒋彪)
2020-07-23 23:43                         ` Aubrey Li
2020-07-24  1:26                           ` benbjiang(蒋彪)
2020-07-24  2:05                             ` Li, Aubrey
2020-07-24  2:29                               ` benbjiang(蒋彪)
2020-06-30 21:32 ` [RFC PATCH 12/16] sched: cgroup tagging interface for core scheduling Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 13/16] sched: Fix pick_next_task() race condition in " Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 14/16] irq: Add support for core-wide protection of IRQ and softirq Vineeth Remanan Pillai
2020-07-10 12:19   ` Li, Aubrey
2020-07-10 13:21     ` Joel Fernandes
2020-07-13  2:23       ` Li, Aubrey
2020-07-13 15:58         ` Joel Fernandes
2020-07-10 13:36     ` Vineeth Remanan Pillai
2020-07-11  1:33       ` Aubrey Li
2020-07-17 23:37     ` Thomas Gleixner
2020-07-18 17:05       ` Joel Fernandes
2020-07-17 23:36   ` Thomas Gleixner
2020-07-20  3:53     ` Joel Fernandes
2020-07-20  8:20       ` Thomas Gleixner
2020-07-20 11:09       ` Vineeth Pillai
2020-06-30 21:32 ` [RFC PATCH 15/16] Documentation: Add documentation on core scheduling Vineeth Remanan Pillai
2020-06-30 21:32 ` [RFC PATCH 16/16] sched: Debug bits Vineeth Remanan Pillai
2020-07-31 16:41 ` [RFC PATCH 00/16] Core scheduling v6 Vineeth Pillai
2020-08-03  8:23 ` Li, Aubrey
2020-08-03 16:53   ` Joel Fernandes
2020-08-05  3:57     ` Li, Aubrey
2020-08-05  6:16       ` [RFC PATCH 00/16] Core scheduling v6(Internet mail) benbjiang(蒋彪)
2020-08-09 16:44       ` [RFC PATCH 00/16] Core scheduling v6 Joel Fernandes
2020-08-12  2:01         ` Li, Aubrey
2020-08-12 23:08           ` Joel Fernandes
2020-08-13  4:28             ` Li, Aubrey
2020-08-14  0:26               ` [RFC PATCH 00/16] Core scheduling v6(Internet mail) benbjiang(蒋彪)
2020-08-14  1:36                 ` Li, Aubrey
2020-08-14  4:04                   ` benbjiang(蒋彪)
2020-08-14  5:18                     ` Li, Aubrey
2020-08-14  7:54                       ` benbjiang(蒋彪)
2020-08-20 22:37               ` [RFC PATCH 00/16] Core scheduling v6 Joel Fernandes
2020-08-27  0:30 ` Alexander Graf
2020-08-27  1:20   ` Vineeth Pillai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5ac7b50a-b422-040c-81f4-eab5bdda477b@linux.intel.com \
    --to=aubrey.li@linux.intel.com \
    --cc=aaron.lwe@gmail.com \
    --cc=aubrey.intel@gmail.com \
    --cc=benbjiang@tencent.com \
    --cc=christian.brauner@ubuntu.com \
    --cc=fweisbec@gmail.com \
    --cc=jdesfossez@digitalocean.com \
    --cc=joel@joelfernandes.org \
    --cc=joelaf@google.com \
    --cc=keescook@chromium.org \
    --cc=kerrnel@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@kernel.org \
    --cc=naravamudan@digitalocean.com \
    --cc=pauld@redhat.com \
    --cc=pawan.kumar.gupta@linux.intel.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=subhra.mazumdar@oracle.com \
    --cc=tglx@linutronix.de \
    --cc=tim.c.chen@linux.intel.com \
    --cc=torvalds@linux-foundation.org \
    --cc=valentin.schneider@arm.com \
    --cc=vineethrp@gmail.com \
    --cc=vpillai@digitalocean.com \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.