From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752925AbbCaXjm (ORCPT <rfc822;w@1wt.eu>);
	Tue, 31 Mar 2015 19:39:42 -0400
Received: from mga14.intel.com ([192.55.52.115]:26119 "EHLO mga14.intel.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752334AbbCaXjj (ORCPT <rfc822;linux-kernel@vger.kernel.org>);
	Tue, 31 Mar 2015 19:39:39 -0400
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.11,503,1422950400"; 
   d="scan'208";a="475146977"
Date: Wed, 1 Apr 2015 07:21:21 +0800
From: Wanpeng Li <wanpeng.li@linux.intel.com>
To: Wanpeng Li <wanpeng.li@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>,
        Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@arm.com>,
        Ingo Molnar <mingo@redhat.com>, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] sched/deadline: Use IPI to trigger DL task push
 migration instead of pulling
Message-ID: <20150331232121.GB10760@kernel>
Reply-To: Wanpeng Li <wanpeng.li@linux.intel.com>
References: <1427670430-4667-1-git-send-email-wanpeng.li@linux.intel.com>
 <20150330094140.31dcc1cf@gandalf.local.home>
 <20150331231836.GA10760@kernel>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <20150331231836.GA10760@kernel>
User-Agent: Mutt/1.5.21 (2010-09-15)
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

Sorry, I forgot to Cc Steven.
On Wed, Apr 01, 2015 at 07:18:36AM +0800, Wanpeng Li wrote:
>On Mon, Mar 30, 2015 at 09:41:40AM -0400, Steven Rostedt wrote:
>>On Mon, 30 Mar 2015 07:07:10 +0800
>>Wanpeng Li <wanpeng.li@linux.intel.com> wrote:
>>
>>> +static int find_next_push_cpu(struct rq *rq)
>>> +{
>>> +	struct rq *next_rq;
>>> +	int cpu;
>>> +
>>> +	while (1) {
>>> +		cpu = dlo_next_cpu(rq);
>>> +		if (cpu >= nr_cpu_ids)
>>> +			break;
>>> +		next_rq = cpu_rq(cpu);
>>> +
>>> +		/* Make sure the next rq can push to this rq */
>>> +		if (dl_time_before(next_rq->dl.earliest_dl.next,
>>> +			rq->dl.earliest_dl.curr))
>>> +			break;
>>> +	}
>>> +
>>> +	return cpu;
>>> +}
>>> +
>>
>>Is it possible that we don't duplicate the code and that we can find a
>>way to share the code between rt and dl? It's not that trivial to just
>>cut and paste. If a bug is found in one, it most likely won't be ported
>>to the other.
>>
>>The best is if we can share the code here some way. Perhaps have a
>>single IPI that checks both rt and dl?
>
>Peter, Juri, what are your ideas? ;)
>
>Regards,
>Wanpeng Li 
>
>>
>>-- Steve
>>
>>> +#define RT_PUSH_IPI_EXECUTING		1
>>> +#define RT_PUSH_IPI_RESTART		2
>>> +
>>> +static void tell_cpu_to_push(struct rq *rq)
>>> +{
>>> +	int cpu;
>>> +
>>> +	if (rq->dl.push_flags & RT_PUSH_IPI_EXECUTING) {
>>> +		raw_spin_lock(&rq->dl.push_lock);
>>> +		/* Make sure it's still executing */
>>> +		if (rq->dl.push_flags & RT_PUSH_IPI_EXECUTING) {
>>> +			/*
>>> +			 * Tell the IPI to restart the loop as things have
>>> +			 * changed since it started.
>>> +			 */
>>> +			rq->dl.push_flags |= RT_PUSH_IPI_RESTART;
>>> +			raw_spin_unlock(&rq->dl.push_lock);
>>> +			return;
>>> +		}
>>> +		raw_spin_unlock(&rq->dl.push_lock);
>>> +	}
>>> +
>>> +	/* When here, there's no IPI going around */
>>> +
>>> +	rq->dl.push_cpu = rq->cpu;
>>> +	cpu = find_next_push_cpu(rq);
>>> +	if (cpu >= nr_cpu_ids)
>>> +		return;
>>> +
>>> +	rq->dl.push_flags = RT_PUSH_IPI_EXECUTING;
>>> +
>>> +	irq_work_queue_on(&rq->dl.push_work, cpu);
>>> +}
>>> +
>>> +/* Called from hardirq context */
>>> +static void try_to_push_tasks(void *arg)
>>> +{
>>> +	struct dl_rq *dl_rq = arg;
>>> +	struct rq *rq, *src_rq;
>>> +	int this_cpu;
>>> +	int cpu;
>>> +
>>> +	this_cpu = dl_rq->push_cpu;
>>> +
>>> +	/* Paranoid check */
>>> +	BUG_ON(this_cpu != smp_processor_id());
>>> +
>>> +	rq = cpu_rq(this_cpu);
>>> +	src_rq = rq_of_dl_rq(dl_rq);
>>> +
>>> +again:
>>> +	if (has_pushable_dl_tasks(rq)) {
>>> +		raw_spin_lock(&rq->lock);
>>> +		push_dl_task(rq);
>>> +		raw_spin_unlock(&rq->lock);
>>> +	}
>>> +
>>> +	/* Pass the IPI to the next rt overloaded queue */
>>> +	raw_spin_lock(&dl_rq->push_lock);
>>> +	/*
>>> +	 * If the source queue changed since the IPI went out,
>>> +	 * we need to restart the search from that CPU again.
>>> +	 */
>>> +	if (dl_rq->push_flags & RT_PUSH_IPI_RESTART) {
>>> +		dl_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
>>> +		dl_rq->push_cpu = src_rq->cpu;
>>> +	}
>>> +
>>> +	cpu = find_next_push_cpu(src_rq);
>>> +
>>> +	if (cpu >= nr_cpu_ids)
>>> +		dl_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
>>> +	raw_spin_unlock(&dl_rq->push_lock);
>>> +
>>> +	if (cpu >= nr_cpu_ids)
>>> +		return;
>>> +
>>> +	/*
>>> +	 * It is possible that a restart caused this CPU to be
>>> +	 * chosen again. Don't bother with an IPI, just see if we
>>> +	 * have more to push.
>>> +	 */
>>> +	if (unlikely(cpu == rq->cpu))
>>> +		goto again;
>>> +
>>> +	/* Try the next RT overloaded CPU */
>>> +	irq_work_queue_on(&dl_rq->push_work, cpu);
>>> +}
>>> +
>>> +static void push_irq_work_func(struct irq_work *work)
>>> +{
>>> +	struct dl_rq *dl_rq = container_of(work, struct dl_rq, push_work);
>>> +
>>> +	try_to_push_tasks(dl_rq);
>>> +}
>>> +#endif /* HAVE_RT_PUSH_IPI */
>>> +
>>>  static int pull_dl_task(struct rq *this_rq)
>>>  {
>>>  	int this_cpu = this_rq->cpu, ret = 0, cpu;
>>> @@ -1432,6 +1602,13 @@ static int pull_dl_task(struct rq *this_rq)
>>>  	 */
>>>  	smp_rmb();
>>>  
>>> +#ifdef HAVE_RT_PUSH_IPI
>>> +	if (sched_feat(RT_PUSH_IPI)) {
>>> +		tell_cpu_to_push(this_rq);
>>> +		return 0;
>>> +	}
>>> +#endif
>>> +
>>>  	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
>>>  		if (this_cpu == cpu)
>>>  			continue;
>>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>>> index dd532c5..87a937c 100644
>>> --- a/kernel/sched/sched.h
>>> +++ b/kernel/sched/sched.h
>>> @@ -500,6 +500,12 @@ struct dl_rq {
>>>  	 */
>>>  	struct rb_root pushable_dl_tasks_root;
>>>  	struct rb_node *pushable_dl_tasks_leftmost;
>>> +#ifdef HAVE_RT_PUSH_IPI
>>> +	int push_flags;
>>> +	int push_cpu;
>>> +	struct irq_work push_work;
>>> +	raw_spinlock_t push_lock;
>>> +#endif
>>>  #else
>>>  	struct dl_bw dl_bw;
>>>  #endif