All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Galbraith <efault@gmx.de>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Tom Putzeys <tom.putzeys@be.atlascopco.com>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: Re: CFS scheduler: spin_lock usage causes dead lock when smp_apic_timer_interrupt occurs
Date: Tue, 08 Jan 2019 06:30:59 +0100	[thread overview]
Message-ID: <1546925459.17252.2.camel@gmx.de> (raw)
In-Reply-To: <20190107125231.GE14122@hirez.programming.kicks-ass.net>

On Mon, 2019-01-07 at 13:52 +0100, Peter Zijlstra wrote:
> On Mon, Jan 07, 2019 at 01:28:34PM +0100, Mike Galbraith wrote:
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 960ad0ce77d7..420624c49f38 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -5007,9 +5007,9 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
> >  	cfs_b->period = ns_to_ktime(default_cfs_period());
> >  
> >  	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
> > -	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
> > +	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
> >  	cfs_b->period_timer.function = sched_cfs_period_timer;
> > -	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> > +	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
> >  	cfs_b->slack_timer.function = sched_cfs_slack_timer;
> >  }
> 
> Right, that should sort it. I'm not sure this is the best solution,
> though. That cfs-runtime crud can (IIRC) iterate lists etc., so running
> it from the softirq isn't a bad idea. We just need to fix that locking
> up a bit.
> 
> Something a wee bit like so perhaps..

I plugged that into 4.19-rt, along with a revert of the change that moved
the timers to hard irq context, and it (as expected) does the trick.

> ---
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 13776fac7b74..3cfe26aa098a 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4566,7 +4566,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
>  		struct rq *rq = rq_of(cfs_rq);
>  		struct rq_flags rf;
>  
> -		rq_lock(rq, &rf);
> +		rq_lock_irqsave(rq, &rf);
>  		if (!cfs_rq_throttled(cfs_rq))
>  			goto next;
>  
> @@ -4583,7 +4583,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
>  			unthrottle_cfs_rq(cfs_rq);
>  
>  next:
> -		rq_unlock(rq, &rf);
> +		rq_unlock_irqrestore(rq, &rf);
>  
>  		if (!remaining)
>  			break;
> @@ -4599,7 +4599,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
>   * period the timer is deactivated until scheduling resumes; cfs_b->idle is
>   * used to track this state.
>   */
> -static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
> +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
>  {
>  	u64 runtime, runtime_expires;
>  	int throttled;
> @@ -4641,11 +4641,11 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
>  	while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
>  		runtime = cfs_b->runtime;
>  		cfs_b->distribute_running = 1;
> -		raw_spin_unlock(&cfs_b->lock);
> +		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  		/* we can't nest cfs_b->lock while distributing bandwidth */
>  		runtime = distribute_cfs_runtime(cfs_b, runtime,
>  						 runtime_expires);
> -		raw_spin_lock(&cfs_b->lock);
> +		raw_spin_lock_irqsave(&cfs_b->lock, flags);
>  
>  		cfs_b->distribute_running = 0;
>  		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
> @@ -4754,17 +4754,18 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
>  static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
>  {
>  	u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
> +	unsigned long flags;
>  	u64 expires;
>  
>  	/* confirm we're still not at a refresh boundary */
> -	raw_spin_lock(&cfs_b->lock);
> +	raw_spin_lock_irqsave(&cfs_b->lock, flags);
>  	if (cfs_b->distribute_running) {
> -		raw_spin_unlock(&cfs_b->lock);
> +		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  		return;
>  	}
>  
>  	if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
> -		raw_spin_unlock(&cfs_b->lock);
> +		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  		return;
>  	}
>  
> @@ -4775,18 +4776,18 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
>  	if (runtime)
>  		cfs_b->distribute_running = 1;
>  
> -	raw_spin_unlock(&cfs_b->lock);
> +	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  
>  	if (!runtime)
>  		return;
>  
>  	runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
>  
> -	raw_spin_lock(&cfs_b->lock);
> +	raw_spin_lock_irqsave(&cfs_b->lock, flags);
>  	if (expires == cfs_b->runtime_expires)
>  		lsub_positive(&cfs_b->runtime, runtime);
>  	cfs_b->distribute_running = 0;
> -	raw_spin_unlock(&cfs_b->lock);
> +	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  }
>  
>  /*
> @@ -4864,20 +4865,21 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
>  {
>  	struct cfs_bandwidth *cfs_b =
>  		container_of(timer, struct cfs_bandwidth, period_timer);
> +	unsigned long flags;
>  	int overrun;
>  	int idle = 0;
>  
> -	raw_spin_lock(&cfs_b->lock);
> +	raw_spin_lock_irqsave(&cfs_b->lock, flags);
>  	for (;;) {
>  		overrun = hrtimer_forward_now(timer, cfs_b->period);
>  		if (!overrun)
>  			break;
>  
> -		idle = do_sched_cfs_period_timer(cfs_b, overrun);
> +		idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
>  	}
>  	if (idle)
>  		cfs_b->period_active = 0;
> -	raw_spin_unlock(&cfs_b->lock);
> +	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
>  
>  	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
>  }

  reply	other threads:[~2019-01-08  5:31 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <AM0PR03MB480425D5999E0D08DAB30204BB8E0@AM0PR03MB4804.eurprd03.prod.outlook.com>
2019-01-04 12:42 ` CFS scheduler: spin_lock usage causes dead lock when smp_apic_timer_interrupt occurs Tom Putzeys
2019-01-07 10:26   ` Peter Zijlstra
2019-01-07 12:28     ` Mike Galbraith
2019-01-07 12:52       ` Peter Zijlstra
2019-01-08  5:30         ` Mike Galbraith [this message]
2019-01-08  9:06           ` Peter Zijlstra
2019-01-08 11:05             ` Sebastian Andrzej Siewior
2019-01-21 11:37         ` [tip:sched/core] sched/fair: Robustify CFS-bandwidth timer locking tip-bot for Peter Zijlstra
2019-01-21 13:53         ` tip-bot for Peter Zijlstra
2019-01-27 11:36         ` tip-bot for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1546925459.17252.2.camel@gmx.de \
    --to=efault@gmx.de \
    --cc=bigeasy@linutronix.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=tom.putzeys@be.atlascopco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.