public inbox for linux-rt-users@vger.kernel.org
* [rfc][patch] sched,rt: enqueue spinlock waiters to the head of their queue
@ 2013-04-14 13:34 Mike Galbraith
  2013-04-15 13:01 ` Mike Galbraith
  2013-04-16 13:07 ` Mike Galbraith
  0 siblings, 2 replies; 3+ messages in thread
From: Mike Galbraith @ 2013-04-14 13:34 UTC (permalink / raw)
  To: RT; +Cc: Steven Rostedt, Thomas Gleixner


If a task blocks on a spinlock, give the CPU back as soon as possible so
we can turn over the lock as quickly as possible.  The task was at HEAD
when it blocked; put it back there, and tell everyone else to get the hell
out of the way.
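
For reference, "put it back" just means enqueueing with ENQUEUE_HEAD; the
head vs. tail placement comes down to roughly the below inside
__enqueue_rt_entity() (simplified excerpt, not the whole function):

	struct rt_prio_array *array = &rt_rq->active;
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	if (head)
		list_add(&rt_se->run_list, queue);	/* front of its prio list */
	else
		list_add_tail(&rt_se->run_list, queue);	/* back of its prio list */
	__set_bit(rt_se_prio(rt_se), array->bitmap);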

Signed-off-by: Mike Galbraith <bitbucket@online.de>
---
 include/linux/sched.h |    1 +
 kernel/rtmutex.c      |   13 +++++++++++--
 kernel/sched/core.c   |    9 +++++++--
 kernel/sched/fair.c   |    4 ++++
 kernel/sched/rt.c     |   29 +++++++++++++++++++++++++++--
 5 files changed, 50 insertions(+), 6 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1064,6 +1064,7 @@ struct sched_domain;
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x04		/* internal use, task got migrated */
 #define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
+#define WF_REQUEUE	0x10		/* requeue spinlock "sleeper" */
 
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_HEAD		2
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -722,7 +722,7 @@ static void  noinline __sched rt_spin_lo
 {
 	struct task_struct *lock_owner, *self = current;
 	struct rt_mutex_waiter waiter, *top_waiter;
-	int ret;
+	int ret, wait, cpu = raw_smp_processor_id();
 
 	rt_mutex_init_waiter(&waiter, true);
 
@@ -757,12 +757,21 @@ static void  noinline __sched rt_spin_lo
 
 		top_waiter = rt_mutex_top_waiter(lock);
 		lock_owner = rt_mutex_owner(lock);
+		wait = top_waiter != &waiter;
+
+		/*
+		 * If we are preempting the lock owner on this CPU, preempt
+		 * ourselves instead: the now-boosted lock owner is queued at
+		 * the queue head, and runs as soon as we drop the wait lock.
+		 */
+		if (!wait && task_cpu(lock_owner) == cpu)
+			set_tsk_need_resched(self);
 
 		raw_spin_unlock(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(&waiter);
 
-		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
+		if (wait || adaptive_wait(lock, lock_owner))
 			schedule_rt_mutex(lock);
 
 		raw_spin_lock(&lock->wait_lock);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1611,7 +1611,12 @@ EXPORT_SYMBOL(wake_up_process);
  */
 int wake_up_lock_sleeper(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
+	int flags = WF_LOCK_SLEEPER;
+
+	if (rt_task(p))
+		flags |= WF_REQUEUE;
+
+	return try_to_wake_up(p, TASK_ALL, flags);
 }
 
 int wake_up_state(struct task_struct *p, unsigned int state)
@@ -3815,7 +3820,7 @@ void rt_mutex_setprio(struct task_struct
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
+		enqueue_task(rq, p, ENQUEUE_HEAD);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3522,6 +3522,10 @@ static void check_preempt_wakeup(struct
 	if (unlikely(se == pse))
 		return;
 
+	/* Preempting SCHED_OTHER lock holders harms throughput for no good reason */
+	if (__migrate_disabled(curr))
+		return;
+
 	/*
 	 * This is possible from callers such as move_task(), in which we
 	 * unconditionally check_prempt_curr() after an enqueue (which may have
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1180,6 +1180,10 @@ enqueue_task_rt(struct rq *rq, struct ta
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
+	/* The wakee is a FIFO lock sleeper */
+	if (flags & WF_REQUEUE)
+		flags |= ENQUEUE_HEAD;
+
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1295,8 +1299,29 @@ select_task_rq_rt(struct task_struct *p,
 	return cpu;
 }
 
-static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p, int wake_flags)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+	if (wake_flags & WF_REQUEUE) {
+		if (!p->on_cpu)
+			requeue_task_rt(rq, p, 1);
+
+		/*
+		 * The lock owner was here first, top waiter
+		 * must follow.  If the owner was PI boosted,
+		 * it's gone RSN.  All others need to get off
+		 * this CPU ASAP, this waiter had it first.
+		 */
+		if (rq == this_rq())
+			requeue_task_rt(rq, rq->curr, 1);
+		else if (__migrate_disabled(rq->curr))
+			set_tsk_need_resched(rq->curr);
+		else
+			resched_task(rq->curr);
+
+		return;
+	}
+#endif
 	if (rq->curr->nr_cpus_allowed == 1)
 		return;
 
@@ -1342,7 +1367,7 @@ static void check_preempt_curr_rt(struct
 	 * task.
 	 */
 	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
-		check_preempt_equal_prio(rq, p);
+		check_preempt_equal_prio(rq, p, flags);
 #endif
 }
 




* Re: [rfc][patch] sched,rt: enqueue spinlock waiters to the head of their queue
  2013-04-14 13:34 [rfc][patch] sched,rt: enqueue spinlock waiters to the head of their queue Mike Galbraith
@ 2013-04-15 13:01 ` Mike Galbraith
  2013-04-16 13:07 ` Mike Galbraith
  1 sibling, 0 replies; 3+ messages in thread
From: Mike Galbraith @ 2013-04-15 13:01 UTC (permalink / raw)
  To: RT; +Cc: Steven Rostedt, Thomas Gleixner

BTW, if I were a reader, I'd be asking "where are the cold hard numbers
showing that this is a good thing to do?"  They're missing because I
don't have any.

This is fallout from turning spinlocks back into 100% spinning locks
for rt explorations.  That _can_ improve semop throughput up to 7-fold
when combined with an "Ok boss, how long may I spin before I have to
check for other runnable tasks at my prio" knob, but it also adds a
little overhead to the general case (making it a FAIL), and it will
chew huge amounts of CPU in the heavily contended case just to close
off most priority inversions should a high priority task block briefly
while there's a low priority task runnable.  Preemptible spinning locks
are pure evil.
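
For context, the "adaptive" part that stays in place with this patch is
adaptive_wait(): spin only while the lock owner is actually running on a
CPU, otherwise give up and sleep.  Roughly (simplified, from memory):

static int adaptive_wait(struct rt_mutex *lock, struct task_struct *owner)
{
	int res = 0;

	rcu_read_lock();
	for (;;) {
		/* Owner changed: stop spinning, go retry the lock. */
		if (owner != rt_mutex_owner(lock))
			break;
		barrier();
		/* Owner isn't on a CPU, spinning is pointless: sleep. */
		if (!owner->on_cpu) {
			res = 1;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}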

Doing this wake-to-head _should_ speed up lock turnaround, but I have
zero hard evidence that it makes any difference in practice, so a
yawn/NAK is perfectly understandable :)  These are only the leftover
bits that I think might be worth a ponder or two after declaring
preemptible spinning locks too evil to live.

On Sun, 2013-04-14 at 15:34 +0200, Mike Galbraith wrote: 
> If a task blocks on a spinlock, give the CPU back as soon as possible so
> we can turn over the lock as quickly as possible.  The task was at HEAD
> when it blocked; put it back there, and tell everyone else to get the hell
> out of the way.
> 
> Signed-off-by: Mike Galbraith <bitbucket@online.de>
> ---
>  include/linux/sched.h |    1 +
>  kernel/rtmutex.c      |   13 +++++++++++--
>  kernel/sched/core.c   |    9 +++++++--
>  kernel/sched/fair.c   |    4 ++++
>  kernel/sched/rt.c     |   29 +++++++++++++++++++++++++++--
>  5 files changed, 50 insertions(+), 6 deletions(-)
> 
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1064,6 +1064,7 @@ struct sched_domain;
>  #define WF_FORK		0x02		/* child wakeup after fork */
>  #define WF_MIGRATED	0x04		/* internal use, task got migrated */
>  #define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
> +#define WF_REQUEUE	0x10		/* requeue spinlock "sleeper" */
>  
>  #define ENQUEUE_WAKEUP		1
>  #define ENQUEUE_HEAD		2
> --- a/kernel/rtmutex.c
> +++ b/kernel/rtmutex.c
> @@ -722,7 +722,7 @@ static void  noinline __sched rt_spin_lo
>  {
>  	struct task_struct *lock_owner, *self = current;
>  	struct rt_mutex_waiter waiter, *top_waiter;
> -	int ret;
> +	int ret, wait, cpu = raw_smp_processor_id();
>  
>  	rt_mutex_init_waiter(&waiter, true);
>  
> @@ -757,12 +757,21 @@ static void  noinline __sched rt_spin_lo
>  
>  		top_waiter = rt_mutex_top_waiter(lock);
>  		lock_owner = rt_mutex_owner(lock);
> +		wait = top_waiter != &waiter;
> +
> +		/*
> +		 * If we are preempting the lock owner on this CPU, preempt
> +		 * ourselves instead: the now-boosted lock owner is queued at
> +		 * the queue head, and runs as soon as we drop the wait lock.
> +		 */
> +		if (!wait && task_cpu(lock_owner) == cpu)
> +			set_tsk_need_resched(self);
>  
>  		raw_spin_unlock(&lock->wait_lock);
>  
>  		debug_rt_mutex_print_deadlock(&waiter);
>  
> -		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
> +		if (wait || adaptive_wait(lock, lock_owner))
>  			schedule_rt_mutex(lock);
>  
>  		raw_spin_lock(&lock->wait_lock);
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1611,7 +1611,12 @@ EXPORT_SYMBOL(wake_up_process);
>   */
>  int wake_up_lock_sleeper(struct task_struct *p)
>  {
> -	return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
> +	int flags = WF_LOCK_SLEEPER;
> +
> +	if (rt_task(p))
> +		flags |= WF_REQUEUE;
> +
> +	return try_to_wake_up(p, TASK_ALL, flags);
>  }
>  
>  int wake_up_state(struct task_struct *p, unsigned int state)
> @@ -3815,7 +3820,7 @@ void rt_mutex_setprio(struct task_struct
>  	if (running)
>  		p->sched_class->set_curr_task(rq);
>  	if (on_rq)
> -		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
> +		enqueue_task(rq, p, ENQUEUE_HEAD);
>  
>  	check_class_changed(rq, p, prev_class, oldprio);
>  out_unlock:
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3522,6 +3522,10 @@ static void check_preempt_wakeup(struct
>  	if (unlikely(se == pse))
>  		return;
>  
> +	/* Preempting SCHED_OTHER lock holders harms throughput for no good reason */
> +	if (__migrate_disabled(curr))
> +		return;
> +
>  	/*
>  	 * This is possible from callers such as move_task(), in which we
>  	 * unconditionally check_prempt_curr() after an enqueue (which may have
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1180,6 +1180,10 @@ enqueue_task_rt(struct rq *rq, struct ta
>  	if (flags & ENQUEUE_WAKEUP)
>  		rt_se->timeout = 0;
>  
> +	/* The wakee is a FIFO lock sleeper */
> +	if (flags & WF_REQUEUE)
> +		flags |= ENQUEUE_HEAD;
> +
>  	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
>  
>  	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
> @@ -1295,8 +1299,29 @@ select_task_rq_rt(struct task_struct *p,
>  	return cpu;
>  }
>  
> -static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
> +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p, int wake_flags)
>  {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +	if (wake_flags & WF_REQUEUE) {
> +		if (!p->on_cpu)
> +			requeue_task_rt(rq, p, 1);
> +
> +		/*
> +		 * The lock owner was here first, top waiter
> +		 * must follow.  If the owner was PI boosted,
> +		 * it's gone RSN.  All others need to get off
> +		 * this CPU ASAP, this waiter had it first.
> +		 */
> +		if (rq == this_rq())
> +			requeue_task_rt(rq, rq->curr, 1);
> +		else if (__migrate_disabled(rq->curr))
> +			set_tsk_need_resched(rq->curr);
> +		else
> +			resched_task(rq->curr);
> +
> +		return;
> +	}
> +#endif
>  	if (rq->curr->nr_cpus_allowed == 1)
>  		return;
>  
> @@ -1342,7 +1367,7 @@ static void check_preempt_curr_rt(struct
>  	 * task.
>  	 */
>  	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
> -		check_preempt_equal_prio(rq, p);
> +		check_preempt_equal_prio(rq, p, flags);
>  #endif
>  }
>  
> 
> 




* Re: [rfc][patch] sched,rt: enqueue spinlock waiters to the head of their queue
  2013-04-14 13:34 [rfc][patch] sched,rt: enqueue spinlock waiters to the head of their queue Mike Galbraith
  2013-04-15 13:01 ` Mike Galbraith
@ 2013-04-16 13:07 ` Mike Galbraith
  1 sibling, 0 replies; 3+ messages in thread
From: Mike Galbraith @ 2013-04-16 13:07 UTC (permalink / raw)
  To: RT; +Cc: Steven Rostedt, Thomas Gleixner

On Sun, 2013-04-14 at 15:34 +0200, Mike Galbraith wrote: 
> If a task blocks on a spinlock, give the CPU back as soon as possible so
> we can turn over the lock as quickly as possible.  The task was at HEAD
> when it blocked; put it back there, and tell everyone else to get the hell
> out of the way.

Hm, seems I may have munged it a little while separating 'good' from
'evil'.

With that patch.

vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 15322680, ops/sec 510756
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 15448786, ops/sec 514959
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 15697128, ops/sec 523237

Without the patch.

vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 19056218, ops/sec 635207
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 18702572, ops/sec 623419
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 19164792, ops/sec 638826

With the spinning-spinlocks parent patch.

vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 37788816, ops/sec 1259627
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 37981322, ops/sec 1266044
vogelweide:/abuild/mike/:[0]# schedctl -F ./semop-multi 64 64
cpus 64, threads: 64, semaphores: 64, test duration: 30 secs
total operations: 37840314, ops/sec 1261343

Crap, that's roughly an 18% throughput hit.  Bloody mumble mutter rt....

> Signed-off-by: Mike Galbraith <bitbucket@online.de>
> ---
>  include/linux/sched.h |    1 +
>  kernel/rtmutex.c      |   13 +++++++++++--
>  kernel/sched/core.c   |    9 +++++++--
>  kernel/sched/fair.c   |    4 ++++
>  kernel/sched/rt.c     |   29 +++++++++++++++++++++++++++--
>  5 files changed, 50 insertions(+), 6 deletions(-)
> 
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1064,6 +1064,7 @@ struct sched_domain;
>  #define WF_FORK		0x02		/* child wakeup after fork */
>  #define WF_MIGRATED	0x04		/* internal use, task got migrated */
>  #define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
> +#define WF_REQUEUE	0x10		/* requeue spinlock "sleeper" */
>  
>  #define ENQUEUE_WAKEUP		1
>  #define ENQUEUE_HEAD		2
> --- a/kernel/rtmutex.c
> +++ b/kernel/rtmutex.c
> @@ -722,7 +722,7 @@ static void  noinline __sched rt_spin_lo
>  {
>  	struct task_struct *lock_owner, *self = current;
>  	struct rt_mutex_waiter waiter, *top_waiter;
> -	int ret;
> +	int ret, wait, cpu = raw_smp_processor_id();
>  
>  	rt_mutex_init_waiter(&waiter, true);
>  
> @@ -757,12 +757,21 @@ static void  noinline __sched rt_spin_lo
>  
>  		top_waiter = rt_mutex_top_waiter(lock);
>  		lock_owner = rt_mutex_owner(lock);
> +		wait = top_waiter != &waiter;
> +
> +		/*
> +		 * If we are preempting the lock owner on this CPU, preempt
> +		 * ourselves instead: the now-boosted lock owner is queued at
> +		 * the queue head, and runs as soon as we drop the wait lock.
> +		 */
> +		if (!wait && task_cpu(lock_owner) == cpu)
> +			set_tsk_need_resched(self);
>  
>  		raw_spin_unlock(&lock->wait_lock);
>  
>  		debug_rt_mutex_print_deadlock(&waiter);
>  
> -		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
> +		if (wait || adaptive_wait(lock, lock_owner))
>  			schedule_rt_mutex(lock);
>  
>  		raw_spin_lock(&lock->wait_lock);
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1611,7 +1611,12 @@ EXPORT_SYMBOL(wake_up_process);
>   */
>  int wake_up_lock_sleeper(struct task_struct *p)
>  {
> -	return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
> +	int flags = WF_LOCK_SLEEPER;
> +
> +	if (rt_task(p))
> +		flags |= WF_REQUEUE;
> +
> +	return try_to_wake_up(p, TASK_ALL, flags);
>  }
>  
>  int wake_up_state(struct task_struct *p, unsigned int state)
> @@ -3815,7 +3820,7 @@ void rt_mutex_setprio(struct task_struct
>  	if (running)
>  		p->sched_class->set_curr_task(rq);
>  	if (on_rq)
> -		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
> +		enqueue_task(rq, p, ENQUEUE_HEAD);
>  
>  	check_class_changed(rq, p, prev_class, oldprio);
>  out_unlock:
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3522,6 +3522,10 @@ static void check_preempt_wakeup(struct
>  	if (unlikely(se == pse))
>  		return;
>  
> +	/* Preempting SCHED_OTHER lock holders harms throughput for no good reason */
> +	if (__migrate_disabled(curr))
> +		return;
> +
>  	/*
>  	 * This is possible from callers such as move_task(), in which we
>  	 * unconditionally check_prempt_curr() after an enqueue (which may have
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1180,6 +1180,10 @@ enqueue_task_rt(struct rq *rq, struct ta
>  	if (flags & ENQUEUE_WAKEUP)
>  		rt_se->timeout = 0;
>  
> +	/* The wakee is a FIFO lock sleeper */
> +	if (flags & WF_REQUEUE)
> +		flags |= ENQUEUE_HEAD;
> +
>  	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
>  
>  	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
> @@ -1295,8 +1299,29 @@ select_task_rq_rt(struct task_struct *p,
>  	return cpu;
>  }
>  
> -static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
> +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p, int wake_flags)
>  {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +	if (wake_flags & WF_REQUEUE) {
> +		if (!p->on_cpu)
> +			requeue_task_rt(rq, p, 1);
> +
> +		/*
> +		 * The lock owner was here first, top waiter
> +		 * must follow.  If the owner was PI boosted,
> +		 * it's gone RSN.  All others need to get off
> +		 * this CPU ASAP, this waiter had it first.
> +		 */
> +		if (rq == this_rq())
> +			requeue_task_rt(rq, rq->curr, 1);
> +		else if (__migrate_disabled(rq->curr))
> +			set_tsk_need_resched(rq->curr);
> +		else
> +			resched_task(rq->curr);
> +
> +		return;
> +	}
> +#endif
>  	if (rq->curr->nr_cpus_allowed == 1)
>  		return;
>  
> @@ -1342,7 +1367,7 @@ static void check_preempt_curr_rt(struct
>  	 * task.
>  	 */
>  	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
> -		check_preempt_equal_prio(rq, p);
> +		check_preempt_equal_prio(rq, p, flags);
>  #endif
>  }
>  
> 
> 



