public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Chris Mason <chris.mason@oracle.com>,
	Frank Rowand <frank.rowand@am.sony.com>,
	Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <efault@gmx.de>, Oleg Nesterov <oleg@redhat.com>,
	Paul Turner <pjt@google.com>, Jens Axboe <axboe@kernel.dk>,
	Yong Zhang <yong.zhang0@gmail.com>
Cc: linux-kernel@vger.kernel.org, Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 06/17] sched: Provide p->on_rq
Date: Fri, 24 Dec 2010 13:23:44 +0100	[thread overview]
Message-ID: <20101224123742.782732530@chello.nl> (raw)
In-Reply-To: 20101224122338.172750730@chello.nl

[-- Attachment #1: sched-onrq.patch --]
[-- Type: text/plain, Size: 8343 bytes --]

Provide a generic p->on_rq because the p->se.on_rq semantics are
unfavourable for lockless wakeups, yet are still needed by sched_fair.

In particular, p->on_rq is only cleared when we actually dequeue the
task in schedule(), and not on every other dequeue, such as those done
by __migrate_task() and __sched_setscheduler().

This also allows us to remove p->se usage from !sched_fair code.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h   |    1 +
 kernel/sched.c          |   36 ++++++++++++++++++------------------
 kernel/sched_debug.c    |    2 +-
 kernel/sched_rt.c       |   10 +++++-----
 kernel/sched_stoptask.c |    2 +-
 5 files changed, 26 insertions(+), 25 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1200,6 +1200,7 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	int on_cpu;
 #endif
+	int on_rq;
 
 	int prio, static_prio, normal_prio;
 	unsigned int rt_priority;
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -1759,7 +1759,6 @@ static void enqueue_task(struct rq *rq, 
 	update_rq_clock(rq);
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, flags);
-	p->se.on_rq = 1;
 }
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1767,7 +1766,6 @@ static void dequeue_task(struct rq *rq, 
 	update_rq_clock(rq);
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, flags);
-	p->se.on_rq = 0;
 }
 
 /*
@@ -1780,6 +1778,7 @@ static void activate_task(struct rq *rq,
 
 	enqueue_task(rq, p, flags);
 	inc_nr_running(rq);
+	p->on_rq = 1;
 }
 
 /*
@@ -2070,7 +2069,7 @@ static void check_preempt_curr(struct rq
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
+	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -2145,7 +2144,7 @@ static bool migrate_task(struct task_str
 	 * If the task is not on a runqueue (and not running), then
 	 * the next wake-up will properly place the task.
 	 */
-	return p->se.on_rq || task_running(rq, p);
+	return p->on_rq || task_running(rq, p);
 }
 
 /*
@@ -2205,7 +2204,7 @@ unsigned long wait_task_inactive(struct 
 		rq = task_rq_lock(p, &flags);
 		trace_sched_wait_task(p);
 		running = task_running(rq, p);
-		on_rq = p->se.on_rq;
+		on_rq = p->on_rq;
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
@@ -2457,7 +2456,7 @@ static int try_to_wake_up(struct task_st
 
 	cpu = task_cpu(p);
 
-	if (p->se.on_rq)
+	if (p->on_rq)
 		goto out_running;
 
 	orig_cpu = cpu;
@@ -2534,7 +2533,7 @@ static void try_to_wake_up_local(struct 
 	if (!(p->state & TASK_NORMAL))
 		return;
 
-	if (!p->se.on_rq)
+	if (!p->on_rq)
 		activate_task(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_post_activation(p, rq, 0);
@@ -2571,18 +2570,20 @@ int wake_up_state(struct task_struct *p,
  */
 static void __sched_fork(struct task_struct *p)
 {
+	p->on_rq				= 0;
+
+	p->se.on_rq			= 0;
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
+	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
 	INIT_LIST_HEAD(&p->rt.run_list);
-	p->se.on_rq = 0;
-	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -3904,7 +3905,7 @@ static inline void schedule_debug(struct
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->se.on_rq)
+	if (prev->on_rq)
 		update_rq_clock(rq);
 	prev->sched_class->put_prev_task(rq, prev);
 }
@@ -3983,6 +3984,7 @@ asmlinkage void __sched schedule(void)
 					try_to_wake_up_local(to_wakeup);
 			}
 			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+			prev->on_rq = 0;
 		}
 		switch_count = &prev->nvcsw;
 	}
@@ -4546,7 +4548,7 @@ void rt_mutex_setprio(struct task_struct
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
 	prev_class = p->sched_class;
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		dequeue_task(rq, p, 0);
@@ -4595,7 +4597,7 @@ void set_user_nice(struct task_struct *p
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		dequeue_task(rq, p, 0);
 
@@ -4729,8 +4731,6 @@ static struct task_struct *find_process_
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 {
-	BUG_ON(p->se.on_rq);
-
 	p->policy = policy;
 	p->rt_priority = prio;
 	p->normal_prio = normal_prio(p);
@@ -4878,7 +4878,7 @@ static int __sched_setscheduler(struct t
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		deactivate_task(rq, p, 0);
@@ -5737,7 +5737,7 @@ static int __migrate_task(struct task_st
 	 * If we're not on a rq, the next wake-up will ensure we're
 	 * placed properly.
 	 */
-	if (p->se.on_rq) {
+	if (p->on_rq) {
 		deactivate_task(rq_src, p, 0);
 		set_task_cpu(p, dest_cpu);
 		activate_task(rq_dest, p, 0);
@@ -8106,7 +8106,7 @@ static void normalize_task(struct rq *rq
 {
 	int on_rq;
 
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		deactivate_task(rq, p, 0);
 	__setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8449,7 +8449,7 @@ void sched_move_task(struct task_struct 
 	rq = task_rq_lock(tsk, &flags);
 
 	running = task_current(rq, tsk);
-	on_rq = tsk->se.on_rq;
+	on_rq = tsk->on_rq;
 
 	if (on_rq)
 		dequeue_task(rq, tsk, 0);
Index: linux-2.6/kernel/sched_debug.c
===================================================================
--- linux-2.6.orig/kernel/sched_debug.c
+++ linux-2.6/kernel/sched_debug.c
@@ -127,7 +127,7 @@ static void print_rq(struct seq_file *m,
 	read_lock_irqsave(&tasklist_lock, flags);
 
 	do_each_thread(g, p) {
-		if (!p->se.on_rq || task_cpu(p) != rq_cpu)
+		if (!p->on_rq || task_cpu(p) != rq_cpu)
 			continue;
 
 		print_task(m, rq, p);
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -1132,7 +1132,7 @@ static void put_prev_task_rt(struct rq *
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+	if (p->on_rq && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1283,7 +1283,7 @@ static struct rq *find_lock_lowest_rq(st
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
-				     !task->se.on_rq)) {
+				     !task->on_rq)) {
 
 				raw_spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
@@ -1317,7 +1317,7 @@ static struct task_struct *pick_next_pus
 	BUG_ON(task_current(rq, p));
 	BUG_ON(p->rt.nr_cpus_allowed <= 1);
 
-	BUG_ON(!p->se.on_rq);
+	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
 
 	return p;
@@ -1463,7 +1463,7 @@ static int pull_rt_task(struct rq *this_
 		 */
 		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
 			WARN_ON(p == src_rq->curr);
-			WARN_ON(!p->se.on_rq);
+			WARN_ON(!p->on_rq);
 
 			/*
 			 * There's a chance that p is higher in priority
@@ -1534,7 +1534,7 @@ static void set_cpus_allowed_rt(struct t
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
+	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
 		if (!task_current(rq, p)) {
Index: linux-2.6/kernel/sched_stoptask.c
===================================================================
--- linux-2.6.orig/kernel/sched_stoptask.c
+++ linux-2.6/kernel/sched_stoptask.c
@@ -26,7 +26,7 @@ static struct task_struct *pick_next_tas
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->se.on_rq)
+	if (stop && stop->on_rq)
 		return stop;
 
 	return NULL;



  parent reply	other threads:[~2010-12-24 12:45 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-24 12:23 [RFC][PATCH 00/17] sched: Reduce runqueue lock contention -v3 Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 01/17] sched: Always provide p->on_cpu Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 02/17] mutex: Use p->on_cpu for the adaptive spin Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 03/17] sched: Change the ttwu success details Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 04/17] sched: Clean up ttwu stats Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 05/17] x86: Optimize arch_spin_unlock_wait() Peter Zijlstra
2010-12-24 18:26   ` Linus Torvalds
2011-01-03 11:32     ` Peter Zijlstra
2011-01-04  6:45       ` Nick Piggin
2011-01-05 19:14         ` [RFC][PATCH] spinlock: Kill spin_unlock_wait() Peter Zijlstra
2011-01-05 19:26           ` Oleg Nesterov
2011-01-05 19:43           ` Linus Torvalds
2011-01-06  9:32             ` Peter Zijlstra
2011-01-06 10:38               ` Nick Piggin
2011-01-06 18:26                 ` Peter Zijlstra
2011-01-07 21:01                   ` Tejun Heo
2011-01-07 21:13                     ` Jeff Garzik
2011-01-07 21:33                       ` Tejun Heo
2010-12-24 12:23 ` Peter Zijlstra [this message]
2010-12-29 14:14   ` [RFC][PATCH 06/17] sched: Provide p->on_rq Yong Zhang
2010-12-24 12:23 ` [RFC][PATCH 07/17] sched: Serialize p->cpus_allowed and ttwu() using p->pi_lock Peter Zijlstra
2010-12-29 14:20   ` Yong Zhang
2011-01-03 11:12     ` Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 08/17] sched: Drop the rq argument to sched_class::select_task_rq() Peter Zijlstra
2010-12-29 14:31   ` Yong Zhang
2011-01-03 11:16     ` Peter Zijlstra
2011-01-03 14:59       ` Oleg Nesterov
2011-01-03 15:21         ` Peter Zijlstra
2011-01-03 15:49           ` Oleg Nesterov
2011-01-03 16:35             ` Peter Zijlstra
2011-01-03 16:41               ` Peter Zijlstra
2011-01-04  7:27             ` Yong Zhang
2011-01-04 12:34               ` Peter Zijlstra
2011-01-04  5:59       ` Yong Zhang
2011-01-04 13:00         ` Peter Zijlstra
2011-01-03 18:05   ` Oleg Nesterov
2011-01-04 13:01     ` Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 09/17] sched: Remove rq argument to sched_class::task_waking() Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 10/17] sched: Add TASK_WAKING to task_rq_lock Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 11/17] sched: Delay task_contributes_to_load() Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 12/17] sched: Also serialize ttwu_local() with p->pi_lock Peter Zijlstra
2011-01-03 17:32   ` Oleg Nesterov
2011-01-09 23:11     ` Tejun Heo
2010-12-24 12:23 ` [RFC][PATCH 13/17] sched: Remove rq->lock from the first half of ttwu() Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 14/17] sched: Remove rq argument to ttwu_stat() Peter Zijlstra
2010-12-29 14:40   ` Yong Zhang
2011-01-03 11:20     ` Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 15/17] sched: Rename ttwu_post_activation Peter Zijlstra
2010-12-24 12:23 ` [RFC][PATCH 16/17] sched: Move the second half of ttwu() to the remote cpu Peter Zijlstra
2011-01-03 14:36   ` [RFC][PATCH] sembench: add stddev to the burn stats Peter Zijlstra
2011-01-04 14:28   ` [RFC][PATCH 16/17] sched: Move the second half of ttwu() to the remote cpu Oleg Nesterov
2011-01-04 14:47     ` Peter Zijlstra
2011-01-04 15:18       ` Oleg Nesterov
2011-01-04 15:43         ` Peter Zijlstra
2011-01-04 16:06           ` Oleg Nesterov
2010-12-24 12:23 ` [RFC][PATCH 17/17] sched: Sort hotplug vs ttwu queueing Peter Zijlstra
2010-12-29 14:51   ` Yong Zhang
2011-01-03 11:21     ` Peter Zijlstra
2010-12-24 13:15 ` [RFC][PATCH 00/17] sched: Reduce runqueue lock contention -v3 Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101224123742.782732530@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=axboe@kernel.dk \
    --cc=chris.mason@oracle.com \
    --cc=efault@gmx.de \
    --cc=frank.rowand@am.sony.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    --cc=pjt@google.com \
    --cc=tglx@linutronix.de \
    --cc=yong.zhang0@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.