public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Frank Rowand <frank.rowand@gmail.com>
To: frank.rowand@am.sony.com, frank.rowand@gmail.com
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Chris Mason <chris.mason@oracle.com>, Ingo Molnar <mingo@elte.hu>,
	Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <efault@gmx.de>, Oleg Nesterov <oleg@redhat.com>,
	Paul Turner <pjt@google.com>, Jens Axboe <axboe@kernel.dk>,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC][PATCH 0/5] Reduce runqueue lock contention -v2
Date: Thu, 16 Dec 2010 11:36:32 -0800	[thread overview]
Message-ID: <4D0A6A40.2040907@am.sony.com> (raw)
In-Reply-To: <4D0A649B.9080505@am.sony.com>



patch 1 of 2

Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>

---
 arch/x86/kernel/smp.c |    1 	1 +	0 -	0 !
 include/linux/sched.h |    5 	5 +	0 -	0 !
 kernel/sched.c        |  105 	99 +	6 -	0 !
 3 files changed, 105 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/kernel/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smp.c
+++ linux-2.6/arch/x86/kernel/smp.c
@@ -205,6 +205,7 @@ void smp_reschedule_interrupt(struct pt_
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
+	sched_ttwu_pending();
 }
 
 void smp_call_function_interrupt(struct pt_regs *regs)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1038,6 +1038,7 @@ struct sched_domain;
  */
 #define WF_SYNC		0x01		/* waker goes to sleep after wakup */
 #define WF_FORK		0x02		/* child wakeup after fork */
+#define WF_LOAD		0x04		/* for queued try_to_wake_up() */
 
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_WAKING		2
@@ -1193,6 +1194,8 @@ struct task_struct {
 	int lock_depth;		/* BKL lock depth */
 
 #ifdef CONFIG_SMP
+	struct task_struct *ttwu_queue_wake_entry;
+	int ttwu_queue_wake_flags;
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	int oncpu;
 #endif
@@ -2017,6 +2020,7 @@ extern void release_uids(struct user_nam
 
 extern void do_timer(unsigned long ticks);
 
+extern void sched_ttwu_pending(void);
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
 extern void wake_up_new_task(struct task_struct *tsk,
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -515,6 +515,8 @@ struct rq {
 	u64 age_stamp;
 	u64 idle_stamp;
 	u64 avg_idle;
+
+	struct task_struct *wake_list;
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -2332,6 +2334,28 @@ static inline void ttwu_post_activation(
 		wq_worker_waking_up(p, cpu_of(rq));
 }
 
+#ifdef CONFIG_SMP
+static void ttwu_queue_wake_up(struct task_struct *p, int cpu, int wake_flags)
+{
+	struct task_struct *next = NULL;
+	struct rq *rq = cpu_rq(cpu);
+
+	p->ttwu_queue_wake_flags = wake_flags;
+	/* Push p onto cpu's rq->wake_list; retry if the head changed. */
+	for (;;) {
+		struct task_struct *old = next;
+
+		p->ttwu_queue_wake_entry = next;
+		next = cmpxchg(&rq->wake_list, old, p);
+		if (next == old)
+			break;
+	}
+	/* The list was empty, so this waker must kick the target cpu. */
+	if (!next)
+		smp_send_reschedule(cpu);
+}
+#endif
+
 /**
  * try_to_wake_up - wake up a thread
  * @p: the thread to be awakened
@@ -2350,20 +2374,88 @@ static inline void ttwu_post_activation(
 static int try_to_wake_up(struct task_struct *p, unsigned int state,
 			  int wake_flags)
 {
+/*
+ * xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ * todo
+ *  - pass waking cpu with queued wake up, to be used in call to
+ *    select_task_rq().
+ *  - handle cpu being offlined
+ * xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ */
 	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
 	unsigned long en_flags = ENQUEUE_WAKEUP;
 	struct rq *rq;
+#ifdef CONFIG_SMP
+	int load;
+#endif
 
 	this_cpu = get_cpu();
 
-	smp_wmb();
-	rq = task_rq_lock(p, &flags);
-	if (!(p->state & state))
-		goto out;
+	local_irq_save(flags);
 
-	if (p->se.on_rq)
-		goto out_running;
+	for (;;) {
+		unsigned int task_state = p->state;
+
+		if (!(task_state & state))
+			goto out_nolock;
+		/*
+		 * task_contributes_to_load() tests p->state
+		 */
+		load = task_contributes_to_load(p);
+
+		if (cmpxchg(&p->state, task_state, TASK_WAKING) == task_state) {
+			if (state == TASK_WAKING)
+				load = wake_flags & WF_LOAD;
+			break;
+		}
+	}
+
+	/*
+	 * Avoid a possible cross cpu rq lock attempt until we know that a
+	 * lock must be acquired.  rq lock is to protect interaction with
+	 * schedule().
+	 *
+	 * p->state == TASK_WAKING protects against any other try_to_wake_up()
+	 * setting p->se.on_rq true after this test.
+	 */
+	if (unlikely(p->se.on_rq)) {
+		smp_wmb();
+		rq = __task_rq_lock(p);
+		if (p->se.on_rq)
+			goto out_running;
+		__task_rq_unlock(rq);
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If task_cpu(p) != this_cpu then the attempt to lock the rq on the
+	 * other cpu can result in rq lock contention.  Queueing this wake up
+	 * on the other cpu may reduce rq lock contention.
+	 *
+	 * All tests that could have led to returning 0 have been completed
+	 * before this point, return value will be 1.  The return value of
+	 * the try_to_wake_up() executed after unqueueing the wake request
+	 * can not be returned to the current caller, so have to know what
+	 * the return value of the queued request will be.
+	 */
+	cpu = task_cpu(p);
+	if (cpu != this_cpu) {
+		if (load)
+			wake_flags |= WF_LOAD;
+		ttwu_queue_wake_up(p, cpu, wake_flags);
+		success = 1;
+		goto out_nolock;
+	}
+#endif
+
+	/*
+	 * task_cpu(p) may have changed after it was checked, because rq->lock
+	 * is not held.  Thus we may still end up with cross cpu rq lock
+	 * contention.  Encountering this race should be very rare.
+	 */
+	smp_wmb();
+	rq = __task_rq_lock(p);
 
 	cpu = task_cpu(p);
 	orig_cpu = cpu;
@@ -2378,13 +2470,12 @@ static int try_to_wake_up(struct task_st
 	 *
 	 * First fix up the nr_uninterruptible count:
 	 */
-	if (task_contributes_to_load(p)) {
+	if (load) {
 		if (likely(cpu_online(orig_cpu)))
 			rq->nr_uninterruptible--;
 		else
 			this_rq()->nr_uninterruptible--;
 	}
-	p->state = TASK_WAKING;
 
 	if (p->sched_class->task_waking) {
 		p->sched_class->task_waking(rq, p);
@@ -2394,6 +2485,10 @@ static int try_to_wake_up(struct task_st
 	cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
 	if (cpu != orig_cpu)
 		set_task_cpu(p, cpu);
+	/*
+	 * Protected against concurrent wakeups while rq->lock released because
+	 * p is in TASK_WAKING state.
+	 */
 	__task_rq_unlock(rq);
 
 	rq = cpu_rq(cpu);
@@ -2430,13 +2525,30 @@ out_activate:
 	success = 1;
 out_running:
 	ttwu_post_activation(p, rq, wake_flags, success);
-out:
-	task_rq_unlock(rq, &flags);
+	__task_rq_unlock(rq);
+out_nolock:
+	local_irq_restore(flags);
 	put_cpu();
 
 	return success;
 }
 
+#ifdef CONFIG_SMP
+/* Perform the wake ups that other cpus queued on this cpu's wake_list. */
+void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *next, *p = xchg(&rq->wake_list, NULL);
+
+	for (; p; p = next) {
+		/* Read the link first: once woken, p may be queued again. */
+		next = p->ttwu_queue_wake_entry;
+
+		try_to_wake_up(p, TASK_WAKING, p->ttwu_queue_wake_flags);
+	}
+}
+#endif
+
 /**
  * try_to_wake_up_local - try to wake up a local task with rq lock held
  * @p: the thread to be awakened

  reply	other threads:[~2010-12-16 19:36 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-16 14:56 [RFC][PATCH 0/5] Reduce runqueue lock contention -v2 Peter Zijlstra
2010-12-16 14:56 ` [RFC][PATCH 1/5] sched: Always provide p->oncpu Peter Zijlstra
2010-12-18  1:03   ` Frank Rowand
2010-12-16 14:56 ` [RFC][PATCH 2/5] mutex: Use p->oncpu for the adaptive spin Peter Zijlstra
2010-12-16 17:34   ` Oleg Nesterov
2010-12-16 19:29     ` Peter Zijlstra
2010-12-17 19:17       ` Oleg Nesterov
2010-12-16 14:56 ` [RFC][PATCH 3/5] sched: Change the ttwu success details Peter Zijlstra
2010-12-16 15:23   ` Frederic Weisbecker
2010-12-16 15:27     ` Peter Zijlstra
2010-12-16 15:30       ` Peter Zijlstra
2010-12-16 15:45         ` Frederic Weisbecker
2010-12-16 15:35       ` Frederic Weisbecker
2010-12-18  1:05   ` Frank Rowand
2010-12-16 14:56 ` [RFC][PATCH 4/5] sched: Clean up ttwu stats Peter Zijlstra
2010-12-18  1:09   ` Frank Rowand
2010-12-16 14:56 ` [RFC][PATCH 5/5] sched: Reduce ttwu rq->lock contention Peter Zijlstra
2010-12-16 15:31   ` Frederic Weisbecker
2010-12-16 17:58   ` Oleg Nesterov
2010-12-16 18:42   ` Oleg Nesterov
2010-12-16 18:58     ` Peter Zijlstra
2010-12-16 19:03       ` Peter Zijlstra
2010-12-16 19:47         ` Peter Zijlstra
2010-12-16 20:32           ` Peter Zijlstra
2010-12-17  3:06             ` Yan, Zheng
2010-12-17 13:23               ` Peter Zijlstra
2010-12-17 16:54             ` Oleg Nesterov
2010-12-17 17:43               ` Peter Zijlstra
2010-12-17 18:15                 ` Peter Zijlstra
2010-12-17 19:28                   ` Oleg Nesterov
2010-12-17 21:02                     ` Peter Zijlstra
2010-12-18 14:49                   ` Yong Zhang
2010-12-18 20:08                     ` Oleg Nesterov
2010-12-19 11:20                       ` Yong Zhang
2010-12-17 18:21                 ` Oleg Nesterov
2010-12-17 17:50               ` Oleg Nesterov
2010-12-17 18:24                 ` Peter Zijlstra
2010-12-17 18:41                   ` Peter Zijlstra
2010-12-16 19:12 ` [RFC][PATCH 0/5] Reduce runqueue lock contention -v2 Frank Rowand
2010-12-16 19:36   ` Frank Rowand [this message]
2010-12-16 19:39     ` Frank Rowand
2010-12-16 19:42       ` Peter Zijlstra
2010-12-16 20:45         ` Frank Rowand
2010-12-16 19:36   ` Frank Rowand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D0A6A40.2040907@am.sony.com \
    --to=frank.rowand@gmail.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=axboe@kernel.dk \
    --cc=chris.mason@oracle.com \
    --cc=efault@gmx.de \
    --cc=frank.rowand@am.sony.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    --cc=pjt@google.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox