public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: tip-bot for Peter Zijlstra <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
	torvalds@linux-foundation.org, mingo@kernel.org,
	tglx@linutronix.de, hpa@zytor.com
Subject: [tip:sched/core] sched/core: Rework rq->clock update skips
Date: Wed, 14 Jan 2015 06:03:25 -0800	[thread overview]
Message-ID: <tip-9edfbfed3f544a7830d99b341f0c175995a02950@git.kernel.org> (raw)
In-Reply-To: <20150105103554.432381549@infradead.org>

Commit-ID:  9edfbfed3f544a7830d99b341f0c175995a02950
Gitweb:     http://git.kernel.org/tip/9edfbfed3f544a7830d99b341f0c175995a02950
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Mon, 5 Jan 2015 11:18:11 +0100
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 14 Jan 2015 13:34:20 +0100

sched/core: Rework rq->clock update skips

The original purpose of rq::skip_clock_update was to avoid 'costly' clock
updates for back to back wakeup-preempt pairs. The big problem with it
has always been that the rq variable is unaware of the context and
causes indiscrimiate clock skips.

Rework the entire thing and create a sense of context by only allowing
schedule() to skip clock updates. (XXX can we measure the cost of the
added store?)

By ensuring only schedule can ever skip an update, we guarantee we're
never more than 1 tick behind on the update.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150105103554.432381549@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c  | 12 ++++++++----
 kernel/sched/fair.c  |  2 +-
 kernel/sched/rt.c    |  9 ++++++---
 kernel/sched/sched.h | 15 +++++++++++++--
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 46a2345..b53cc85 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	if (rq->skip_clock_update > 0)
+	lockdep_assert_held(&rq->lock);
+
+	if (rq->clock_skip_update & RQCF_ACT_SKIP)
 		return;
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -1046,7 +1048,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * this case, we can save a useless back to back clock update.
 	 */
 	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-		rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -2779,6 +2781,8 @@ need_resched:
 	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
+	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
+
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2803,13 +2807,13 @@ need_resched:
 		switch_count = &prev->nvcsw;
 	}
 
-	if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
+	if (task_on_rq_queued(prev))
 		update_rq_clock(rq);
 
 	next = pick_next_task(rq, prev);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
-	rq->skip_clock_update = 0;
+	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50ff902..2ecf779 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5156,7 +5156,7 @@ static void yield_task_fair(struct rq *rq)
 		 * so we don't do microscopic update in schedule()
 		 * and double the fastpath cost.
 		 */
-		 rq->skip_clock_update = 1;
+		rq_clock_skip_update(rq, true);
 	}
 
 	set_skip_buddy(se);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ee15f5a..6725e3c 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -831,11 +831,14 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 				enqueue = 1;
 
 				/*
-				 * Force a clock update if the CPU was idle,
-				 * lest wakeup -> unthrottle time accumulate.
+				 * When we're idle and a woken (rt) task is
+				 * throttled check_preempt_curr() will set
+				 * skip_update and the time between the wakeup
+				 * and this unthrottle will get accounted as
+				 * 'runtime'.
 				 */
 				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-					rq->skip_clock_update = -1;
+					rq_clock_skip_update(rq, false);
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bd23732..0870db2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -558,8 +558,6 @@ struct rq {
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
-	int skip_clock_update;
-
 	/* capture load from *all* tasks on this cpu: */
 	struct load_weight load;
 	unsigned long nr_load_updates;
@@ -588,6 +586,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
+	unsigned int clock_skip_update;
 	u64 clock;
 	u64 clock_task;
 
@@ -704,6 +703,18 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
+#define RQCF_REQ_SKIP	0x01
+#define RQCF_ACT_SKIP	0x02
+
+static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+{
+	lockdep_assert_held(&rq->lock);
+	if (skip)
+		rq->clock_skip_update |= RQCF_REQ_SKIP;
+	else
+		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
+}
+
 #ifdef CONFIG_NUMA
 enum numa_topology_type {
 	NUMA_DIRECT,

  reply	other threads:[~2015-01-14 14:04 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-05 10:18 [RFC][PATCH 0/3] sched: skip_clock_update madness Peter Zijlstra
2015-01-05 10:18 ` [RFC][PATCH 1/3] sched: Validate rq_clock*() serialization Peter Zijlstra
2015-01-14 14:03   ` [tip:sched/core] sched/core: " tip-bot for Peter Zijlstra
2015-05-30  1:03     ` Sasha Levin
2015-06-04  2:28       ` Wanpeng Li
2015-01-05 10:18 ` [RFC][PATCH 2/3] sched: Rework rq->clock update skips Peter Zijlstra
2015-01-14 14:03   ` tip-bot for Peter Zijlstra [this message]
2015-01-05 10:18 ` [RFC][PATCH 3/3] sched,debug: Print clock_task Peter Zijlstra
2015-01-14 14:03   ` [tip:sched/core] sched/debug: Print rq->clock_task tip-bot for Peter Zijlstra
2015-01-05 10:49 ` [RFC][PATCH 0/3] sched: skip_clock_update madness Mike Galbraith
2015-01-05 11:50   ` Peter Zijlstra
2015-01-05 12:07     ` Mike Galbraith
2015-01-05 13:59   ` Peter Zijlstra
2015-01-06  5:55     ` Mike Galbraith

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-9edfbfed3f544a7830d99b341f0c175995a02950@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox