All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>,
	mingo@kernel.org, linux-kernel@vger.kernel.org
Cc: Pavan Kondeti <pkondeti@codeaurora.org>,
	Ben Segall <bsegall@google.com>,
	Matt Fleming <matt@codeblueprint.co.uk>,
	Morten Rasmussen <morten.rasmussen@arm.com>,
	Paul Turner <pjt@google.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	byungchul.park@lge.com, Andrew Hunter <ahh@google.com>
Subject: [patch] sched/fair: Move se->vruntime normalization state into struct sched_entity
Date: Sun, 22 May 2016 09:00:01 +0200	[thread overview]
Message-ID: <1463900401.30072.6.camel@gmail.com> (raw)
In-Reply-To: <1463857236.10353.5.camel@gmail.com>

On Sat, 2016-05-21 at 21:00 +0200, Mike Galbraith wrote:
> On Sat, 2016-05-21 at 16:04 +0200, Mike Galbraith wrote:
> 
> > Wakees that were not migrated/normalized eat an unwanted min_vruntime,
> > and likely take a size XXL latency hit.  Big box running master bled
> > profusely under heavy load until I turned TTWU_QUEUE off.

May as well make it official, and against today's master.  Fly or die,
little patchlet.

sched/fair: Move se->vruntime normalization state into struct sched_entity

Commit b5179ac70de ceased globally normalizing wakee vruntime in ttwu(),
leaving sched_ttwu_pending() needing to know whether each wakee on the
wake_list was migrated or not, so it could pass that on to the fair class
functions and let them do the right thing wrt vruntime normalization.
Store the vruntime normalization state in struct sched_entity instead, so
fair class functions that need it always have it, and sched_ttwu_pending()
once again doesn't need to care whether tasks on the wake_list have been
migrated or not.

Since there are now no consumers of ENQUEUE_MIGRATED, stop setting it in
ttwu_do_activate() as well.

master v4.6-8889-gf6c658df6385, unpatched
 256     49096  71698.99 MB/sec  warmup   1 sec  latency 1136.488 ms
 256    155009  72862.08 MB/sec  execute   1 sec  latency 3136.900 ms
 256    207430  72628.04 MB/sec  execute   2 sec  latency 4137.001 ms
 256    259635  72442.97 MB/sec  execute   3 sec  latency 5137.105 ms
 256    311905  72371.84 MB/sec  execute   4 sec  latency 6137.214 ms
 256    364210  72564.99 MB/sec  execute   5 sec  latency 7137.323 ms
 256    416551  72598.74 MB/sec  execute   6 sec  latency 5816.895 ms
 256    468824  72601.54 MB/sec  execute   7 sec  latency 6815.386 ms
 256    520996  72621.87 MB/sec  execute   8 sec  latency 7815.499 ms
 256    573113  72608.75 MB/sec  execute   9 sec  latency 8815.609 ms
 256  cleanup  10 sec
   0  cleanup  10 sec

master v4.6-8889-gf6c658df6385, patched
 256     51527  75357.55 MB/sec  warmup   1 sec  latency 21.591 ms
 256    157610  73188.06 MB/sec  execute   1 sec  latency 12.985 ms
 256    210089  72809.01 MB/sec  execute   2 sec  latency 11.543 ms
 256    262554  72681.86 MB/sec  execute   3 sec  latency 0.209 ms
 256    315432  72798.65 MB/sec  execute   4 sec  latency 0.206 ms
 256    368162  72963.33 MB/sec  execute   5 sec  latency 8.052 ms
 256    420854  72976.50 MB/sec  execute   6 sec  latency 0.221 ms
 256    473420  72953.76 MB/sec  execute   7 sec  latency 0.198 ms
 256    525859  73011.17 MB/sec  execute   8 sec  latency 2.810 ms
 256    578301  73052.84 MB/sec  execute   9 sec  latency 0.247 ms
 256  cleanup  10 sec
   0  cleanup  10 sec


Fixes: b5179ac70de ("sched/fair: Prepare to fix fairness problems on migration")
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
---
 include/linux/sched.h |    1 
 kernel/sched/core.c   |    6 +----
 kernel/sched/fair.c   |   60 ++++++++++++++++++++------------------------------
 3 files changed, 28 insertions(+), 39 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1319,6 +1319,7 @@ struct sched_entity {
 	struct rb_node		run_node;
 	struct list_head	group_node;
 	unsigned int		on_rq;
+	bool			normalized;
 
 	u64			exec_start;
 	u64			sum_exec_runtime;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1719,9 +1719,6 @@ ttwu_do_activate(struct rq *rq, struct t
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
-
-	if (wake_flags & WF_MIGRATED)
-		en_flags |= ENQUEUE_MIGRATED;
 #endif
 
 	ttwu_activate(rq, p, en_flags);
@@ -1774,7 +1771,7 @@ void sched_ttwu_pending(void)
 		 * See ttwu_queue(); we only call ttwu_queue_remote() when
 		 * its a x-cpu wakeup.
 		 */
-		ttwu_do_activate(rq, p, WF_MIGRATED, cookie);
+		ttwu_do_activate(rq, p, 0, cookie);
 	}
 
 	lockdep_unpin_lock(&rq->lock, cookie);
@@ -2166,6 +2163,7 @@ static void __sched_fork(unsigned long c
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+	p->se.normalized		= true;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3230,6 +3230,7 @@ place_entity(struct cfs_rq *cfs_rq, stru
 
 	/* ensure we never gain time by being placed backwards. */
 	se->vruntime = max_vruntime(se->vruntime, vruntime);
+	se->normalized = false;
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3285,29 +3286,40 @@ static inline void check_schedstat_requi
  * CPU and an up-to-date min_vruntime on the destination CPU.
  */
 
+static void normalize_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	se->vruntime -= cfs_rq->min_vruntime;
+	se->normalized = true;
+}
+
+static void renormalize_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	se->vruntime += cfs_rq->min_vruntime;
+	se->normalized = false;
+}
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
-	bool curr = cfs_rq->curr == se;
+	bool renorm = se->normalized, curr = cfs_rq->curr == se;
 
 	/*
-	 * If we're the current task, we must renormalise before calling
+	 * If we're the current task, we must renormalize before calling
 	 * update_curr().
 	 */
 	if (renorm && curr)
-		se->vruntime += cfs_rq->min_vruntime;
+		renormalize_entity(cfs_rq, se);
 
 	update_curr(cfs_rq);
 
 	/*
-	 * Otherwise, renormalise after, such that we're placed at the current
+	 * Otherwise, renormalize after, such that we're placed at the current
 	 * moment in time, instead of some random moment in the past. Being
 	 * placed in the past could significantly boost this task to the
 	 * fairness detriment of existing tasks.
 	 */
 	if (renorm && !curr)
-		se->vruntime += cfs_rq->min_vruntime;
+		renormalize_entity(cfs_rq, se);
 
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
@@ -3406,7 +3418,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 	 * movement in our normalized position.
 	 */
 	if (!(flags & DEQUEUE_SLEEP))
-		se->vruntime -= cfs_rq->min_vruntime;
+		normalize_entity(cfs_rq, se);
 
 	/* return excess runtime on last dequeue */
 	return_cfs_rq_runtime(cfs_rq);
@@ -5408,7 +5420,7 @@ static void migrate_task_rq_fair(struct
 		min_vruntime = cfs_rq->min_vruntime;
 #endif
 
-		se->vruntime -= min_vruntime;
+		normalize_entity(cfs_rq, se);
 	}
 
 	/*
@@ -8319,7 +8331,7 @@ static void task_fork_fair(struct task_s
 		resched_curr(rq);
 	}
 
-	se->vruntime -= cfs_rq->min_vruntime;
+	normalize_entity(cfs_rq, se);
 
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
@@ -8348,29 +8360,7 @@ prio_changed_fair(struct rq *rq, struct
 
 static inline bool vruntime_normalized(struct task_struct *p)
 {
-	struct sched_entity *se = &p->se;
-
-	/*
-	 * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases,
-	 * the dequeue_entity(.flags=0) will already have normalized the
-	 * vruntime.
-	 */
-	if (p->on_rq)
-		return true;
-
-	/*
-	 * When !on_rq, vruntime of the task has usually NOT been normalized.
-	 * But there are some cases where it has already been normalized:
-	 *
-	 * - A forked child which is waiting for being woken up by
-	 *   wake_up_new_task().
-	 * - A task which has been woken up by try_to_wake_up() and
-	 *   waiting for actually being woken up by sched_ttwu_pending().
-	 */
-	if (!se->sum_exec_runtime || p->state == TASK_WAKING)
-		return true;
-
-	return false;
+	return p->se.normalized;
 }
 
 static void detach_task_cfs_rq(struct task_struct *p)
@@ -8384,7 +8374,7 @@ static void detach_task_cfs_rq(struct ta
 		 * cause 'unlimited' sleep bonus.
 		 */
 		place_entity(cfs_rq, se, 0);
-		se->vruntime -= cfs_rq->min_vruntime;
+		normalize_entity(cfs_rq, se);
 	}
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
@@ -8407,8 +8397,8 @@ static void attach_task_cfs_rq(struct ta
 	/* Synchronize task with its cfs_rq */
 	attach_entity_load_avg(cfs_rq, se);
 
-	if (!vruntime_normalized(p))
-		se->vruntime += cfs_rq->min_vruntime;
+	if (vruntime_normalized(p))
+		renormalize_entity(cfs_rq, se);
 }
 
 static void switched_from_fair(struct rq *rq, struct task_struct *p)

  reply	other threads:[~2016-05-22  7:00 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-10 17:43 [PATCH 0/3] sched: Fix wakeup preemption regression Peter Zijlstra
2016-05-10 17:43 ` [PATCH 1/3] sched,fair: Move record_wakee() Peter Zijlstra
2016-05-12 10:27   ` Matt Fleming
2016-05-12 10:31     ` Peter Zijlstra
2016-05-10 17:43 ` [PATCH 2/3] sched,fair: Fix local starvation Peter Zijlstra
2016-05-10 20:21   ` Ingo Molnar
2016-05-10 22:23     ` Peter Zijlstra
2016-05-20 21:24   ` Matt Fleming
2016-05-21 14:04   ` Mike Galbraith
2016-05-21 19:00     ` Mike Galbraith
2016-05-22  7:00       ` Mike Galbraith [this message]
2016-05-22  9:36         ` [patch] sched/fair: Move se->vruntime normalization state into struct sched_entity Peter Zijlstra
2016-05-22  9:52           ` Mike Galbraith
2016-05-22 10:33           ` Peter Zijlstra
2016-05-23  9:19         ` Peter Zijlstra
2016-05-23  9:40           ` Mike Galbraith
2016-05-23 10:13             ` Wanpeng Li
2016-05-23 10:26               ` Mike Galbraith
2016-05-23 12:28             ` Peter Zijlstra
2016-05-25  7:12           ` [tip:sched/urgent] sched/core: Fix remote wakeups tip-bot for Peter Zijlstra
2016-05-22  6:50     ` [PATCH 2/3] sched,fair: Fix local starvation Wanpeng Li
2016-05-22  7:15       ` Mike Galbraith
2016-05-22  7:27         ` Wanpeng Li
2016-05-22  7:32           ` Mike Galbraith
2016-05-22  7:42             ` Wanpeng Li
2016-05-22  8:04               ` Mike Galbraith
2016-05-22  8:24                 ` Wanpeng Li
2016-05-22  8:39                   ` Mike Galbraith
2016-05-22  8:50                     ` Wanpeng Li
2016-05-10 17:43 ` [PATCH 3/3] sched: Kill sched_class::task_waking Peter Zijlstra
2016-05-11  5:55 ` [PATCH 0/3] sched: Fix wakeup preemption regression Mike Galbraith
2016-05-12  9:56 ` Pavan Kondeti
2016-05-12 10:52 ` Matt Fleming
  -- strict thread matches above, loose matches on Subject: below --
2016-05-24 17:04 [patch] sched/fair: Move se->vruntime normalization state into struct sched_entity Paul E. McKenney
2016-05-25 17:49 ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1463900401.30072.6.camel@gmail.com \
    --to=umgwanakikbuti@gmail.com \
    --cc=ahh@google.com \
    --cc=bsegall@google.com \
    --cc=byungchul.park@lge.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mingo@kernel.org \
    --cc=morten.rasmussen@arm.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=pkondeti@codeaurora.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.