All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Turner <pjt@google.com>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Chris Friesen <cfriesen@nortel.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Pierre Bourdon <pbourdon@excellency.fr>,
	Paul Turner <pjt@google.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Karl Rister <kmr@us.ibm.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	David Miller <davem@davemloft.net>
Subject: [tg_shares_up rewrite v3 05/11] sched: fix update_cfs_load synchronization
Date: Thu, 11 Nov 2010 19:24:10 -0800	[thread overview]
Message-ID: <20101112032701.712083681@google.com> (raw)
In-Reply-To: 20101112032405.657789056@google.com

[-- Attachment #1: sched-tg-fix-update_cfs_load.patch --]
[-- Type: text/plain, Size: 4093 bytes --]

Using cfs_rq->nr_running is not sufficient to synchronize update_cfs_load with
the put path since nr_running accounting occurs at deactivation.

It's also not safe to make the removal decision based on load_avg as this fails
with both high periods and low shares.  Resolve this by clipping history after 4
periods without activity.

Note: the above will always occur from update_shares() since in the 
last-task-sleep-case that task will still be cfs_rq->curr when update_cfs_load is
called.

Signed-off-by: Paul Turner <pjt@google.com>

---
 kernel/sched.c      |    2 +-
 kernel/sched_fair.c |   33 +++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 13 deletions(-)

Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -674,10 +674,11 @@ account_entity_dequeue(struct cfs_rq *cf
 }
 
 #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static void update_cfs_load(struct cfs_rq *cfs_rq)
 {
 	u64 period = sched_avg_period();
 	u64 now, delta;
+	unsigned long load = cfs_rq->load.weight;
 
 	if (!cfs_rq)
 		return;
@@ -685,9 +686,19 @@ static void update_cfs_load(struct cfs_r
 	now = rq_of(cfs_rq)->clock;
 	delta = now - cfs_rq->load_stamp;
 
+	/* truncate load history at 4 idle periods */
+	if (cfs_rq->load_stamp > cfs_rq->load_last &&
+	    now - cfs_rq->load_last > 4 * period) {
+		cfs_rq->load_period = 0;
+		cfs_rq->load_avg = 0;
+	}
+
 	cfs_rq->load_stamp = now;
 	cfs_rq->load_period += delta;
-	cfs_rq->load_avg += delta * cfs_rq->load.weight;
+	if (load) {
+		cfs_rq->load_last = now;
+		cfs_rq->load_avg += delta * load;
+	}
 
 	while (cfs_rq->load_period > period) {
 		/*
@@ -700,10 +711,8 @@ static void update_cfs_load(struct cfs_r
 		cfs_rq->load_avg /= 2;
 	}
 
-	if (lb && !cfs_rq->nr_running) {
-		if (cfs_rq->load_avg < (period / 8))
-			list_del_leaf_cfs_rq(cfs_rq);
-	}
+	if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
+		list_del_leaf_cfs_rq(cfs_rq);
 }
 
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
@@ -750,7 +759,7 @@ static void update_cfs_shares(struct cfs
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static inline void update_cfs_load(struct cfs_rq *cfs_rq)
 {
 }
 
@@ -880,7 +889,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	update_cfs_load(cfs_rq, 0);
+	update_cfs_load(cfs_rq);
 	update_cfs_shares(cfs_rq, se->load.weight);
 	account_entity_enqueue(cfs_rq, se);
 
@@ -941,7 +950,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
-	update_cfs_load(cfs_rq, 0);
+	update_cfs_load(cfs_rq);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq, 0);
@@ -1176,7 +1185,7 @@ enqueue_task_fair(struct rq *rq, struct 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		update_cfs_load(cfs_rq, 0);
+		update_cfs_load(cfs_rq);
 		update_cfs_shares(cfs_rq, 0);
 	}
 
@@ -1206,7 +1215,7 @@ static void dequeue_task_fair(struct rq 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		update_cfs_load(cfs_rq, 0);
+		update_cfs_load(cfs_rq);
 		update_cfs_shares(cfs_rq, 0);
 	}
 
@@ -2019,7 +2028,7 @@ static int tg_shares_up(struct task_grou
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
+	update_cfs_load(cfs_rq);
 
 	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
 	load_avg -= cfs_rq->load_contribution;
Index: kernel/sched.c
===================================================================
--- kernel/sched.c.orig
+++ kernel/sched.c
@@ -357,7 +357,7 @@ struct cfs_rq {
 
 	u64 load_avg;
 	u64 load_period;
-	u64 load_stamp;
+	u64 load_stamp, load_last;
 
 	unsigned long load_contribution;
 #endif

-- 


  parent reply	other threads:[~2010-11-12  3:32 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-12  3:24 [tg_shares_up rewrite v3 00/11] reduce overhead for tg->shares distribution Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 01/11] sched: rewrite tg_shares_up Paul Turner
2010-11-12 11:10   ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 02/11] sched: on-demand (active) cfs_rq list Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 03/11] sched: make tg_shares_up() walk on-demand Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 04/11] sched: fix load corruption from update_cfs_shares Paul Turner
2010-11-12  3:24 ` Paul Turner [this message]
2010-11-12  3:24 ` [tg_shares_up rewrite v3 06/11] sched: hierarchal order on shares update list Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 07/11] sched: add sysctl_sched_shares_window Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 08/11] sched: update shares on idle_balance Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 09/11] sched: demand based update_cfs_load() Paul Turner
2010-11-12 10:53   ` Peter Zijlstra
2010-11-12 12:12     ` Peter Zijlstra
2010-11-13  1:01     ` Paul Turner
2010-11-13  2:42       ` Paul Turner
2010-11-13 22:45         ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 10/11] sched: allow update_cfs_load to update global load Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 11/11] sched: update tg->shares after cpu.shares write Paul Turner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101112032701.712083681@google.com \
    --to=pjt@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=cfriesen@nortel.com \
    --cc=davem@davemloft.net \
    --cc=kmr@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=pbourdon@excellency.fr \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.