All of lore.kernel.org
 help / color / mirror / Atom feed
From: pjt@google.com
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Chris Friesen <cfriesen@nortel.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Pierre Bourdon <pbourdon@excellency.fr>,
	Paul Turner <pjt@google.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>
Subject: [RFC tg_shares_up improvements - v1 05/12] sched: fix update_cfs_load synchronization
Date: Fri, 15 Oct 2010 21:43:54 -0700	[thread overview]
Message-ID: <20101016045118.832209343@google.com> (raw)
In-Reply-To: 20101016044349.830426011@google.com

[-- Attachment #1: sched-tg-fix-update_cfs_load.patch --]
[-- Type: text/plain, Size: 4089 bytes --]

Using cfs_rq->nr_running is not sufficient to synchronize update_cfs_load with
the put path since nr_running accounting occurs at deactivation.

It's also not safe to make the removal decision based on load_avg, as this fails
with both high periods and low shares.  Resolve this by truncating load history
once a cfs_rq has gone more than 4 periods without any load.

Note: the removal will always occur from update_shares(), since in the
last-task-sleep case that task will still be cfs_rq->curr when
update_cfs_load() is called.

Signed-off-by: Paul Turner <pjt@google.com>

---
 kernel/sched.c      |    2 +-
 kernel/sched_fair.c |   33 +++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 13 deletions(-)

Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -674,10 +674,11 @@ account_entity_dequeue(struct cfs_rq *cf
 }
 
 #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static void update_cfs_load(struct cfs_rq *cfs_rq)
 {
 	u64 period = sched_avg_period();
 	u64 now, delta;
+	unsigned long load = cfs_rq->load.weight;
 
 	if (!cfs_rq)
 		return;
@@ -685,9 +686,19 @@ static void update_cfs_load(struct cfs_r
 	now = rq_of(cfs_rq)->clock;
 	delta = now - cfs_rq->load_stamp;
 
+	/* truncate load history at 4 idle periods */
+	if (cfs_rq->load_stamp > cfs_rq->load_last &&
+	    now - cfs_rq->load_last > 4 * period) {
+		cfs_rq->load_period = 0;
+		cfs_rq->load_avg = 0;
+	}
+
 	cfs_rq->load_stamp = now;
 	cfs_rq->load_period += delta;
-	cfs_rq->load_avg += delta * cfs_rq->load.weight;
+	if (load) {
+		cfs_rq->load_last = now;
+		cfs_rq->load_avg += delta * load;
+	}
 
 	while (cfs_rq->load_period > period) {
 		/*
@@ -700,10 +711,8 @@ static void update_cfs_load(struct cfs_r
 		cfs_rq->load_avg /= 2;
 	}
 
-	if (lb && !cfs_rq->nr_running) {
-		if (cfs_rq->load_avg < (period / 8))
-			list_del_leaf_cfs_rq(cfs_rq);
-	}
+	if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
+		list_del_leaf_cfs_rq(cfs_rq);
 }
 
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
@@ -750,7 +759,7 @@ static void update_cfs_shares(struct cfs
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static inline void update_cfs_load(struct cfs_rq *cfs_rq)
 {
 }
 
@@ -880,7 +889,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	update_cfs_load(cfs_rq, 0);
+	update_cfs_load(cfs_rq);
 	update_cfs_shares(cfs_rq_of(se), se->load.weight);
 	account_entity_enqueue(cfs_rq, se);
 
@@ -941,7 +950,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
-	update_cfs_load(cfs_rq, 0);
+	update_cfs_load(cfs_rq);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq_of(se), 0);
@@ -1176,7 +1185,7 @@ enqueue_task_fair(struct rq *rq, struct 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		update_cfs_load(cfs_rq, 0);
+		update_cfs_load(cfs_rq);
 		update_cfs_shares(cfs_rq, 0);
 	}
 
@@ -1206,7 +1215,7 @@ static void dequeue_task_fair(struct rq 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		update_cfs_load(cfs_rq, 0);
+		update_cfs_load(cfs_rq);
 		update_cfs_shares(cfs_rq, 0);
 	}
 
@@ -2019,7 +2028,7 @@ static int tg_shares_up(struct task_grou
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
+	update_cfs_load(cfs_rq);
 
 	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
 	load_avg -= cfs_rq->load_contribution;
Index: kernel/sched.c
===================================================================
--- kernel/sched.c.orig
+++ kernel/sched.c
@@ -357,7 +357,7 @@ struct cfs_rq {
 
 	u64 load_avg;
 	u64 load_period;
-	u64 load_stamp;
+	u64 load_stamp, load_last;
 
 	unsigned long load_contribution;
 #endif

-- 


  parent reply	other threads:[~2010-10-16  4:54 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-16  4:43 [RFC tg_shares_up improvements - v1 00/12] [RFC tg_shares_up - v1 00/12] Reducing cost of tg->shares distribution pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 01/12] sched: rewrite tg_shares_up pjt
2010-10-21  6:04   ` Bharata B Rao
2010-10-21  6:28     ` Paul Turner
2010-10-21  8:08   ` Bharata B Rao
2010-10-21  8:38     ` Paul Turner
2010-10-21  9:08     ` Peter Zijlstra
     [not found]   ` <AANLkTi=zYAfb_izD15ROxH=C6+zPzX+XEGw7r5UUoAar@mail.gmail.com>
2010-11-04 21:00     ` Paul Turner
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 02/12] sched: on-demand (active) cfs_rq list pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 03/12] sched: make tg_shares_up() walk on-demand pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 04/12] sched: fix load corruption from update_cfs_shares pjt
2010-10-16  4:43 ` pjt [this message]
2010-10-21  9:52   ` [RFC tg_shares_up improvements - v1 05/12] sched: fix update_cfs_load synchronization Bharata B Rao
2010-10-21 18:25     ` Paul Turner
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 06/12] sched: hierarchal order on shares update list pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 07/12] sched: add sysctl_sched_shares_window pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 08/12] sched: update shares on idle_balance pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 09/12] sched: demand based update_cfs_load() pjt
2010-10-16  4:43 ` [RFC tg_shares_up improvements - v1 10/12] sched: allow update_cfs_load to update global load pjt
2010-10-16  4:44 ` [RFC tg_shares_up improvements - v1 11/12] sched: update tg->shares after cpu.shares write pjt
2010-10-16  4:44 ` [RFC tg_shares_up improvements - v1 12/12] debug: export effective shares for analysis versus specified pjt
2010-10-16 19:46 ` [RFC tg_shares_up improvements - v1 00/12] [RFC tg_shares_up - v1 00/12] Reducing cost of tg->shares distribution Peter Zijlstra
2010-10-21  6:36   ` Paul Turner
2010-10-22  0:14     ` Paul Turner
2010-10-17  5:24 ` Balbir Singh
2010-10-17  9:38   ` Peter Zijlstra
2010-10-17 12:09     ` Balbir Singh
2010-11-03 18:27 ` Karl Rister

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101016045118.832209343@google.com \
    --to=pjt@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=cfriesen@nortel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=pbourdon@excellency.fr \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.