From: pjt@google.com
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
Ingo Molnar <mingo@elte.hu>,
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
Chris Friesen <cfriesen@nortel.com>,
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
Pierre Bourdon <pbourdon@excellency.fr>,
Paul Turner <pjt@google.com>,
Bharata B Rao <bharata@linux.vnet.ibm.com>
Subject: [RFC tg_shares_up improvements - v1 05/12] sched: fix update_cfs_load synchronization
Date: Fri, 15 Oct 2010 21:43:54 -0700
Message-ID: <20101016045118.832209343@google.com>
In-Reply-To: <20101016044349.830426011@google.com>
Using cfs_rq->nr_running is not sufficient to synchronize update_cfs_load with
the put path, since nr_running accounting occurs at deactivation.

It's also not safe to make the removal decision based on load_avg, as this
fails with both high periods and low shares. Resolve this by clipping history
after 4 idle periods.

Note: the above will always occur from update_shares() since in the
last-task-sleep case that task will still be cfs_rq->curr when update_cfs_load
is called.
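For illustration, below is a minimal user-space sketch of the folding and
clipping behaviour; the struct, the helper name, and the 1ms stand-in for
sched_avg_period() are assumptions for demonstration, not the kernel code.
Once a queue has carried no weight for more than four averaging periods
(tracked via the new load_last stamp), its decayed history is discarded
outright, so the leaf-list removal check can test for load_avg == 0 exactly
instead of comparing against a threshold:

	/*
	 * Standalone sketch of the folding/clipping logic above.
	 * AVG_PERIOD is an arbitrary 1ms stand-in for sched_avg_period().
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define AVG_PERIOD 1000000ULL	/* stand-in for sched_avg_period() */

	struct cfs_rq_sketch {
		uint64_t load_avg, load_period;
		uint64_t load_stamp, load_last;
		unsigned long weight;	/* stand-in for cfs_rq->load.weight */
	};

	static void update_load(struct cfs_rq_sketch *cfs_rq, uint64_t now)
	{
		uint64_t delta = now - cfs_rq->load_stamp;

		/* truncate load history after 4 idle periods */
		if (cfs_rq->load_stamp > cfs_rq->load_last &&
		    now - cfs_rq->load_last > 4 * AVG_PERIOD) {
			cfs_rq->load_period = 0;
			cfs_rq->load_avg = 0;
		}

		cfs_rq->load_stamp = now;
		cfs_rq->load_period += delta;
		if (cfs_rq->weight) {
			cfs_rq->load_last = now;
			cfs_rq->load_avg += delta * cfs_rq->weight;
		}

		/* fold: halve accumulated history once per elapsed period */
		while (cfs_rq->load_period > AVG_PERIOD) {
			cfs_rq->load_period /= 2;
			cfs_rq->load_avg /= 2;
		}
	}

	int main(void)
	{
		struct cfs_rq_sketch rq = { .weight = 1024 };

		update_load(&rq, 1 * AVG_PERIOD);  /* one busy period */
		rq.weight = 0;                     /* last task dequeues */
		update_load(&rq, 2 * AVG_PERIOD);  /* load_stamp passes load_last */
		update_load(&rq, 6 * AVG_PERIOD);  /* > 4 idle periods: clipped */

		printf("load_avg: %llu\n",
		       (unsigned long long)rq.load_avg);  /* prints 0 */
		return 0;
	}

After the long idle gap the sketch prints a load_avg of 0; without the clip,
the halving loop alone would leave a nonzero residue after the same gap, which
is why the removal decision could not be made on load_avg before.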
Signed-off-by: Paul Turner <pjt@google.com>
---
kernel/sched.c | 2 +-
kernel/sched_fair.c | 33 +++++++++++++++++++++------------
2 files changed, 22 insertions(+), 13 deletions(-)
Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -674,10 +674,11 @@ account_entity_dequeue(struct cfs_rq *cf
}
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static void update_cfs_load(struct cfs_rq *cfs_rq)
{
u64 period = sched_avg_period();
u64 now, delta;
+ unsigned long load = cfs_rq->load.weight;
if (!cfs_rq)
return;
@@ -685,9 +686,19 @@ static void update_cfs_load(struct cfs_r
now = rq_of(cfs_rq)->clock;
delta = now - cfs_rq->load_stamp;
+ /* truncate load history at 4 idle periods */
+ if (cfs_rq->load_stamp > cfs_rq->load_last &&
+ now - cfs_rq->load_last > 4 * period) {
+ cfs_rq->load_period = 0;
+ cfs_rq->load_avg = 0;
+ }
+
cfs_rq->load_stamp = now;
cfs_rq->load_period += delta;
- cfs_rq->load_avg += delta * cfs_rq->load.weight;
+ if (load) {
+ cfs_rq->load_last = now;
+ cfs_rq->load_avg += delta * load;
+ }
while (cfs_rq->load_period > period) {
/*
@@ -700,10 +711,8 @@ static void update_cfs_load(struct cfs_r
cfs_rq->load_avg /= 2;
}
- if (lb && !cfs_rq->nr_running) {
- if (cfs_rq->load_avg < (period / 8))
- list_del_leaf_cfs_rq(cfs_rq);
- }
+ if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
+ list_del_leaf_cfs_rq(cfs_rq);
}
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
@@ -750,7 +759,7 @@ static void update_cfs_shares(struct cfs
reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
-static inline void update_cfs_load(struct cfs_rq *cfs_rq, int lb)
+static inline void update_cfs_load(struct cfs_rq *cfs_rq)
{
}
@@ -880,7 +889,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq);
- update_cfs_load(cfs_rq, 0);
+ update_cfs_load(cfs_rq);
update_cfs_shares(cfs_rq_of(se), se->load.weight);
account_entity_enqueue(cfs_rq, se);
@@ -941,7 +950,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
- update_cfs_load(cfs_rq, 0);
+ update_cfs_load(cfs_rq);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
update_cfs_shares(cfs_rq_of(se), 0);
@@ -1176,7 +1185,7 @@ enqueue_task_fair(struct rq *rq, struct
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- update_cfs_load(cfs_rq, 0);
+ update_cfs_load(cfs_rq);
update_cfs_shares(cfs_rq, 0);
}
@@ -1206,7 +1215,7 @@ static void dequeue_task_fair(struct rq
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
- update_cfs_load(cfs_rq, 0);
+ update_cfs_load(cfs_rq);
update_cfs_shares(cfs_rq, 0);
}
@@ -2019,7 +2028,7 @@ static int tg_shares_up(struct task_grou
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_load(cfs_rq, 1);
+ update_cfs_load(cfs_rq);
load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
load_avg -= cfs_rq->load_contribution;
Index: kernel/sched.c
===================================================================
--- kernel/sched.c.orig
+++ kernel/sched.c
@@ -357,7 +357,7 @@ struct cfs_rq {
u64 load_avg;
u64 load_period;
- u64 load_stamp;
+ u64 load_stamp, load_last;
unsigned long load_contribution;
#endif
--