From: Frederic Weisbecker <fweisbec@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
Alessio Igor Bogani <abogani@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Avi Kivity <avi@redhat.com>, Chris Metcalf <cmetcalf@tilera.com>,
Christoph Lameter <cl@linux.com>,
Geoff Levand <geoff@infradead.org>,
Gilad Ben Yossef <gilad@benyossef.com>,
Hakan Akkan <hakanakkan@gmail.com>,
Ingo Molnar <mingo@kernel.org>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Paul Gortmaker <paul.gortmaker@windriver.com>,
Peter Zijlstra <peterz@infradead.org>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 03/24] cputime: Allow dynamic switch between tick/virtual based cputime accounting
Date: Thu, 20 Dec 2012 19:32:50 +0100 [thread overview]
Message-ID: <1356028391-14427-4-git-send-email-fweisbec@gmail.com> (raw)
In-Reply-To: <1356028391-14427-1-git-send-email-fweisbec@gmail.com>
Allow dynamic switching between tick based and virtual based cputime
accounting. This way we can provide a kind of "on-demand" virtual based
cputime accounting. In this mode, the kernel will rely on the context
tracking subsystem to dynamically hook on kernel/user boundaries.
This is in preparation for being able to stop the timer tick beyond
idle. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING, which makes
it possible to account the cputime without the tick by hooking on
kernel/user boundaries.
Depending on whether the tick is stopped or not, we can switch between
tick and vtime based accounting at any time in order to minimize the
overhead associated with the context tracking hooks.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/kernel_stat.h | 2 +-
include/linux/sched.h | 4 +-
include/linux/vtime.h | 9 +++++++
init/Kconfig | 6 ++++
kernel/fork.c | 2 +-
kernel/sched/cputime.c | 57 ++++++++++++++++++++++++++++---------------
kernel/time/tick-sched.c | 5 +++-
7 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b7078..ed5f6ed 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 651b51a..547c1f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -597,7 +597,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -1357,7 +1357,7 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 58392aa..e57020d 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,11 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
extern void vtime_account(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern bool vtime_accounting(void);
#else
+static inline bool vtime_accounting(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { }
+static inline bool vtime_accounting(void) { return false; }
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
diff --git a/init/Kconfig b/init/Kconfig
index a64b3e8..9d7000a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -342,6 +342,7 @@ config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on HAVE_VIRT_CPU_ACCOUNTING || HAVE_CONTEXT_TRACKING
select VIRT_CPU_ACCOUNTING_GEN if !HAVE_VIRT_CPU_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING_NATIVE if HAVE_VIRT_CPU_ACCOUNTING
default y if PPC64
help
Select this option to enable more accurate task and CPU time
@@ -367,11 +368,16 @@ endchoice
config VIRT_CPU_ACCOUNTING_GEN
select CONTEXT_TRACKING
+ depends on VIRT_CPU_ACCOUNTING && HAVE_CONTEXT_TRACKING
bool
help
Implement a generic virtual based cputime accounting by using
the context tracking subsystem.
+config VIRT_CPU_ACCOUNTING_NATIVE
+ depends on VIRT_CPU_ACCOUNTING && HAVE_VIRT_CPU_ACCOUNTING
+ bool
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
diff --git a/kernel/fork.c b/kernel/fork.c
index 3c31e87..a81efb8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1221,7 +1221,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index da0a9e7..e1fcab4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -317,8 +317,6 @@ out:
rcu_read_unlock();
}
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@@ -388,6 +386,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
@@ -398,6 +397,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (vtime_accounting()) {
+ vtime_account_user(p);
+ return;
+ }
+
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
@@ -439,29 +443,13 @@ void account_idle_ticks(unsigned long ticks)
account_idle_time(jiffies_to_cputime(ticks));
}
-
#endif
+
/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- *ut = p->utime;
- *st = p->stime;
-}
-
-void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct task_cputime cputime;
-
- thread_group_cputime(p, &cputime);
-
- *ut = cputime.utime;
- *st = cputime.stime;
-}
-
void vtime_account_system_irqsafe(struct task_struct *tsk)
{
unsigned long flags;
@@ -517,8 +505,25 @@ void vtime_account(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ *ut = p->utime;
+ *st = p->stime;
+}
+
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputime(p, &cputime);
+
+ *ut = cputime.utime;
+ *st = cputime.stime;
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
@@ -548,6 +553,12 @@ static void cputime_adjust(struct task_cputime *curr,
{
cputime_t rtime, utime, total;
+ if (vtime_accounting()) {
+ *ut = curr->utime;
+ *st = curr->stime;
+ return;
+ }
+
utime = curr->utime;
total = utime + curr->stime;
@@ -601,6 +612,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
thread_group_cputime(p, &cputime);
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static DEFINE_PER_CPU(long, last_jiffies) = INITIAL_JIFFIES;
@@ -642,6 +654,11 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(delta_cpu);
}
+bool vtime_accounting(void)
+{
+ return context_tracking_active();
+}
+
static int __cpuinit vtime_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fb8e5e4..ad0e6fa 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
+
+ if (vtime_accounting())
+ return;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
--
1.7.5.4
next prev parent reply other threads:[~2012-12-20 18:33 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-12-20 18:32 [ANNOUNCE] 3.7-nohz1 Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 01/24] context_tracking: Add comments on interface and internals Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 02/24] cputime: Generic on-demand virtual cputime accounting Frederic Weisbecker
2012-12-21 5:11 ` Steven Rostedt
2012-12-26 8:19 ` Li Zhong
2012-12-29 13:15 ` Frederic Weisbecker
2012-12-20 18:32 ` Frederic Weisbecker [this message]
2012-12-21 15:05 ` [PATCH 03/24] cputime: Allow dynamic switch between tick/virtual based " Steven Rostedt
2012-12-22 17:43 ` Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 04/24] cputime: Use accessors to read task cputime stats Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 05/24] cputime: Safely read cputime of full dynticks CPUs Frederic Weisbecker
2012-12-21 15:09 ` Steven Rostedt
2012-12-22 17:51 ` Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 06/24] nohz: Basic full dynticks interface Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 07/24] nohz: Assign timekeeping duty to a non-full-nohz CPU Frederic Weisbecker
2012-12-21 16:13 ` Steven Rostedt
2012-12-22 16:39 ` Frederic Weisbecker
2012-12-22 17:05 ` Steven Rostedt
2012-12-20 18:32 ` [PATCH 08/24] nohz: Trace timekeeping update Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 09/24] nohz: Wake up full dynticks CPUs when a timer gets enqueued Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 10/24] rcu: Restart the tick on non-responding full dynticks CPUs Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 11/24] sched: Comment on rq->clock correctness in ttwu_do_wakeup() in nohz Frederic Weisbecker
2012-12-20 18:32 ` [PATCH 12/24] sched: Update rq clock on nohz CPU before migrating tasks Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 13/24] sched: Update rq clock on nohz CPU before setting fair group shares Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 14/24] sched: Update rq clock on tickless CPUs before calling check_preempt_curr() Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 15/24] sched: Update rq clock earlier in unthrottle_cfs_rq Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 16/24] sched: Update clock of nohz busiest rq before balancing Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 17/24] sched: Update rq clock before idle balancing Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 18/24] sched: Update nohz rq clock before searching busiest group on load balancing Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 19/24] nohz: Move nohz load balancer selection into idle logic Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 20/24] nohz: Full dynticks mode Frederic Weisbecker
2012-12-26 6:12 ` Namhyung Kim
2012-12-26 7:02 ` Namhyung Kim
2012-12-29 13:21 ` Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 21/24] nohz: Only stop the tick on RCU nocb CPUs Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 22/24] nohz: Don't turn off the tick if rcu needs it Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 23/24] nohz: Don't stop the tick if posix cpu timers are running Frederic Weisbecker
2012-12-20 18:33 ` [PATCH 24/24] nohz: Add some tracing Frederic Weisbecker
2012-12-21 2:35 ` [ANNOUNCE] 3.7-nohz1 Steven Rostedt
2012-12-23 23:43 ` Frederic Weisbecker
2012-12-30 3:56 ` Paul E. McKenney
2013-01-04 23:42 ` Frederic Weisbecker
2013-01-07 13:06 ` Paul E. McKenney
2012-12-21 5:20 ` Hakan Akkan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1356028391-14427-4-git-send-email-fweisbec@gmail.com \
--to=fweisbec@gmail.com \
--cc=abogani@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=avi@redhat.com \
--cc=cl@linux.com \
--cc=cmetcalf@tilera.com \
--cc=geoff@infradead.org \
--cc=gilad@benyossef.com \
--cc=hakanakkan@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=paul.gortmaker@windriver.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).