* [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-27 15:40 [RFC PATCH 0/5] cputime: Generic virtual based cputime accounting Frederic Weisbecker
@ 2012-07-27 15:40 ` Frederic Weisbecker
2012-07-27 16:40 ` Paul E. McKenney
2012-07-30 15:08 ` Peter Zijlstra
2012-07-27 15:40 ` [PATCH 2/5] cputime: Don't allow virtual and irq finegrained cputime accounting simultaneously Frederic Weisbecker
` (3 subsequent siblings)
4 siblings, 2 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 15:40 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney, Peter Zijlstra,
Stephen Hemminger, Steven Rostedt, Sven-Thorsten Dietrich,
Thomas Gleixner
Create a new subsystem that handles the hooks on kernel/user
boundaries currently used by RCU for its userspace extended
quiescent state.
We need to pull this up from RCU into this new level of indirection
because these hooks are also going to be used to implement an "on
demand" generic virtual cputime accounting. A necessary step to
shut down the tick while still accounting the cputime.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
arch/Kconfig | 10 ++--
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/{rcu.h => user_hooks.h} | 12 +++---
arch/x86/kernel/ptrace.c | 6 +-
arch/x86/kernel/signal.c | 5 +-
arch/x86/kernel/traps.c | 2 +-
arch/x86/mm/fault.c | 2 +-
include/linux/rcupdate.h | 2 -
include/linux/sched.h | 8 ----
include/linux/user_hooks.h | 18 ++++++++
init/Kconfig | 24 +++++++----
kernel/Makefile | 1 +
kernel/rcutree.c | 42 +------------------
kernel/sched/core.c | 9 ++--
kernel/user_hooks.c | 56 ++++++++++++++++++++++++++
15 files changed, 117 insertions(+), 82 deletions(-)
rename arch/x86/include/asm/{rcu.h => user_hooks.h} (56%)
create mode 100644 include/linux/user_hooks.h
create mode 100644 kernel/user_hooks.c
diff --git a/arch/Kconfig b/arch/Kconfig
index d891c62..b8b987c 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -277,14 +277,14 @@ config SECCOMP_FILTER
config HAVE_VIRT_CPU_ACCOUNTING
bool
-config HAVE_RCU_USER_QS
+config HAVE_USER_HOOKS
bool
help
Provide kernel entry/exit hooks necessary for userspace
RCU extended quiescent state. Syscalls need to be wrapped inside
- rcu_user_exit()-rcu_user_enter() through the slow path using
- TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
- are already protected inside rcu_irq_enter/rcu_irq_exit() but
- preemption or signal handling on irq exit still need to be protected.
+ user_exit()-user_enter() through the slow path using TIF_NOHZ flag.
+ Exceptions handlers must be wrapped as well. Irqs are already
+ protected inside rcu_irq_enter/rcu_irq_exit() but preemption or
+ signal handling on irq exit still need to be protected.
source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 38dfcc2..ee2ca37 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -95,7 +95,7 @@ config X86
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
- select HAVE_RCU_USER_QS if X86_64
+ select HAVE_USER_HOOKS if X86_64
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/user_hooks.h
similarity index 56%
rename from arch/x86/include/asm/rcu.h
rename to arch/x86/include/asm/user_hooks.h
index 439815b..b5d10ef 100644
--- a/arch/x86/include/asm/rcu.h
+++ b/arch/x86/include/asm/user_hooks.h
@@ -1,19 +1,19 @@
-#ifndef _ASM_X86_RCU_H
-#define _ASM_X86_RCU_H
+#ifndef _ASM_X86_USER_HOOKS_H
+#define _ASM_X86_USER_HOOKS_H
-#include <linux/rcupdate.h>
+#include <linux/user_hooks.h>
#include <asm/ptrace.h>
static inline void exception_enter(struct pt_regs *regs)
{
- rcu_user_exit();
+ user_exit();
}
static inline void exception_exit(struct pt_regs *regs)
{
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_USER_HOOKS
if (user_mode(regs))
- rcu_user_enter();
+ user_enter();
#endif
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 9f94f8e..a8137b3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,7 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
-#include <linux/rcupdate.h>
+#include <linux/user_hooks.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1464,7 +1464,7 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
- rcu_user_exit();
+ user_exit();
/*
* If we stepped into a sysenter/syscall insn, it trapped in
@@ -1530,5 +1530,5 @@ void syscall_trace_leave(struct pt_regs *regs)
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
- rcu_user_enter();
+ user_enter();
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 5cc2579..61edb6b 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
+#include <linux/user_hooks.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -776,7 +777,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
- rcu_user_exit();
+ user_exit();
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
@@ -804,7 +805,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
- rcu_user_enter();
+ user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9b8195b..f563a07 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -52,7 +52,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
-#include <asm/rcu.h>
+#include <asm/user_hooks.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7dde46d..1906366 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,7 +18,7 @@
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
-#include <asm/rcu.h> /* exception_enter(), ... */
+#include <asm/user_hooks.h> /* exception_enter(), ... */
/*
* Page fault error code bits:
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1fc0a0e..e411117 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -197,8 +197,6 @@ extern void rcu_user_enter(void);
extern void rcu_user_exit(void);
extern void rcu_user_enter_irq(void);
extern void rcu_user_exit_irq(void);
-extern void rcu_user_hooks_switch(struct task_struct *prev,
- struct task_struct *next);
#else
static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 30105f4..7b7a438 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1899,14 +1899,6 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif
-static inline void rcu_switch(struct task_struct *prev,
- struct task_struct *next)
-{
-#ifdef CONFIG_RCU_USER_QS
- rcu_user_hooks_switch(prev, next);
-#endif
-}
-
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask);
diff --git a/include/linux/user_hooks.h b/include/linux/user_hooks.h
new file mode 100644
index 0000000..720292d
--- /dev/null
+++ b/include/linux/user_hooks.h
@@ -0,0 +1,18 @@
+#ifndef _LINUX_USER_HOOKS_H
+#define _LINUX_USER_HOOKS_H
+
+#ifdef CONFIG_USER_HOOKS
+#include <linux/sched.h>
+
+extern void user_enter(void);
+extern void user_exit(void);
+extern void user_hooks_switch(struct task_struct *prev,
+ struct task_struct *next);
+#else
+static inline void user_enter(void) { }
+static inline void user_exit(void) { }
+static inline void user_hooks_switch(struct task_struct *prev,
+ struct task_struct *next) { }
+#endif /* !CONFIG_USER_HOOKS */
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index cc1d581..3348b85 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -404,6 +404,19 @@ config AUDIT_LOGINUID_IMMUTABLE
source "kernel/irq/Kconfig"
source "kernel/time/Kconfig"
+config USER_HOOKS
+ bool
+
+config USER_HOOKS_FORCE
+ bool "Force userspace hooks"
+ depends on USER_HOOKS
+ help
+ Set the hooks in user/kernel boundaries by default in order to
+ test the features that rely on it such as userspace RCU extended
+ quiescent states.
+ This test is there for debugging until we have a real user like a
+ full adaptive nohz option.
+
menu "RCU Subsystem"
choice
@@ -456,7 +469,8 @@ config PREEMPT_RCU
config RCU_USER_QS
bool "Consider userspace as in RCU extended quiescent state"
- depends on HAVE_RCU_USER_QS && SMP
+ depends on HAVE_USER_HOOKS && SMP
+ select USER_HOOKS
help
This option sets hooks on kernel / userspace boundaries and
puts RCU in extended quiescent state when the CPU runs in
@@ -464,14 +478,6 @@ config RCU_USER_QS
excluded from the global RCU state machine and thus doesn't
to keep the timer tick on for RCU.
-config RCU_USER_QS_FORCE
- bool "Force userspace extended QS by default"
- depends on RCU_USER_QS
- help
- Set the hooks in user/kernel boundaries by default in order to
- test this feature that treats userspace as an extended quiescent
- state until we have a real user like a full adaptive nohz option.
-
config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
range 2 64 if 64BIT
diff --git a/kernel/Makefile b/kernel/Makefile
index c0cc67a..78d2c1e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_USER_HOOKS) += user_hooks.o
$(obj)/configs.o: $(obj)/config_data.h
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 318d00e..f6a24cb 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -212,9 +212,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(1),
-#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
- .ignore_user_qs = true,
-#endif
};
static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
@@ -448,18 +445,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
*/
void rcu_user_enter(void)
{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
-
- WARN_ON_ONCE(!current->mm);
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (!rdtp->ignore_user_qs && !rdtp->in_user) {
- rdtp->in_user = true;
- rcu_eqs_enter(1);
- }
- local_irq_restore(flags);
+ rcu_eqs_enter(1);
}
EXPORT_SYMBOL_GPL(rcu_user_enter);
@@ -597,16 +583,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
*/
void rcu_user_exit(void)
{
- unsigned long flags;
- struct rcu_dynticks *rdtp;
-
- local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (rdtp->in_user) {
- rdtp->in_user = false;
- rcu_eqs_exit(1);
- }
- local_irq_restore(flags);
+ rcu_eqs_exit(1);
}
EXPORT_SYMBOL_GPL(rcu_user_exit);
@@ -730,21 +707,6 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);
-#ifdef CONFIG_RCU_USER_QS
-void rcu_user_hooks_switch(struct task_struct *prev,
- struct task_struct *next)
-{
- struct rcu_dynticks *rdtp;
-
- /* Interrupts are disabled in context switch */
- rdtp = &__get_cpu_var(rcu_dynticks);
- if (!rdtp->ignore_user_qs) {
- clear_tsk_thread_flag(prev, TIF_NOHZ);
- set_tsk_thread_flag(next, TIF_NOHZ);
- }
-}
-#endif /* #ifdef CONFIG_RCU_USER_QS */
-
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94a4894..fd7525b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -72,6 +72,7 @@
#include <linux/slab.h>
#include <linux/init_task.h>
#include <linux/binfmts.h>
+#include <linux/user_hooks.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -1926,7 +1927,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
#endif
/* Here we just switch the register state and the stack. */
- rcu_switch(prev, next);
+ user_hooks_switch(prev, next);
switch_to(prev, next, prev);
barrier();
@@ -2920,9 +2921,9 @@ EXPORT_SYMBOL(schedule);
asmlinkage void __sched schedule_user(void)
{
- rcu_user_exit();
+ user_exit();
schedule();
- rcu_user_enter();
+ user_enter();
}
/**
@@ -3026,7 +3027,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
- rcu_user_exit();
+ user_exit();
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
diff --git a/kernel/user_hooks.c b/kernel/user_hooks.c
new file mode 100644
index 0000000..63174b0
--- /dev/null
+++ b/kernel/user_hooks.c
@@ -0,0 +1,56 @@
+#include <linux/user_hooks.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct user_hooks {
+ bool hooking;
+ bool in_user;
+};
+
+DEFINE_PER_CPU(struct user_hooks, user_hooks) = {
+#ifdef CONFIG_USER_HOOKS_FORCE
+ .hooking = true,
+#endif
+};
+
+void user_enter(void)
+{
+ unsigned long flags;
+ struct user_hooks *uh;
+
+ WARN_ON_ONCE(!current->mm);
+ local_irq_save(flags);
+ uh = &__get_cpu_var(user_hooks);
+ if (uh->hooking && !uh->in_user) {
+ uh->in_user = true;
+ rcu_user_enter();
+ }
+ local_irq_restore(flags);
+}
+
+void user_exit(void)
+{
+ unsigned long flags;
+ struct user_hooks *uh;
+
+ local_irq_save(flags);
+ uh = &__get_cpu_var(user_hooks);
+ if (uh->in_user) {
+ uh->in_user = false;
+ rcu_user_exit();
+ }
+ local_irq_restore(flags);
+}
+
+void user_hooks_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ struct user_hooks *uh;
+
+ uh = &__get_cpu_var(user_hooks);
+ if (uh->hooking) {
+ clear_tsk_thread_flag(prev, TIF_NOHZ);
+ set_tsk_thread_flag(next, TIF_NOHZ);
+ }
+}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-27 15:40 ` [PATCH 1/5] user_hooks: New user hooks subsystem Frederic Weisbecker
@ 2012-07-27 16:40 ` Paul E. McKenney
2012-07-27 16:58 ` Frederic Weisbecker
2012-07-30 15:08 ` Peter Zijlstra
1 sibling, 1 reply; 22+ messages in thread
From: Paul E. McKenney @ 2012-07-27 16:40 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Alessio Igor Bogani, Andrew Morton, Avi Kivity,
Chris Metcalf, Christoph Lameter, Geoff Levand, Gilad Ben Yossef,
Hakan Akkan, H. Peter Anvin, Ingo Molnar, Kevin Hilman,
Max Krasnyansky, Peter Zijlstra, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Fri, Jul 27, 2012 at 05:40:30PM +0200, Frederic Weisbecker wrote:
> Create a new subsystem that handles the hooks on kernel/user
> boundaries currently used by RCU for its userspace extended
> quiescent state.
>
> We need to pull this up from RCU into this new level of indirection
> because these hooks are also going to be used to implement an "on
> demand" generic virtual cputime accounting. A necessary step to
> shutdown the tick while still accounting the cputime.
So this eliminates the case where the architecture might enter an
RCU extended quiescent state multiple times, but exit it only once?
(I am hoping that it does...)
Thanx, Paul
[ . . . ]
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 318d00e..f6a24cb 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -212,9 +212,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
> DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
> .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
> .dynticks = ATOMIC_INIT(1),
> -#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
> - .ignore_user_qs = true,
> -#endif
> };
>
> static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
> @@ -448,18 +445,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
> */
> void rcu_user_enter(void)
> {
> - unsigned long flags;
> - struct rcu_dynticks *rdtp;
> -
> - WARN_ON_ONCE(!current->mm);
> -
> - local_irq_save(flags);
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (!rdtp->ignore_user_qs && !rdtp->in_user) {
> - rdtp->in_user = true;
> - rcu_eqs_enter(1);
> - }
> - local_irq_restore(flags);
> + rcu_eqs_enter(1);
> }
> EXPORT_SYMBOL_GPL(rcu_user_enter);
>
> @@ -597,16 +583,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
> */
> void rcu_user_exit(void)
> {
> - unsigned long flags;
> - struct rcu_dynticks *rdtp;
> -
> - local_irq_save(flags);
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (rdtp->in_user) {
> - rdtp->in_user = false;
> - rcu_eqs_exit(1);
> - }
> - local_irq_restore(flags);
> + rcu_eqs_exit(1);
> }
> EXPORT_SYMBOL_GPL(rcu_user_exit);
>
> @@ -730,21 +707,6 @@ int rcu_is_cpu_idle(void)
> }
> EXPORT_SYMBOL(rcu_is_cpu_idle);
>
> -#ifdef CONFIG_RCU_USER_QS
> -void rcu_user_hooks_switch(struct task_struct *prev,
> - struct task_struct *next)
> -{
> - struct rcu_dynticks *rdtp;
> -
> - /* Interrupts are disabled in context switch */
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (!rdtp->ignore_user_qs) {
> - clear_tsk_thread_flag(prev, TIF_NOHZ);
> - set_tsk_thread_flag(next, TIF_NOHZ);
> - }
> -}
> -#endif /* #ifdef CONFIG_RCU_USER_QS */
> -
> #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
>
> /*
[ . . . ]
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-27 16:40 ` Paul E. McKenney
@ 2012-07-27 16:58 ` Frederic Weisbecker
0 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 16:58 UTC (permalink / raw)
To: Paul E. McKenney
Cc: LKML, Alessio Igor Bogani, Andrew Morton, Avi Kivity,
Chris Metcalf, Christoph Lameter, Geoff Levand, Gilad Ben Yossef,
Hakan Akkan, H. Peter Anvin, Ingo Molnar, Kevin Hilman,
Max Krasnyansky, Peter Zijlstra, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Fri, Jul 27, 2012 at 09:40:54AM -0700, Paul E. McKenney wrote:
> On Fri, Jul 27, 2012 at 05:40:30PM +0200, Frederic Weisbecker wrote:
> > Create a new subsystem that handles the hooks on kernel/user
> > boundaries currently used by RCU for its userspace extended
> > quiescent state.
> >
> > We need to pull this up from RCU into this new level of indirection
> > because these hooks are also going to be used to implement an "on
> > demand" generic virtual cputime accounting. A necessary step to
> > shutdown the tick while still accounting the cputime.
>
> So this eliminates the case where the architecture might enter an
> RCU extended quiescent state multiple times, but exit it only once?
> (I am hoping that it does...)
Yeah. It should handle that.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-27 15:40 ` [PATCH 1/5] user_hooks: New user hooks subsystem Frederic Weisbecker
2012-07-27 16:40 ` Paul E. McKenney
@ 2012-07-30 15:08 ` Peter Zijlstra
2012-07-30 15:27 ` Steven Rostedt
2012-07-30 15:51 ` Frederic Weisbecker
1 sibling, 2 replies; 22+ messages in thread
From: Peter Zijlstra @ 2012-07-30 15:08 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: LKML, Alessio Igor Bogani, Andrew Morton, Avi Kivity,
Chris Metcalf, Christoph Lameter, Geoff Levand, Gilad Ben Yossef,
Hakan Akkan, H. Peter Anvin, Ingo Molnar, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> +++ b/kernel/user_hooks.c
> @@ -0,0 +1,56 @@
> +#include <linux/user_hooks.h>
> +#include <linux/rcupdate.h>
> +#include <linux/sched.h>
> +#include <linux/percpu.h>
> +
> +struct user_hooks {
> + bool hooking;
> + bool in_user;
> +};
I really detest using bool in structures.. but that's just me. Also this
really wants a comment as to wtf 'hooking' means. in_user I can just
about guess.
> +DEFINE_PER_CPU(struct user_hooks, user_hooks) = {
> +#ifdef CONFIG_USER_HOOKS_FORCE
> + .hooking = true,
> +#endif
> +};
> +
> +void user_enter(void)
> +{
> + unsigned long flags;
> + struct user_hooks *uh;
> +
> + WARN_ON_ONCE(!current->mm);
> + local_irq_save(flags);
> + uh = &__get_cpu_var(user_hooks);
> + if (uh->hooking && !uh->in_user) {
> + uh->in_user = true;
> + rcu_user_enter();
> + }
By not using __get_cpu_var() but __this_cpu_*() you generate much better
code (esp. on x86).
IOW. something like:
if (__this_cpu_read(uh.hooking) && !__this_cpu_read(uh.in_user)) {
__this_cpu_write(uh.in_user, true);
rcu_user_enter();
}
> + local_irq_restore(flags);
> +}
> +
> +void user_exit(void)
> +{
> + unsigned long flags;
> + struct user_hooks *uh;
> +
> + local_irq_save(flags);
> + uh = &__get_cpu_var(user_hooks);
> + if (uh->in_user) {
> + uh->in_user = false;
> + rcu_user_exit();
> + }
> + local_irq_restore(flags);
> +}
> +
> +void user_hooks_switch(struct task_struct *prev,
> + struct task_struct *next)
> +{
> + struct user_hooks *uh;
> +
> + uh = &__get_cpu_var(user_hooks);
> + if (uh->hooking) {
> + clear_tsk_thread_flag(prev, TIF_NOHZ);
> + set_tsk_thread_flag(next, TIF_NOHZ);
> + }
This seems pointless to me.. why are we flipping that flag on context
switch instead of keeping it enabled at all times? This are two atomic
ops in the context switch path, why?
> +}
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 15:08 ` Peter Zijlstra
@ 2012-07-30 15:27 ` Steven Rostedt
2012-07-30 16:30 ` Peter Zijlstra
2012-07-30 15:51 ` Frederic Weisbecker
1 sibling, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2012-07-30 15:27 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Frederic Weisbecker, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, 2012-07-30 at 17:08 +0200, Peter Zijlstra wrote:
> On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > +++ b/kernel/user_hooks.c
> > @@ -0,0 +1,56 @@
> > +#include <linux/user_hooks.h>
> > +#include <linux/rcupdate.h>
> > +#include <linux/sched.h>
> > +#include <linux/percpu.h>
> > +
> > +struct user_hooks {
> > + bool hooking;
> > + bool in_user;
> > +};
>
> I really detest using bool in structures.. but that's just me. Also this
> really wants a comment as to wtf 'hooking' means. in_user I can just
> about guess.
I'm curious to what you have against bool in structures? Would you
prefer a:
struct user_hooks {
unsigned int hooking:1;
unsigned int in_user:1;
};
instead? I haven't checked, but I would hope that gcc would optimize the
struct into a single word.
But I could see that it can cause races as that would make modifying
hooking and in_user dependent on each other. That is, if one CPU updates
hooking as another CPU updates in_user, that could cause a
read-modify-write race. At least in this case the modification is only
done on local cpu variables.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 15:27 ` Steven Rostedt
@ 2012-07-30 16:30 ` Peter Zijlstra
0 siblings, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2012-07-30 16:30 UTC (permalink / raw)
To: Steven Rostedt
Cc: Frederic Weisbecker, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, 2012-07-30 at 11:27 -0400, Steven Rostedt wrote:
> I'm curious to what you have against bool in structures?
_Bool as per the C std doesn't have a specified storage. Now IIRC hpa
recently said that all GCC versions so far were consistent and used char
(a byte) for it, but I might mis-remember.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 15:08 ` Peter Zijlstra
2012-07-30 15:27 ` Steven Rostedt
@ 2012-07-30 15:51 ` Frederic Weisbecker
2012-07-30 16:07 ` Steven Rostedt
2012-07-31 7:06 ` Ingo Molnar
1 sibling, 2 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-30 15:51 UTC (permalink / raw)
To: Peter Zijlstra
Cc: LKML, Alessio Igor Bogani, Andrew Morton, Avi Kivity,
Chris Metcalf, Christoph Lameter, Geoff Levand, Gilad Ben Yossef,
Hakan Akkan, H. Peter Anvin, Ingo Molnar, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, Jul 30, 2012 at 05:08:12PM +0200, Peter Zijlstra wrote:
> On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > +++ b/kernel/user_hooks.c
> > @@ -0,0 +1,56 @@
> > +#include <linux/user_hooks.h>
> > +#include <linux/rcupdate.h>
> > +#include <linux/sched.h>
> > +#include <linux/percpu.h>
> > +
> > +struct user_hooks {
> > + bool hooking;
> > + bool in_user;
> > +};
>
> I really detest using bool in structures.. but that's just me. Also this
> really wants a comment as to wtf 'hooking' means. in_user I can just
> about guess.
I really don't mind changing that to int. I just like them as bool because
they better describe the purpose of the field.
hooking means that the hooks are set (the TIF flag is set on the current task
and we also handle the exception hooks).
I can call that is_hooking instead? And/or add a comment to explain the
purpose of this.
>
> > +DEFINE_PER_CPU(struct user_hooks, user_hooks) = {
> > +#ifdef CONFIG_USER_HOOKS_FORCE
> > + .hooking = true,
> > +#endif
> > +};
> > +
> > +void user_enter(void)
> > +{
> > + unsigned long flags;
> > + struct user_hooks *uh;
> > +
> > + WARN_ON_ONCE(!current->mm);
> > + local_irq_save(flags);
> > + uh = &__get_cpu_var(user_hooks);
> > + if (uh->hooking && !uh->in_user) {
> > + uh->in_user = true;
> > + rcu_user_enter();
> > + }
>
> By not using __get_cpu_var() but __this_cpu_*() you generate much better
> code (esp. on x86).
>
> IOW. something like:
>
> if (__this_cpu_read(uh.hooking) && !__this_cpu_read(uh.in_user)) {
> __this_cpu_write(uh.in_user, true);
> rcu_user_enter();
> }
Ok, I'll replace.
>
> > + local_irq_restore(flags);
> > +}
> > +
> > +void user_exit(void)
> > +{
> > + unsigned long flags;
> > + struct user_hooks *uh;
> > +
> > + local_irq_save(flags);
> > + uh = &__get_cpu_var(user_hooks);
> > + if (uh->in_user) {
> > + uh->in_user = false;
> > + rcu_user_exit();
> > + }
> > + local_irq_restore(flags);
> > +}
> > +
> > +void user_hooks_switch(struct task_struct *prev,
> > + struct task_struct *next)
> > +{
> > + struct user_hooks *uh;
> > +
> > + uh = &__get_cpu_var(user_hooks);
> > + if (uh->hooking) {
> > + clear_tsk_thread_flag(prev, TIF_NOHZ);
> > + set_tsk_thread_flag(next, TIF_NOHZ);
> > + }
>
> This seems pointless to me.. why are we flipping that flag on context
> switch instead of keeping it enabled at all times? This are two atomic
> ops in the context switch path, why?
Because the hooks are per cpu. If we activate the hooks on CPU 1 but not
on CPU 2 and prev was running on CPU 1 and migrates on CPU 2, it's going
to keep the hook on that CPU 2 if we don't clear the flag.
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 15:51 ` Frederic Weisbecker
@ 2012-07-30 16:07 ` Steven Rostedt
2012-07-30 16:31 ` Peter Zijlstra
2012-07-30 16:32 ` Peter Zijlstra
2012-07-31 7:06 ` Ingo Molnar
1 sibling, 2 replies; 22+ messages in thread
From: Steven Rostedt @ 2012-07-30 16:07 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, 2012-07-30 at 17:51 +0200, Frederic Weisbecker wrote:
> On Mon, Jul 30, 2012 at 05:08:12PM +0200, Peter Zijlstra wrote:
> > On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > > +++ b/kernel/user_hooks.c
> > > @@ -0,0 +1,56 @@
> > > +#include <linux/user_hooks.h>
> > > +#include <linux/rcupdate.h>
> > > +#include <linux/sched.h>
> > > +#include <linux/percpu.h>
> > > +
> > > +struct user_hooks {
> > > + bool hooking;
> > > + bool in_user;
> > > +};
> >
> > I really detest using bool in structures.. but that's just me. Also this
> > really wants a comment as to wtf 'hooking' means. in_user I can just
> > about guess.
>
> I really don't mind changing that to int. I just like them as bool because
> they better describe the purpose of the field.
Not only does bool describe it better, it should also allow gcc to
optimize it better as well. Unless Peter has a legitimate rationale for why
using bool in structs is bad, I would keep it as is.
>
> hooking means that the hooks are set (the TIF flag is set on the current task
> and we also handle the exception hooks).
>
> I can call that is_hooking instead? And/or add a comment to explain the
> purpose of this.
Would 'is_hooked' be better? 'is_hooking' sounds more like what women in
high heels, really short skirts and lots of makeup are doing late night
on a corner of a Paris street ;-)
A comment to explain the purpose should be added regardless.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 16:07 ` Steven Rostedt
@ 2012-07-30 16:31 ` Peter Zijlstra
2012-07-30 16:32 ` Peter Zijlstra
1 sibling, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2012-07-30 16:31 UTC (permalink / raw)
To: Steven Rostedt
Cc: Frederic Weisbecker, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, 2012-07-30 at 12:07 -0400, Steven Rostedt wrote:
>
> Would 'is_hooked' be better? 'is_hooking' sounds more like what women in
> high heels, really short skirts and lots of makeup are doing late night
> on a corner of a Paris street ;-)
This is exactly the first thing I though of when I read the name ;-)
> A comment to explain the purpose should be added regardless.
Ack.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 16:07 ` Steven Rostedt
2012-07-30 16:31 ` Peter Zijlstra
@ 2012-07-30 16:32 ` Peter Zijlstra
1 sibling, 0 replies; 22+ messages in thread
From: Peter Zijlstra @ 2012-07-30 16:32 UTC (permalink / raw)
To: Steven Rostedt
Cc: Frederic Weisbecker, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Mon, 2012-07-30 at 12:07 -0400, Steven Rostedt wrote:
>
> Not only does bool describe it better, it should also allow gcc to
> optimize it better as well. Unless Peter has a legitimate rational why
> using bool in struct is bad, I would keep it as is.
I don't mind too much, but like said, I hate using types without
specified storage, you never really know what the compiler will end up
doing.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-30 15:51 ` Frederic Weisbecker
2012-07-30 16:07 ` Steven Rostedt
@ 2012-07-31 7:06 ` Ingo Molnar
2012-07-31 10:48 ` Frederic Weisbecker
1 sibling, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2012-07-31 7:06 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> On Mon, Jul 30, 2012 at 05:08:12PM +0200, Peter Zijlstra wrote:
> > On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > > +++ b/kernel/user_hooks.c
> > > @@ -0,0 +1,56 @@
> > > +#include <linux/user_hooks.h>
> > > +#include <linux/rcupdate.h>
> > > +#include <linux/sched.h>
> > > +#include <linux/percpu.h>
> > > +
> > > +struct user_hooks {
> > > + bool hooking;
> > > + bool in_user;
> > > +};
> >
> > I really detest using bool in structures.. but that's just me. Also this
> > really wants a comment as to wtf 'hooking' means. in_user I can just
> > about guess.
>
> I really don't mind changing that to int. I just like them as
> bool because they better describe the purpose of the field.
>
> hooking means that the hooks are set (the TIF flag is set on
> the current task and we also handle the exception hooks).
>
> I can call that is_hooking instead? And/or add a comment to
> explain the purpose of this.
Please don't use this horrible naming - use something more
technical like struct user_callback and callback::active, ok?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-31 7:06 ` Ingo Molnar
@ 2012-07-31 10:48 ` Frederic Weisbecker
2012-07-31 14:57 ` Ingo Molnar
0 siblings, 1 reply; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-31 10:48 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Tue, Jul 31, 2012 at 09:06:40AM +0200, Ingo Molnar wrote:
>
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
> > On Mon, Jul 30, 2012 at 05:08:12PM +0200, Peter Zijlstra wrote:
> > > On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > > > +++ b/kernel/user_hooks.c
> > > > @@ -0,0 +1,56 @@
> > > > +#include <linux/user_hooks.h>
> > > > +#include <linux/rcupdate.h>
> > > > +#include <linux/sched.h>
> > > > +#include <linux/percpu.h>
> > > > +
> > > > +struct user_hooks {
> > > > + bool hooking;
> > > > + bool in_user;
> > > > +};
> > >
> > > I really detest using bool in structures.. but that's just me. Also this
> > > really wants a comment as to wtf 'hooking' means. in_user I can just
> > > about guess.
> >
> > I really don't mind changing that to int. I just like them as
> > bool because they better describe the purpose of the field.
> >
> > hooking means that the hooks are set (the TIF flag is set on
> > the current task and we also handle the exception hooks).
> >
> > I can call that is_hooking instead? And/or add a comment to
> > explain the purpose of this.
>
> Please don't use this horrible naming - use something more
> technical like struct user_callback and callback::active, ok?
Ok, user callback should be fine. I'll respin with that.
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-31 10:48 ` Frederic Weisbecker
@ 2012-07-31 14:57 ` Ingo Molnar
2012-07-31 16:14 ` Peter Zijlstra
0 siblings, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2012-07-31 14:57 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> On Tue, Jul 31, 2012 at 09:06:40AM +0200, Ingo Molnar wrote:
> >
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> >
> > > On Mon, Jul 30, 2012 at 05:08:12PM +0200, Peter Zijlstra wrote:
> > > > On Fri, 2012-07-27 at 17:40 +0200, Frederic Weisbecker wrote:
> > > > > +++ b/kernel/user_hooks.c
> > > > > @@ -0,0 +1,56 @@
> > > > > +#include <linux/user_hooks.h>
> > > > > +#include <linux/rcupdate.h>
> > > > > +#include <linux/sched.h>
> > > > > +#include <linux/percpu.h>
> > > > > +
> > > > > +struct user_hooks {
> > > > > + bool hooking;
> > > > > + bool in_user;
> > > > > +};
> > > >
> > > > I really detest using bool in structures.. but that's just me. Also this
> > > > really wants a comment as to wtf 'hooking' means. in_user I can just
> > > > about guess.
> > >
> > > I really don't mind changing that to int. I just like them as
> > > bool because they better describe the purpose of the field.
> > >
> > > hooking means that the hooks are set (the TIF flag is set on
> > > the current task and we also handle the exception hooks).
> > >
> > > I can call that is_hooking instead? And/or add a comment to
> > > explain the purpose of this.
> >
> > Please don't use this horrible naming - use something more
> > technical like struct user_callback and callback::active, ok?
>
> Ok, user callback should be fine. I'll respin with that.
One problem I have with the word 'hook' is that it's rarely
clear whether it's used as a noun or a verb - and the naming in
your patch shows that kind of confusion in action.
'callback', while a longer word, is almost always used as a noun
within the kernel - and it also has a pretty narrow meaning.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-31 14:57 ` Ingo Molnar
@ 2012-07-31 16:14 ` Peter Zijlstra
2012-08-01 12:28 ` Frederic Weisbecker
0 siblings, 1 reply; 22+ messages in thread
From: Peter Zijlstra @ 2012-07-31 16:14 UTC (permalink / raw)
To: Ingo Molnar
Cc: Frederic Weisbecker, LKML, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Kevin Hilman,
Max Krasnyansky, Paul E. McKenney, Stephen Hemminger,
Steven Rostedt, Sven-Thorsten Dietrich, Thomas Gleixner
On Tue, 2012-07-31 at 16:57 +0200, Ingo Molnar wrote:
>
> 'callback', while a longer word, is almost always used as a noun
> within the kernel - and it also has a pretty narrow meaning.
An altogether different naming would be something like:
struct user_kernel_tracking {
int want_uk_tracking;
enum {
in_kernel = 0,
in_user,
} uk_state;
};
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-07-31 16:14 ` Peter Zijlstra
@ 2012-08-01 12:28 ` Frederic Weisbecker
2012-08-01 12:43 ` Steven Rostedt
0 siblings, 1 reply; 22+ messages in thread
From: Frederic Weisbecker @ 2012-08-01 12:28 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, LKML, Alessio Igor Bogani, Andrew Morton, Avi Kivity,
Chris Metcalf, Christoph Lameter, Geoff Levand, Gilad Ben Yossef,
Hakan Akkan, H. Peter Anvin, Kevin Hilman, Max Krasnyansky,
Paul E. McKenney, Stephen Hemminger, Steven Rostedt,
Sven-Thorsten Dietrich, Thomas Gleixner
On Tue, Jul 31, 2012 at 06:14:22PM +0200, Peter Zijlstra wrote:
> On Tue, 2012-07-31 at 16:57 +0200, Ingo Molnar wrote:
> >
> > 'callback', while a longer word, is almost always used as a noun
> > within the kernel - and it also has a pretty narrow meaning.
>
> An altogether different naming would be something like:
>
> struct user_kernel_tracking {
> int want_uk_tracking;
> enum {
> in_kernel = 0,
> in_user,
> } uk_state;
> };
>
>
You bet we might also extend this to track guest as well in the future
because it appears that we could also enter into RCU extended quiescent
state when we run in guest.
So we probably need to generalize a bit more. Some naming based on
"code domain"?
struct code_domain {
int is_tracking;
enum {
in_kernel,
in_user,
in_guest
} state;
}
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-08-01 12:28 ` Frederic Weisbecker
@ 2012-08-01 12:43 ` Steven Rostedt
2012-08-01 12:45 ` Frederic Weisbecker
0 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2012-08-01 12:43 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, Ingo Molnar, LKML, Alessio Igor Bogani,
Andrew Morton, Avi Kivity, Chris Metcalf, Christoph Lameter,
Geoff Levand, Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Wed, 2012-08-01 at 14:28 +0200, Frederic Weisbecker wrote:
> On Tue, Jul 31, 2012 at 06:14:22PM +0200, Peter Zijlstra wrote:
> So we probably need to generalize a bit more. Some naming based on
> "code domain"?
>
> struct code_domain {
> int is_tracking;
> enum {
> in_kernel,
> in_user,
> in_guest
> } state;
> }
Is there a fundamental difference between 'in_user' and 'in_guest'
though?
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 1/5] user_hooks: New user hooks subsystem
2012-08-01 12:43 ` Steven Rostedt
@ 2012-08-01 12:45 ` Frederic Weisbecker
0 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-08-01 12:45 UTC (permalink / raw)
To: Steven Rostedt
Cc: Peter Zijlstra, Ingo Molnar, LKML, Alessio Igor Bogani,
Andrew Morton, Avi Kivity, Chris Metcalf, Christoph Lameter,
Geoff Levand, Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney,
Stephen Hemminger, Sven-Thorsten Dietrich, Thomas Gleixner
On Wed, Aug 01, 2012 at 08:43:03AM -0400, Steven Rostedt wrote:
> On Wed, 2012-08-01 at 14:28 +0200, Frederic Weisbecker wrote:
> > On Tue, Jul 31, 2012 at 06:14:22PM +0200, Peter Zijlstra wrote:
>
> > So we probably need to generalize a bit more. Some naming based on
> > "code domain"?
> >
> > struct code_domain {
> > int is_tracking;
> > enum {
> > in_kernel,
> > in_user,
> > in_guest
> > } state;
> > }
>
> Is there a fundamental difference between 'in_user' and 'in_guest'
> though?
Probably not from RCU POV. But the cputime is not accounted the same.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 2/5] cputime: Don't allow virtual and irq finegrained cputime accounting simultaneously
2012-07-27 15:40 [RFC PATCH 0/5] cputime: Generic virtual based cputime accounting Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 1/5] user_hooks: New user hooks subsystem Frederic Weisbecker
@ 2012-07-27 15:40 ` Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 3/5] cputime: Allow dynamic switch between tick/virtual based cputime accounting Frederic Weisbecker
` (2 subsequent siblings)
4 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 15:40 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney, Peter Zijlstra,
Stephen Hemminger, Steven Rostedt, Sven-Thorsten Dietrich,
Thomas Gleixner
We may soon be able to run both at the same time. But we need
to rework a bit the irq finegrained accounting before that.
Just do a mutual exclusion for now.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
init/Kconfig | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index 3348b85..21da9b7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -269,7 +269,7 @@ config POSIX_MQUEUE_SYSCTL
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
- depends on HAVE_VIRT_CPU_ACCOUNTING
+ depends on HAVE_VIRT_CPU_ACCOUNTING && !IRQ_TIME_ACCOUNTING
default y if PPC64
help
Select this option to enable more accurate task and CPU time
--
1.7.5.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH 3/5] cputime: Allow dynamic switch between tick/virtual based cputime accounting
2012-07-27 15:40 [RFC PATCH 0/5] cputime: Generic virtual based cputime accounting Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 1/5] user_hooks: New user hooks subsystem Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 2/5] cputime: Don't allow virtual and irq finegrained cputime accounting simultaneously Frederic Weisbecker
@ 2012-07-27 15:40 ` Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 4/5] cputime: Rename account_system_vtime to account_vtime Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 5/5] cputime: Generic on-demand virtual cputime accounting Frederic Weisbecker
4 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 15:40 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney, Peter Zijlstra,
Stephen Hemminger, Steven Rostedt, Sven-Thorsten Dietrich,
Thomas Gleixner
Allow to dynamically switch between tick and virtual based cputime accounting.
This way we can provide a kind of "on-demand" virtual based cputime
accounting. In this mode, the kernel will rely on the user hooks
subsystem to dynamically hook on kernel boundaries.
This is in preparation for being able to stop the timer tick further
in idle. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING which makes
it possible to account the cputime without the tick by hooking on
kernel/user boundaries.
Depending whether the tick is stopped or not, we can switch between
tick and vtime based accounting anytime in order to minimize the
overhead associated to user hooks.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
arch/ia64/include/asm/cputime.h | 5 ++++
arch/ia64/kernel/time.c | 2 +-
arch/powerpc/include/asm/cputime.h | 5 ++++
arch/powerpc/kernel/time.c | 2 +-
arch/s390/include/asm/cputime.h | 5 ++++
arch/s390/kernel/vtime.c | 2 +-
include/asm-generic/cputime.h | 5 ++++
include/linux/kernel_stat.h | 3 ++
include/linux/sched.h | 5 +---
kernel/fork.c | 3 +-
kernel/sched/cputime.c | 39 +++++++++++++++--------------------
kernel/time/tick-sched.c | 28 ++++++++++++-------------
12 files changed, 58 insertions(+), 46 deletions(-)
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 3deac95..9532b9c 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -103,5 +103,10 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
#define cputime64_to_clock_t(__ct) \
cputime_to_clock_t((__force cputime_t)__ct)
+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 6247197..7afcf93 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
* Called from the timer interrupt handler to charge accumulated user time
* to the current process. Must be called with interrupts disabled.
*/
-void account_process_tick(struct task_struct *p, int user_tick)
+void account_process_tick_vtime(struct task_struct *p, int user_tick)
{
struct thread_info *ti = task_thread_info(p);
cputime_t delta_utime;
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 487d46f..901e0ac 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -228,6 +228,11 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))
+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 49da7f0..3f1918a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -354,7 +354,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
{
cputime_t utime, utimescaled;
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 718374d..05ffda7 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -188,4 +188,9 @@ static inline int s390_nohz_delay(int cpu)
#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+static inline bool accounting_vtime(void)
+{
+ return true;
+}
+
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 506e9bd..29f20fc 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -99,7 +99,7 @@ void account_switch_vtime(struct task_struct *prev)
S390_lowcore.system_timer = ti->system_timer;
}
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
{
do_account_vtime(tsk, HARDIRQ_OFFSET);
}
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 9a62937..212c8bb 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -66,4 +66,9 @@ typedef u64 __nocast cputime64_t;
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+static inline bool accounting_vtime(void)
+{
+ return false;
+}
+
#endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index bbe5d15..1270b86 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -132,8 +132,11 @@ extern void account_idle_ticks(unsigned long ticks);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_switch_vtime(struct task_struct *prev);
+extern bool account_process_tick_vtime(struct task_struct *p, int user_tick);
#else
static inline void account_switch_vtime(struct task_struct *prev) { }
+static inline void account_process_tick_vtime(struct task_struct *p,
+ int user_tick) { }
#endif
#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7b7a438..6209682 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -612,9 +612,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
-#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
@@ -1363,9 +1361,8 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t prev_utime, prev_stime;
-#endif
+
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
diff --git a/kernel/fork.c b/kernel/fork.c
index 3e2ed43..b19dd02 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1245,9 +1245,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_utime = p->prev_stime = 0;
-#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index fd5bd01..ff525ca 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -287,8 +287,6 @@ static __always_inline bool steal_account_process_tick(void)
return false;
}
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@@ -368,6 +366,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (accounting_vtime()) {
+ account_process_tick_vtime(p, user_tick);
+ return;
+ }
+
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
@@ -410,29 +413,10 @@ void account_idle_ticks(unsigned long ticks)
account_idle_time(jiffies_to_cputime(ticks));
}
-#endif
/*
* Use precise platform statistics if available:
*/
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- *ut = p->utime;
- *st = p->stime;
-}
-
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct task_cputime cputime;
-
- thread_group_cputime(p, &cputime);
-
- *ut = cputime.utime;
- *st = cputime.stime;
-}
-#else
-
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
@@ -441,6 +425,12 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
cputime_t rtime, utime = p->utime, total = utime + p->stime;
+ if (accounting_vtime()) {
+ *ut = p->utime;
+ *st = p->stime;
+ return;
+ }
+
/*
* Use CFS's precise accounting:
*/
@@ -476,6 +466,12 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
thread_group_cputime(p, &cputime);
+ if (accounting_vtime()) {
+ *ut = cputime.utime;
+ *st = cputime.stime;
+ return;
+ }
+
total = cputime.utime + cputime.stime;
rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
@@ -494,4 +490,3 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
*ut = sig->prev_utime;
*st = sig->prev_stime;
}
-#endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 45b17ae..76dc22b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -557,9 +557,7 @@ void tick_nohz_idle_exit(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
-#endif
ktime_t now;
local_irq_disable();
@@ -584,19 +582,19 @@ void tick_nohz_idle_exit(void)
tick_do_update_jiffies64(now);
update_cpu_load_nohz();
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- /*
- * We stopped the tick in idle. Update process times would miss the
- * time we slept as update_process_times does only a 1 tick
- * accounting. Enforce that this is accounted to idle !
- */
- ticks = jiffies - ts->idle_jiffies;
- /*
- * We might be one off. Do not randomly account a huge number of ticks!
- */
- if (ticks && ticks < LONG_MAX)
- account_idle_ticks(ticks);
-#endif
+ if (!accounting_vtime()) {
+ /*
+ * We stopped the tick in idle. Update process times would miss the
+ * time we slept as update_process_times does only a 1 tick
+ * accounting. Enforce that this is accounted to idle !
+ */
+ ticks = jiffies - ts->idle_jiffies;
+ /*
+ * We might be one off. Do not randomly account a huge number of ticks!
+ */
+ if (ticks && ticks < LONG_MAX)
+ account_idle_ticks(ticks);
+ }
calc_load_exit_idle();
touch_softlockup_watchdog();
--
1.7.5.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH 4/5] cputime: Rename account_system_vtime to account_vtime
2012-07-27 15:40 [RFC PATCH 0/5] cputime: Generic virtual based cputime accounting Frederic Weisbecker
` (2 preceding siblings ...)
2012-07-27 15:40 ` [PATCH 3/5] cputime: Allow dynamic switch between tick/virtual based cputime accounting Frederic Weisbecker
@ 2012-07-27 15:40 ` Frederic Weisbecker
2012-07-27 15:40 ` [PATCH 5/5] cputime: Generic on-demand virtual cputime accounting Frederic Weisbecker
4 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 15:40 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney, Peter Zijlstra,
Stephen Hemminger, Steven Rostedt, Sven-Thorsten Dietrich,
Thomas Gleixner
account_system_vtime() can be called from random places:
hard/softirq entry/exit, kvm guest entry/exit, and even
context switches on powerpc.
Rename it to the even more generic account_vtime() name;
this reflects well that we are in a random place in the
kernel where we have either system, idle or user time
to account.
This way we can reuse the "system" version and expand it
with other domains such as "user" or "idle" to prepare
for the user hooks based virtual cputime accounting that
knows better when to flush the time for which domain of
execution.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
arch/ia64/kernel/time.c | 4 ++--
arch/powerpc/kernel/time.c | 8 ++++----
arch/s390/kernel/vtime.c | 4 ++--
include/linux/hardirq.h | 8 ++++----
include/linux/kvm_host.h | 4 ++--
kernel/sched/cputime.c | 8 ++++----
kernel/softirq.c | 6 +++---
7 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 7afcf93..03de550 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -116,7 +116,7 @@ void account_switch_vtime(struct task_struct *prev)
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
-void account_system_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
unsigned long flags;
@@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk)
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(account_vtime);
/*
* Called from the timer interrupt handler to charge accumulated user time
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3f1918a..fba763d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -291,7 +291,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-void account_system_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *tsk)
{
u64 now, nowscaled, delta, deltascaled;
unsigned long flags;
@@ -343,14 +343,14 @@ void account_system_vtime(struct task_struct *tsk)
}
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(account_vtime);
/*
* Transfer the user and system times accumulated in the paca
* by the exception entry and exit code to the generic process
* user and system time records.
* Must be called with interrupts disabled.
- * Assumes that account_system_vtime() has been called recently
+ * Assumes that account_vtime() has been called recently
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -368,7 +368,7 @@ void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
void account_switch_vtime(struct task_struct *prev)
{
- account_system_vtime(prev);
+ account_vtime(prev);
account_process_tick(prev, 0);
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 29f20fc..95f8105 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -108,7 +108,7 @@ void account_process_tick_vtime(struct task_struct *tsk, int user_tick)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void account_system_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
__u64 timer, system;
@@ -122,7 +122,7 @@ void account_system_vtime(struct task_struct *tsk)
ti->system_timer = S390_lowcore.system_timer;
account_system_time(tsk, 0, system, system);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(account_vtime);
void __kprobes vtime_stop_cpu(void)
{
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index bb7f309..6432e33 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -132,11 +132,11 @@ extern void synchronize_irq(unsigned int irq);
struct task_struct;
#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void account_system_vtime(struct task_struct *tsk)
+static inline void account_vtime(struct task_struct *tsk)
{
}
#else
-extern void account_system_vtime(struct task_struct *tsk);
+extern void account_vtime(struct task_struct *tsk);
#endif
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@ -162,7 +162,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
- account_system_vtime(current); \
+ account_vtime(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
@@ -178,7 +178,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
- account_system_vtime(current); \
+ account_vtime(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c446435..54b5859 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -676,7 +676,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
static inline void kvm_guest_enter(void)
{
BUG_ON(preemptible());
- account_system_vtime(current);
+ account_vtime(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
@@ -690,7 +690,7 @@ static inline void kvm_guest_enter(void)
static inline void kvm_guest_exit(void)
{
- account_system_vtime(current);
+ account_vtime(current);
current->flags &= ~PF_VCPU;
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ff525ca..53b03cc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -9,11 +9,11 @@
/*
* There are no locks covering percpu hardirq/softirq time.
- * They are only modified in account_system_vtime, on corresponding CPU
+ * They are only modified in account_vtime, on corresponding CPU
* with interrupts disabled. So, writes are safe.
* They are read and saved off onto struct rq in update_rq_clock().
* This may result in other CPU reading this CPU's irq time and can
- * race with irq/account_system_vtime on this CPU. We would either get old
+ * race with irq/account_vtime on this CPU. We would either get old
* or new value with a side effect of accounting a slice of irq time to wrong
* task when irq is in progress while we read rq->clock. That is a worthy
* compromise in place of having locks on each irq in account_system_time.
@@ -42,7 +42,7 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
*/
-void account_system_vtime(struct task_struct *curr)
+void account_vtime(struct task_struct *curr)
{
unsigned long flags;
s64 delta;
@@ -72,7 +72,7 @@ void account_system_vtime(struct task_struct *curr)
irq_time_write_end();
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(account_vtime);
static int irqtime_account_hi_update(void)
{
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 671f959..8e20a7d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -212,7 +212,7 @@ asmlinkage void __do_softirq(void)
int cpu;
pending = local_softirq_pending();
- account_system_vtime(current);
+ account_vtime(current);
__local_bh_disable((unsigned long)__builtin_return_address(0),
SOFTIRQ_OFFSET);
@@ -263,7 +263,7 @@ restart:
lockdep_softirq_exit();
- account_system_vtime(current);
+ account_vtime(current);
__local_bh_enable(SOFTIRQ_OFFSET);
}
@@ -331,7 +331,7 @@ static inline void invoke_softirq(void)
*/
void irq_exit(void)
{
- account_system_vtime(current);
+ account_vtime(current);
trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
--
1.7.5.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH 5/5] cputime: Generic on-demand virtual cputime accounting
2012-07-27 15:40 [RFC PATCH 0/5] cputime: Generic virtual based cputime accounting Frederic Weisbecker
` (3 preceding siblings ...)
2012-07-27 15:40 ` [PATCH 4/5] cputime: Rename account_system_vtime to account_vtime Frederic Weisbecker
@ 2012-07-27 15:40 ` Frederic Weisbecker
4 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2012-07-27 15:40 UTC (permalink / raw)
To: LKML
Cc: Frederic Weisbecker, Alessio Igor Bogani, Andrew Morton,
Avi Kivity, Chris Metcalf, Christoph Lameter, Geoff Levand,
Gilad Ben Yossef, Hakan Akkan, H. Peter Anvin, Ingo Molnar,
Kevin Hilman, Max Krasnyansky, Paul E. McKenney, Peter Zijlstra,
Stephen Hemminger, Steven Rostedt, Sven-Thorsten Dietrich,
Thomas Gleixner
If we want to stop the tick further than the idle context, we need
to be able to account the cputime without using the tick.
Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.
However, implementing CONFIG_VIRT_CPU_ACCOUNTING requires
setting low level hooks and involves more overhead. But
we already have a generic user hooks subsystem that is
required for RCU needs by archs which will want to
shut down the tick outside idle.
This patch implements a generic virtual based cputime
accounting that relies on these generic user hooks.
There are some upsides of doing this:
- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if user hooks are already built (already necessary for RCU in full
tickless mode).
- We can rely on the generic user hooks subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.
And a few downsides:
- It relies on jiffies and the hooks are set in high level code. This
results in less precise cputime accounting than with a true native
virtual based cputime accounting which hooks on low level code and uses
a cpu hardware clock. Precision is not the goal of this though.
- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
include/asm-generic/cputime.h | 2 +
include/linux/kernel_stat.h | 12 ++++-
include/linux/user_hooks.h | 18 +++++++
init/Kconfig | 11 ++++-
kernel/sched/cputime.c | 112 +++++++++++++++++++++++++++++++++++++++++
kernel/user_hooks.c | 10 ++--
6 files changed, 158 insertions(+), 7 deletions(-)
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 212c8bb..2a78aa7 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -66,9 +66,11 @@ typedef u64 __nocast cputime64_t;
#define cputime64_to_clock_t(__ct) \
jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline bool accounting_vtime(void)
{
return false;
}
+#endif
#endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1270b86..6e509a9 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -130,13 +130,23 @@ extern void account_process_tick(struct task_struct *, int user);
extern void account_steal_ticks(unsigned long ticks);
extern void account_idle_ticks(unsigned long ticks);
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void account_switch_vtime(struct task_struct *prev);
-extern bool account_process_tick_vtime(struct task_struct *p, int user_tick);
+extern void account_process_tick_vtime(struct task_struct *p, int user_tick);
#else
static inline void account_switch_vtime(struct task_struct *prev) { }
static inline void account_process_tick_vtime(struct task_struct *p,
int user_tick) { }
#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+void account_system_vtime(struct task_struct *tsk);
+void account_user_vtime(struct task_struct *tsk);
+bool accounting_vtime(void);
+#else
+static inline void account_system_vtime(struct task_struct *tsk) { }
+static inline void account_user_vtime(struct task_struct *tsk) { }
+#endif
+
#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/user_hooks.h b/include/linux/user_hooks.h
index 720292d..07385f1 100644
--- a/include/linux/user_hooks.h
+++ b/include/linux/user_hooks.h
@@ -3,6 +3,24 @@
#ifdef CONFIG_USER_HOOKS
#include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct user_hooks {
+ bool hooking;
+ bool in_user;
+};
+
+DECLARE_PER_CPU(struct user_hooks, user_hooks);
+
+static inline bool in_user(void)
+{
+ return __get_cpu_var(user_hooks).in_user;
+}
+
+static inline bool user_hooks_hooking(void)
+{
+ return __get_cpu_var(user_hooks).hooking;
+}
extern void user_enter(void);
extern void user_exit(void);
diff --git a/init/Kconfig b/init/Kconfig
index 21da9b7..b0ed659 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -269,7 +269,9 @@ config POSIX_MQUEUE_SYSCTL
config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
- depends on HAVE_VIRT_CPU_ACCOUNTING && !IRQ_TIME_ACCOUNTING
+ depends on HAVE_VIRT_CPU_ACCOUNTING || HAVE_USER_HOOKS
+ depends on !IRQ_TIME_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING_GEN if !HAVE_VIRT_CPU_ACCOUNTING
default y if PPC64
help
Select this option to enable more accurate task and CPU time
@@ -280,6 +282,13 @@ config VIRT_CPU_ACCOUNTING
stolen time on logically-partitioned systems running on
IBM POWER5-based machines.
+config VIRT_CPU_ACCOUNTING_GEN
+ select USER_HOOKS
+ bool
+ help
+ Implement a generic virtual based cputime accounting by using
+ the user hooks subsystem.
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 53b03cc..284d136 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
+#include <linux/user_hooks.h>
#include "sched.h"
@@ -490,3 +491,114 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
*ut = sig->prev_utime;
*st = sig->prev_stime;
}
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static DEFINE_PER_CPU(long, last_jiffies) = INITIAL_JIFFIES;
+
+static cputime_t get_vtime_delta(void)
+{
+ long delta;
+
+ delta = jiffies - __this_cpu_read(last_jiffies);
+ __this_cpu_add(last_jiffies, delta);
+
+ return jiffies_to_cputime(delta);
+}
+
+void account_system_vtime(struct task_struct *tsk)
+{
+ cputime_t delta_cpu = get_vtime_delta();
+
+ account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void account_user_vtime(struct task_struct *tsk)
+{
+ cputime_t delta_cpu = get_vtime_delta();
+
+ account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+static void account_idle_vtime(void)
+{
+ cputime_t delta_cpu = get_vtime_delta();
+
+ account_idle_time(delta_cpu);
+}
+
+void account_vtime(struct task_struct *tsk)
+{
+ unsigned long count = irq_count();
+
+ if (!count) {
+ /*
+ * If we interrupted user, in_user() is 1
+ * because the user hooks subsys don't hook
+ * on irq entry/exit. This way we know if
+ * we need to flush user time on kernel entry.
+ */
+ if (in_user())
+ account_user_vtime(tsk);
+ } else {
+ if (count == HARDIRQ_OFFSET ||
+ count == SOFTIRQ_OFFSET) {
+ if (is_idle_task(tsk))
+ account_idle_vtime();
+ else
+ account_system_vtime(tsk);
+ }
+ }
+}
+
+void account_switch_vtime(struct task_struct *prev)
+{
+ if (is_idle_task(prev))
+ account_idle_vtime();
+ else
+ account_system_vtime(prev);
+}
+
+/*
+ * This is a kind of hack: if we flush user time only on
+ * irq entry, we miss the jiffies update and the time is spuriously
+ * accounted to system time.
+ */
+void account_process_tick_vtime(struct task_struct *p, int user_tick)
+{
+ if (in_user())
+ account_user_vtime(p);
+}
+
+bool accounting_vtime(void)
+{
+ return user_hooks_hooking();
+}
+
+static int __cpuinit vtime_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+ long *last_jiffies_cpu = per_cpu_ptr(&last_jiffies, cpu);
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ /*
+ * CHECKME: ensure that's visible by the CPU
+ * once it wakes up
+ */
+ *last_jiffies_cpu = jiffies;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init init_vtime(void)
+{
+ cpu_notifier(vtime_cpu_notify, 0);
+ return 0;
+}
+early_initcall(init_vtime);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/user_hooks.c b/kernel/user_hooks.c
index 63174b0..31f5d7e 100644
--- a/kernel/user_hooks.c
+++ b/kernel/user_hooks.c
@@ -1,12 +1,8 @@
#include <linux/user_hooks.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
-#include <linux/percpu.h>
+#include <linux/kernel_stat.h>
-struct user_hooks {
- bool hooking;
- bool in_user;
-};
DEFINE_PER_CPU(struct user_hooks, user_hooks) = {
#ifdef CONFIG_USER_HOOKS_FORCE
@@ -24,6 +20,8 @@ void user_enter(void)
uh = &__get_cpu_var(user_hooks);
if (uh->hooking && !uh->in_user) {
uh->in_user = true;
+ if (accounting_vtime())
+ account_system_vtime(current);
rcu_user_enter();
}
local_irq_restore(flags);
@@ -39,6 +37,8 @@ void user_exit(void)
if (uh->in_user) {
uh->in_user = false;
rcu_user_exit();
+ if (accounting_vtime())
+ account_user_vtime(current);
}
local_irq_restore(flags);
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 22+ messages in thread