* [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq
@ 2009-08-25 13:51 Peter Zijlstra
2009-08-25 14:50 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 2+ messages in thread
From: Peter Zijlstra @ 2009-08-25 13:51 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ingo Molnar, Thomas Gleixner; +Cc: linux-kernel
Use timer softirq for wakeups on preempt_rt
Normally pending work is work that cannot be done from NMI context, such
as wakeups and disabling the counter. The pending work is a singly
linked list using atomic ops so that it functions from NMI context.
Normally this is called from IRQ context through use of a self-IPI
(x86) or upon enabling hard interrupts (powerpc). Architectures that do
not implement set_perf_counter_pending() nor call
perf_counter_do_pending() upon leaving NMI context will get a polling
fallback from the timer softirq.
However, in -rt we cannot do the wakeup from IRQ context because it's a
wait_queue wakeup, which can be O(n), so defer all wakeups to the softirq
fallback by creating a second pending list that's only processed from
there.
[ not tested at all... ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_counter.h | 5 +++
kernel/perf_counter.c | 61 +++++++++++++++++++++++++++++++++--------
kernel/timer.c | 2 +-
4 files changed, 65 insertions(+), 16 deletions(-)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 972f90d..e61eee1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -612,6 +612,9 @@ struct perf_counter {
int pending_kill;
int pending_disable;
struct perf_pending_entry pending;
+#ifdef CONFIG_PREEMPT_RT
+ struct perf_pending_entry pending_softirq;
+#endif
atomic_t event_limit;
@@ -703,6 +706,7 @@ extern void perf_counter_exit_task(struct task_struct *child);
extern void perf_counter_free_task(struct task_struct *task);
extern void set_perf_counter_pending(void);
extern void perf_counter_do_pending(void);
+extern void perf_counter_do_pending_softirq(void);
extern void perf_counter_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
@@ -787,6 +791,7 @@ static inline int perf_counter_init_task(struct task_struct *child) { return 0;
static inline void perf_counter_exit_task(struct task_struct *child) { }
static inline void perf_counter_free_task(struct task_struct *task) { }
static inline void perf_counter_do_pending(void) { }
+static inline void perf_counter_do_pending_softirq(void) { }
static inline void perf_counter_print_debug(void) { }
static inline void perf_disable(void) { }
static inline void perf_enable(void) { }
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 53abcbe..d3b065d 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2397,45 +2397,69 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
__perf_counter_disable(counter);
}
+#ifndef CONFIG_PREEMPT_RT
if (counter->pending_wakeup) {
counter->pending_wakeup = 0;
perf_counter_wakeup(counter);
}
+#endif
}
+#ifdef CONFIG_PREEMPT_RT
+static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
+{
+ struct perf_counter *counter = container_of(entry,
+ struct perf_counter, pending_softirq);
+
+ if (counter->pending_wakeup) {
+ counter->pending_wakeup = 0;
+ perf_counter_wakeup(counter);
+ }
+}
+#endif
+
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
PENDING_TAIL,
};
-static void perf_pending_queue(struct perf_pending_entry *entry,
- void (*func)(struct perf_pending_entry *))
-{
- struct perf_pending_entry **head;
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
+ PENDING_TAIL,
+};
+static void __perf_pending_queue(struct perf_pending_entry **head,
+ struct perf_pending_entry *entry,
+ void (*func)(struct perf_pending_entry *))
+{
if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
return;
entry->func = func;
- head = &get_cpu_var(perf_pending_head);
-
do {
entry->next = *head;
} while (cmpxchg(head, entry->next, entry) != entry->next);
+}
- set_perf_counter_pending();
+static void perf_pending_queue(struct perf_pending_entry *entry,
+ void (*func)(struct perf_pending_entry *))
+{
+ struct perf_pending_entry **head;
+
+ head = &get_cpu_var(perf_pending_head);
+ __perf_pending_queue(head, entry, func);
+ put_cpu_var(perf_pending_head);
- put_cpu_var(perf_pending_head);
+ set_perf_counter_pending();
}
-static int __perf_pending_run(void)
+static int __perf_pending_run(struct perf_pending_entry **head)
{
struct perf_pending_entry *list;
int nr = 0;
- list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+ list = xchg(head, PENDING_TAIL);
while (list != PENDING_TAIL) {
void (*func)(struct perf_pending_entry *);
struct perf_pending_entry *entry = list;
@@ -2465,7 +2489,8 @@ static inline int perf_not_pending(struct perf_counter *counter)
* need to wait.
*/
get_cpu();
- __perf_pending_run();
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+ __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
put_cpu();
/*
@@ -2483,7 +2508,13 @@ static void perf_pending_sync(struct perf_counter *counter)
void perf_counter_do_pending(void)
{
- __perf_pending_run();
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+}
+
+void perf_counter_do_pending_softirq(void)
+{
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+ __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
}
/*
@@ -2543,8 +2574,14 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
if (handle->nmi) {
handle->counter->pending_wakeup = 1;
+#ifndef CONFIG_PREEMPT_RT
perf_pending_queue(&handle->counter->pending,
perf_pending_counter);
+#else
+ __perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
+ &handle->counter->pending_softirq,
+ perf_pending_counter_softirq);
+#endif
} else
perf_counter_wakeup(handle->counter);
}
diff --git a/kernel/timer.c b/kernel/timer.c
index 33fc9d1..1dd1456 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1188,7 +1188,7 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = __get_cpu_var(tvec_bases);
- perf_counter_do_pending();
+ perf_counter_do_pending_softirq();
hrtimer_run_pending();
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq
2009-08-25 13:51 [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq Peter Zijlstra
@ 2009-08-25 14:50 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-08-25 14:50 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Thomas Gleixner, linux-kernel
Em Tue, Aug 25, 2009 at 03:51:05PM +0200, Peter Zijlstra escreveu:
> Use timer softirq for wakeups on preempt_rt
>
> Normally pending work is work that cannot be done from NMI context, such
> as wakeups and disabling the counter. The pending work is a single
> linked list using atomic ops so that it functions from NMI context.
>
> Normally this is called from IRQ context through use of an self-IPI
> (x86) or upon enabling hard interrupts (powerpc). Architectures that do
> not implement perf_counter_set_pending() nor call
> perf_counter_do_pending() upon leaving NMI context will get a polling
> fallback from the timer softirq.
>
> However, in -rt we cannot do the wakeup from IRQ context because its a
> wait_queue wakup, which can be O(n), so defer all wakeups to the softirq
> fallback by creating a second pending list that's only processed from
> there.
>
> [ not tested at all... ]
Thanks a lot, no crashes, tons of samples collected, looks fine after:
[root@hs21xm-2 ~]# uptime
09:49:48 up 38 min, 3 users, load average: 2.77, 18.72, 25.09
Will continue testing, but I guess I can give a:
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> include/linux/perf_counter.h | 5 +++
> kernel/perf_counter.c | 61 +++++++++++++++++++++++++++++++++--------
> kernel/timer.c | 2 +-
> 4 files changed, 65 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
> index 972f90d..e61eee1 100644
> --- a/include/linux/perf_counter.h
> +++ b/include/linux/perf_counter.h
> @@ -612,6 +612,9 @@ struct perf_counter {
> int pending_kill;
> int pending_disable;
> struct perf_pending_entry pending;
> +#ifdef CONFIG_PREEMPT_RT
> + struct perf_pending_entry pending_softirq;
> +#endif
>
> atomic_t event_limit;
>
> @@ -703,6 +706,7 @@ extern void perf_counter_exit_task(struct task_struct *child);
> extern void perf_counter_free_task(struct task_struct *task);
> extern void set_perf_counter_pending(void);
> extern void perf_counter_do_pending(void);
> +extern void perf_counter_do_pending_softirq(void);
> extern void perf_counter_print_debug(void);
> extern void __perf_disable(void);
> extern bool __perf_enable(void);
> @@ -787,6 +791,7 @@ static inline int perf_counter_init_task(struct task_struct *child) { return 0;
> static inline void perf_counter_exit_task(struct task_struct *child) { }
> static inline void perf_counter_free_task(struct task_struct *task) { }
> static inline void perf_counter_do_pending(void) { }
> +static inline void perf_counter_do_pending_softirq(void) { }
> static inline void perf_counter_print_debug(void) { }
> static inline void perf_disable(void) { }
> static inline void perf_enable(void) { }
> diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
> index 53abcbe..d3b065d 100644
> --- a/kernel/perf_counter.c
> +++ b/kernel/perf_counter.c
> @@ -2397,45 +2397,69 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
> __perf_counter_disable(counter);
> }
>
> +#ifndef CONFIG_PREEMPT_RT
> if (counter->pending_wakeup) {
> counter->pending_wakeup = 0;
> perf_counter_wakeup(counter);
> }
> +#endif
> }
>
> +#ifdef CONFIG_PREEMPT_RT
> +static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
> +{
> + struct perf_counter *counter = container_of(entry,
> + struct perf_counter, pending_softirq);
> +
> + if (counter->pending_wakeup) {
> + counter->pending_wakeup = 0;
> + perf_counter_wakeup(counter);
> + }
> +}
> +#endif
> +
> #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
>
> static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
> PENDING_TAIL,
> };
>
> -static void perf_pending_queue(struct perf_pending_entry *entry,
> - void (*func)(struct perf_pending_entry *))
> -{
> - struct perf_pending_entry **head;
> +static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
> + PENDING_TAIL,
> +};
>
> +static void __perf_pending_queue(struct perf_pending_entry **head,
> + struct perf_pending_entry *entry,
> + void (*func)(struct perf_pending_entry *))
> +{
> if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
> return;
>
> entry->func = func;
>
> - head = &get_cpu_var(perf_pending_head);
> -
> do {
> entry->next = *head;
> } while (cmpxchg(head, entry->next, entry) != entry->next);
> +}
>
> - set_perf_counter_pending();
> +static void perf_pending_queue(struct perf_pending_entry *entry,
> + void (*func)(struct perf_pending_entry *))
> +{
> + struct perf_pending_entry **head;
> +
> + head = &get_cpu_var(perf_pending_head);
> + __perf_pending_queue(head, entry, func);
> + put_cpu_var(perf_pending_head);
>
> - put_cpu_var(perf_pending_head);
> + set_perf_counter_pending();
> }
>
> -static int __perf_pending_run(void)
> +static int __perf_pending_run(struct perf_pending_entry **head)
> {
> struct perf_pending_entry *list;
> int nr = 0;
>
> - list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
> + list = xchg(head, PENDING_TAIL);
> while (list != PENDING_TAIL) {
> void (*func)(struct perf_pending_entry *);
> struct perf_pending_entry *entry = list;
> @@ -2465,7 +2489,8 @@ static inline int perf_not_pending(struct perf_counter *counter)
> * need to wait.
> */
> get_cpu();
> - __perf_pending_run();
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> + __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
> put_cpu();
>
> /*
> @@ -2483,7 +2508,13 @@ static void perf_pending_sync(struct perf_counter *counter)
>
> void perf_counter_do_pending(void)
> {
> - __perf_pending_run();
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> +}
> +
> +void perf_counter_do_pending_softirq(void)
> +{
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> + __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
> }
>
> /*
> @@ -2543,8 +2574,14 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
>
> if (handle->nmi) {
> handle->counter->pending_wakeup = 1;
> +#ifndef CONFIG_PREEMPT_RT
> perf_pending_queue(&handle->counter->pending,
> perf_pending_counter);
> +#else
> + __perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
> + &handle->counter->pending_softirq,
> + perf_pending_counter_softirq);
> +#endif
> } else
> perf_counter_wakeup(handle->counter);
> }
> diff --git a/kernel/timer.c b/kernel/timer.c
> index 33fc9d1..1dd1456 100644
> --- a/kernel/timer.c
> +++ b/kernel/timer.c
> @@ -1188,7 +1188,7 @@ static void run_timer_softirq(struct softirq_action *h)
> {
> struct tvec_base *base = __get_cpu_var(tvec_bases);
>
> - perf_counter_do_pending();
> + perf_counter_do_pending_softirq();
>
> hrtimer_run_pending();
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-08-25 14:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-25 13:51 [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq Peter Zijlstra
2009-08-25 14:50 ` Arnaldo Carvalho de Melo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.