From: Arnaldo Carvalho de Melo <acme@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq
Date: Tue, 25 Aug 2009 11:50:39 -0300 [thread overview]
Message-ID: <20090825145039.GD16110@ghostprotocols.net> (raw)
In-Reply-To: <1251208265.7538.1157.camel@twins>
Em Tue, Aug 25, 2009 at 03:51:05PM +0200, Peter Zijlstra escreveu:
> Use timer softirq for wakeups on preempt_rt
>
> Normally pending work is work that cannot be done from NMI context, such
> as wakeups and disabling the counter. The pending work is a single
> linked list using atomic ops so that it functions from NMI context.
>
> Normally this is called from IRQ context through use of a self-IPI
> (x86) or upon enabling hard interrupts (powerpc). Architectures that do
> not implement set_perf_counter_pending() nor call
> perf_counter_do_pending() upon leaving NMI context will get a polling
> fallback from the timer softirq.
>
> However, in -rt we cannot do the wakeup from IRQ context because it's a
> wait_queue wakeup, which can be O(n), so defer all wakeups to the softirq
> fallback by creating a second pending list that's only processed from
> there.
>
> [ not tested at all... ]
Thanks a lot, no crashes, tons of samples collected, looks fine after:
[root@hs21xm-2 ~]# uptime
09:49:48 up 38 min, 3 users, load average: 2.77, 18.72, 25.09
Will continue testing, but I guess I can give a:
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
> include/linux/perf_counter.h | 5 +++
> kernel/perf_counter.c | 61 +++++++++++++++++++++++++++++++++--------
> kernel/timer.c | 2 +-
> 4 files changed, 65 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
> index 972f90d..e61eee1 100644
> --- a/include/linux/perf_counter.h
> +++ b/include/linux/perf_counter.h
> @@ -612,6 +612,9 @@ struct perf_counter {
> int pending_kill;
> int pending_disable;
> struct perf_pending_entry pending;
> +#ifdef CONFIG_PREEMPT_RT
> + struct perf_pending_entry pending_softirq;
> +#endif
>
> atomic_t event_limit;
>
> @@ -703,6 +706,7 @@ extern void perf_counter_exit_task(struct task_struct *child);
> extern void perf_counter_free_task(struct task_struct *task);
> extern void set_perf_counter_pending(void);
> extern void perf_counter_do_pending(void);
> +extern void perf_counter_do_pending_softirq(void);
> extern void perf_counter_print_debug(void);
> extern void __perf_disable(void);
> extern bool __perf_enable(void);
> @@ -787,6 +791,7 @@ static inline int perf_counter_init_task(struct task_struct *child) { return 0;
> static inline void perf_counter_exit_task(struct task_struct *child) { }
> static inline void perf_counter_free_task(struct task_struct *task) { }
> static inline void perf_counter_do_pending(void) { }
> +static inline void perf_counter_do_pending_softirq(void) { }
> static inline void perf_counter_print_debug(void) { }
> static inline void perf_disable(void) { }
> static inline void perf_enable(void) { }
> diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
> index 53abcbe..d3b065d 100644
> --- a/kernel/perf_counter.c
> +++ b/kernel/perf_counter.c
> @@ -2397,45 +2397,69 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
> __perf_counter_disable(counter);
> }
>
> +#ifndef CONFIG_PREEMPT_RT
> if (counter->pending_wakeup) {
> counter->pending_wakeup = 0;
> perf_counter_wakeup(counter);
> }
> +#endif
> }
>
> +#ifdef CONFIG_PREEMPT_RT
> +static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
> +{
> + struct perf_counter *counter = container_of(entry,
> + struct perf_counter, pending_softirq);
> +
> + if (counter->pending_wakeup) {
> + counter->pending_wakeup = 0;
> + perf_counter_wakeup(counter);
> + }
> +}
> +#endif
> +
> #define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
>
> static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
> PENDING_TAIL,
> };
>
> -static void perf_pending_queue(struct perf_pending_entry *entry,
> - void (*func)(struct perf_pending_entry *))
> -{
> - struct perf_pending_entry **head;
> +static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
> + PENDING_TAIL,
> +};
>
> +static void __perf_pending_queue(struct perf_pending_entry **head,
> + struct perf_pending_entry *entry,
> + void (*func)(struct perf_pending_entry *))
> +{
> if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
> return;
>
> entry->func = func;
>
> - head = &get_cpu_var(perf_pending_head);
> -
> do {
> entry->next = *head;
> } while (cmpxchg(head, entry->next, entry) != entry->next);
> +}
>
> - set_perf_counter_pending();
> +static void perf_pending_queue(struct perf_pending_entry *entry,
> + void (*func)(struct perf_pending_entry *))
> +{
> + struct perf_pending_entry **head;
> +
> + head = &get_cpu_var(perf_pending_head);
> + __perf_pending_queue(head, entry, func);
> + put_cpu_var(perf_pending_head);
>
> - put_cpu_var(perf_pending_head);
> + set_perf_counter_pending();
> }
>
> -static int __perf_pending_run(void)
> +static int __perf_pending_run(struct perf_pending_entry **head)
> {
> struct perf_pending_entry *list;
> int nr = 0;
>
> - list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
> + list = xchg(head, PENDING_TAIL);
> while (list != PENDING_TAIL) {
> void (*func)(struct perf_pending_entry *);
> struct perf_pending_entry *entry = list;
> @@ -2465,7 +2489,8 @@ static inline int perf_not_pending(struct perf_counter *counter)
> * need to wait.
> */
> get_cpu();
> - __perf_pending_run();
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> + __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
> put_cpu();
>
> /*
> @@ -2483,7 +2508,13 @@ static void perf_pending_sync(struct perf_counter *counter)
>
> void perf_counter_do_pending(void)
> {
> - __perf_pending_run();
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> +}
> +
> +void perf_counter_do_pending_softirq(void)
> +{
> + __perf_pending_run(&__get_cpu_var(perf_pending_head));
> + __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
> }
>
> /*
> @@ -2543,8 +2574,14 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
>
> if (handle->nmi) {
> handle->counter->pending_wakeup = 1;
> +#ifndef CONFIG_PREEMPT_RT
> perf_pending_queue(&handle->counter->pending,
> perf_pending_counter);
> +#else
> + __perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
> + &handle->counter->pending_softirq,
> + perf_pending_counter_softirq);
> +#endif
> } else
> perf_counter_wakeup(handle->counter);
> }
> diff --git a/kernel/timer.c b/kernel/timer.c
> index 33fc9d1..1dd1456 100644
> --- a/kernel/timer.c
> +++ b/kernel/timer.c
> @@ -1188,7 +1188,7 @@ static void run_timer_softirq(struct softirq_action *h)
> {
> struct tvec_base *base = __get_cpu_var(tvec_bases);
>
> - perf_counter_do_pending();
> + perf_counter_do_pending_softirq();
>
> hrtimer_run_pending();
>
prev parent reply other threads:[~2009-08-25 14:50 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-25 13:51 [RFC][PATCH -rt] perf_counters: defer poll() wakeups to softirq Peter Zijlstra
2009-08-25 14:50 ` Arnaldo Carvalho de Melo [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090825145039.GD16110@ghostprotocols.net \
--to=acme@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.