From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>, Paul Mackerras <paulus@samba.org>,
Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 3/5] perf: Convert the perf output buffer to local_t
Date: Tue, 18 May 2010 15:33:01 +0200 [thread overview]
Message-ID: <20100518133726.044269924@chello.nl> (raw)
In-Reply-To: 20100518133258.000434886@chello.nl
[-- Attachment #1: perf-buffer-local_t.patch --]
[-- Type: text/plain, Size: 4695 bytes --]
Since there is now only a single writer, we can use local_t instead of
atomic_t/atomic_long_t and avoid all these pesky LOCK-prefixed insns.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_event.h | 15 +++++++--------
kernel/perf_event.c | 30 +++++++++++++++---------------
2 files changed, 22 insertions(+), 23 deletions(-)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -485,6 +485,7 @@ struct perf_guest_info_callbacks {
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <asm/atomic.h>
+#include <asm/local.h>
#define PERF_MAX_STACK_DEPTH 255
@@ -588,20 +589,18 @@ struct perf_mmap_data {
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
#endif
- int data_order;
+ int data_order; /* allocation order */
int nr_pages; /* nr of data pages */
int writable; /* are we writable */
int nr_locked; /* nr pages mlocked */
atomic_t poll; /* POLL_ for wakeups */
- atomic_t events; /* event_id limit */
- atomic_long_t head; /* write position */
-
- atomic_t wakeup; /* needs a wakeup */
- atomic_t lost; /* nr records lost */
-
- atomic_t nest; /* nested writers */
+ local_t head; /* write position */
+ local_t nest; /* nested writers */
+ local_t events; /* event limit */
+ local_t wakeup; /* needs a wakeup */
+ local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */
Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -2916,7 +2916,7 @@ static void perf_output_get_handle(struc
struct perf_mmap_data *data = handle->data;
preempt_disable();
- atomic_inc(&data->nest);
+ local_inc(&data->nest);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
@@ -2925,13 +2925,13 @@ static void perf_output_put_handle(struc
unsigned long head;
again:
- head = atomic_long_read(&data->head);
+ head = local_read(&data->head);
/*
* IRQ/NMI can happen here, which means we can miss a head update.
*/
- if (!atomic_dec_and_test(&data->nest))
+ if (!local_dec_and_test(&data->nest))
return;
/*
@@ -2945,12 +2945,12 @@ again:
* Now check if we missed an update, rely on the (compiler)
* barrier in atomic_dec_and_test() to re-read data->head.
*/
- if (unlikely(head != atomic_long_read(&data->head))) {
- atomic_inc(&data->nest);
+ if (unlikely(head != local_read(&data->head))) {
+ local_inc(&data->nest);
goto again;
}
- if (atomic_xchg(&data->wakeup, 0))
+ if (local_xchg(&data->wakeup, 0))
perf_output_wakeup(handle);
preempt_enable();
@@ -3031,7 +3031,7 @@ int perf_output_begin(struct perf_output
if (!data->nr_pages)
goto out;
- have_lost = atomic_read(&data->lost);
+ have_lost = local_read(&data->lost);
if (have_lost)
size += sizeof(lost_event);
@@ -3045,24 +3045,24 @@ int perf_output_begin(struct perf_output
*/
tail = ACCESS_ONCE(data->user_page->data_tail);
smp_rmb();
- offset = head = atomic_long_read(&data->head);
+ offset = head = local_read(&data->head);
head += size;
if (unlikely(!perf_output_space(data, tail, offset, head)))
goto fail;
- } while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
+ } while (local_cmpxchg(&data->head, offset, head) != offset);
handle->offset = offset;
handle->head = head;
if (head - tail > data->watermark)
- atomic_inc(&data->wakeup);
+ local_inc(&data->wakeup);
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id;
- lost_event.lost = atomic_xchg(&data->lost, 0);
+ lost_event.lost = local_xchg(&data->lost, 0);
perf_output_put(handle, lost_event);
}
@@ -3070,7 +3070,7 @@ int perf_output_begin(struct perf_output
return 0;
fail:
- atomic_inc(&data->lost);
+ local_inc(&data->lost);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
@@ -3086,10 +3086,10 @@ void perf_output_end(struct perf_output_
int wakeup_events = event->attr.wakeup_events;
if (handle->sample && wakeup_events) {
- int events = atomic_inc_return(&data->events);
+ int events = local_inc_return(&data->events);
if (events >= wakeup_events) {
- atomic_sub(wakeup_events, &data->events);
- atomic_inc(&data->wakeup);
+ local_sub(wakeup_events, &data->events);
+ local_inc(&data->wakeup);
}
}
next prev parent reply other threads:[~2010-05-18 13:46 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-18 13:32 [PATCH 0/5] Optimize perf ring-buffer Peter Zijlstra
2010-05-18 13:32 ` [PATCH 1/5] perf: Disallow mmap() on per-task inherited events Peter Zijlstra
2010-05-19 7:19 ` Frederic Weisbecker
2010-05-25 0:55 ` Paul Mackerras
2010-05-25 8:19 ` Peter Zijlstra
2010-05-18 13:33 ` [PATCH 2/5] perf: Remove IRQ-disable from the perf_output path Peter Zijlstra
2010-05-18 13:33 ` Peter Zijlstra [this message]
2010-05-18 13:33 ` [PATCH 4/5] perf: Avoid local_xchg Peter Zijlstra
2010-05-18 13:33 ` [RFC PATCH 5/5] perf: Implement perf_output_addr() Peter Zijlstra
2010-05-18 14:09 ` Peter Zijlstra
2010-05-19 7:21 ` Frederic Weisbecker
2010-05-19 7:58 ` Peter Zijlstra
2010-05-19 9:03 ` Frederic Weisbecker
2010-05-19 14:47 ` Steven Rostedt
2010-05-19 15:05 ` Peter Zijlstra
2010-05-19 15:38 ` Steven Rostedt
2010-05-19 15:50 ` Peter Zijlstra
2010-05-19 16:08 ` Steven Rostedt
2010-05-19 16:15 ` Peter Zijlstra
2010-05-19 16:27 ` Steven Rostedt
2010-05-19 16:34 ` Peter Zijlstra
2010-05-19 7:14 ` [PATCH 0/5] Optimize perf ring-buffer Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100518133726.044269924@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=acme@infradead.org \
--cc=fweisbec@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.