* [PATCH 1/6] perf_counter: more elaborate write API
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:06 ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 2/6] perf_counter: output objects Peter Zijlstra
` (5 subsequent siblings)
6 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: perf_counter-write-api.patch --]
[-- Type: text/plain, Size: 3733 bytes --]
Provide a begin, copy, end interface to the output buffer.
begin() reserves the space,
copy() copies the data over, considering page boundaries,
end() finalizes the event and does the wakeup.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_counter.c | 96 +++++++++++++++++++++++++++++++++-----------------
1 file changed, 65 insertions(+), 31 deletions(-)
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1411,16 +1411,20 @@ static const struct file_operations perf
* Output
*/
-static int perf_output_write(struct perf_counter *counter, int nmi,
- void *buf, ssize_t size)
+struct perf_output_handle {
+ struct perf_counter *counter;
+ struct perf_mmap_data *data;
+ unsigned int offset;
+ int wakeup;
+};
+
+static int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_counter *counter, unsigned int size)
{
struct perf_mmap_data *data;
- unsigned int offset, head, nr;
- unsigned int len;
- int ret, wakeup;
+ unsigned int offset, head;
rcu_read_lock();
- ret = -ENOSPC;
data = rcu_dereference(counter->data);
if (!data)
goto out;
@@ -1428,45 +1432,75 @@ static int perf_output_write(struct perf
if (!data->nr_pages)
goto out;
- ret = -EINVAL;
- if (size > PAGE_SIZE)
- goto out;
-
do {
offset = head = atomic_read(&data->head);
head += size;
} while (atomic_cmpxchg(&data->head, offset, head) != offset);
- wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
+ handle->counter = counter;
+ handle->data = data;
+ handle->offset = offset;
+ handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
- nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);
- offset &= PAGE_SIZE - 1;
+ return 0;
- len = min_t(unsigned int, PAGE_SIZE - offset, size);
- memcpy(data->data_pages[nr] + offset, buf, len);
- size -= len;
-
- if (size) {
- nr = (nr + 1) & (data->nr_pages - 1);
- memcpy(data->data_pages[nr], buf + len, size);
- }
+out:
+ rcu_read_unlock();
+ return -ENOSPC;
+}
- /*
- * generate a poll() wakeup for every page boundary crossed
- */
- if (wakeup) {
- atomic_xchg(&data->wakeup, POLL_IN);
- __perf_counter_update_userpage(counter, data);
+static void perf_output_copy(struct perf_output_handle *handle,
+ void *buf, unsigned int len)
+{
+ unsigned int offset = handle->offset;
+ unsigned int pages_mask = handle->data->nr_pages - 1;
+ unsigned int size;
+ void **pages = handle->data->data_pages;
+
+ do {
+ unsigned int page_offset;
+ int nr;
+
+ nr = (offset >> PAGE_SHIFT) & pages_mask;
+ page_offset = offset & (PAGE_SIZE - 1);
+ size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+ memcpy(pages[nr] + page_offset, buf, size);
+ len -= size;
+ buf += size;
+ offset += size;
+ } while (len);
+
+ handle->offset = offset;
+}
+
+static void perf_output_end(struct perf_output_handle *handle, int nmi)
+{
+ if (handle->wakeup) {
+ (void)atomic_xchg(&handle->data->wakeup, POLL_IN);
+ __perf_counter_update_userpage(handle->counter, handle->data);
if (nmi) {
- counter->wakeup_pending = 1;
+ handle->counter->wakeup_pending = 1;
set_perf_counter_pending();
} else
- wake_up(&counter->waitq);
+ wake_up(&handle->counter->waitq);
}
- ret = 0;
-out:
rcu_read_unlock();
+}
+
+static int perf_output_write(struct perf_counter *counter, int nmi,
+ void *buf, ssize_t size)
+{
+ struct perf_output_handle handle;
+ int ret;
+
+ ret = perf_output_begin(&handle, counter, size);
+ if (ret)
+ goto out;
+ perf_output_copy(&handle, buf, size);
+ perf_output_end(&handle, nmi);
+
+out:
return ret;
}
--
^ permalink raw reply [flat|nested] 27+ messages in thread
* [tip:perfcounters/core] perf_counter: more elaborate write API
2009-03-25 11:30 ` [PATCH 1/6] perf_counter: more elaborate write API Peter Zijlstra
@ 2009-03-25 12:06 ` Peter Zijlstra
0 siblings, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:06 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: c3f81c52021ae1c90c2718aba5114fbd54db20ff
Gitweb: http://git.kernel.org/tip/c3f81c52021ae1c90c2718aba5114fbd54db20ff
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:22 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:51 +0100
perf_counter: more elaborate write API
Provide a begin, copy, end interface to the output buffer.
begin() reserves the space,
copy() copies the data over, considering page boundaries,
end() finalizes the event and does the wakeup.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113316.740550870@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_counter.c | 109 +++++++++++++++++++++++++++++++++---------------
1 files changed, 75 insertions(+), 34 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index affe227..0422fd9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -9,6 +9,7 @@
*/
#include <linux/fs.h>
+#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/file.h>
@@ -16,15 +17,14 @@
#include <linux/sysfs.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#include <linux/vmstat.h>
+#include <linux/hardirq.h>
+#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/perf_counter.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/rculist.h>
-#include <linux/hardirq.h>
#include <asm/irq_regs.h>
@@ -1411,16 +1411,20 @@ static const struct file_operations perf_fops = {
* Output
*/
-static int perf_output_write(struct perf_counter *counter, int nmi,
- void *buf, ssize_t size)
+struct perf_output_handle {
+ struct perf_counter *counter;
+ struct perf_mmap_data *data;
+ unsigned int offset;
+ int wakeup;
+};
+
+static int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_counter *counter, unsigned int size)
{
struct perf_mmap_data *data;
- unsigned int offset, head, nr;
- unsigned int len;
- int ret, wakeup;
+ unsigned int offset, head;
rcu_read_lock();
- ret = -ENOSPC;
data = rcu_dereference(counter->data);
if (!data)
goto out;
@@ -1428,45 +1432,82 @@ static int perf_output_write(struct perf_counter *counter, int nmi,
if (!data->nr_pages)
goto out;
- ret = -EINVAL;
- if (size > PAGE_SIZE)
- goto out;
-
do {
offset = head = atomic_read(&data->head);
head += size;
} while (atomic_cmpxchg(&data->head, offset, head) != offset);
- wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
+ handle->counter = counter;
+ handle->data = data;
+ handle->offset = offset;
+ handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
- nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1);
- offset &= PAGE_SIZE - 1;
+ return 0;
- len = min_t(unsigned int, PAGE_SIZE - offset, size);
- memcpy(data->data_pages[nr] + offset, buf, len);
- size -= len;
+out:
+ rcu_read_unlock();
- if (size) {
- nr = (nr + 1) & (data->nr_pages - 1);
- memcpy(data->data_pages[nr], buf + len, size);
- }
+ return -ENOSPC;
+}
- /*
- * generate a poll() wakeup for every page boundary crossed
- */
- if (wakeup) {
- atomic_xchg(&data->wakeup, POLL_IN);
- __perf_counter_update_userpage(counter, data);
+static void perf_output_copy(struct perf_output_handle *handle,
+ void *buf, unsigned int len)
+{
+ unsigned int pages_mask;
+ unsigned int offset;
+ unsigned int size;
+ void **pages;
+
+ offset = handle->offset;
+ pages_mask = handle->data->nr_pages - 1;
+ pages = handle->data->data_pages;
+
+ do {
+ unsigned int page_offset;
+ int nr;
+
+ nr = (offset >> PAGE_SHIFT) & pages_mask;
+ page_offset = offset & (PAGE_SIZE - 1);
+ size = min_t(unsigned int, PAGE_SIZE - page_offset, len);
+
+ memcpy(pages[nr] + page_offset, buf, size);
+
+ len -= size;
+ buf += size;
+ offset += size;
+ } while (len);
+
+ handle->offset = offset;
+}
+
+static void perf_output_end(struct perf_output_handle *handle, int nmi)
+{
+ if (handle->wakeup) {
+ (void)atomic_xchg(&handle->data->wakeup, POLL_IN);
+ __perf_counter_update_userpage(handle->counter, handle->data);
if (nmi) {
- counter->wakeup_pending = 1;
+ handle->counter->wakeup_pending = 1;
set_perf_counter_pending();
} else
- wake_up(&counter->waitq);
+ wake_up(&handle->counter->waitq);
}
- ret = 0;
-out:
rcu_read_unlock();
+}
+
+static int perf_output_write(struct perf_counter *counter, int nmi,
+ void *buf, ssize_t size)
+{
+ struct perf_output_handle handle;
+ int ret;
+ ret = perf_output_begin(&handle, counter, size);
+ if (ret)
+ goto out;
+
+ perf_output_copy(&handle, buf, size);
+ perf_output_end(&handle, nmi);
+
+out:
return ret;
}
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 2/6] perf_counter: output objects
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
2009-03-25 11:30 ` [PATCH 1/6] perf_counter: more elaborate write API Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:06 ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 3/6] perf_counter: sanity check on the output API Peter Zijlstra
` (4 subsequent siblings)
6 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: perf_counter-output-obj.patch --]
[-- Type: text/plain, Size: 4090 bytes --]
Provide a {type,size} header for each output entry.
This should provide extensible output, and the ability to mix multiple streams.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_counter.h | 11 ++++++++
kernel/perf_counter.c | 55 ++++++++++++++++++++++++++++++++-----------
2 files changed, 53 insertions(+), 13 deletions(-)
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -156,6 +156,16 @@ struct perf_counter_mmap_page {
__u32 data_head; /* head in the data section */
};
+struct perf_event_header {
+ __u32 type;
+ __u32 size;
+};
+
+enum perf_event_type {
+ PERF_EVENT_IP = 0,
+ PERF_EVENT_GROUP = 1,
+};
+
#ifdef __KERNEL__
/*
* Kernel-internal data types and definitions:
@@ -260,6 +270,7 @@ struct perf_counter {
struct list_head list_entry;
struct list_head event_entry;
struct list_head sibling_list;
+ int nr_siblings;
struct perf_counter *group_leader;
const struct hw_perf_counter_ops *hw_ops;
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *co
*/
if (counter->group_leader == counter)
list_add_tail(&counter->list_entry, &ctx->counter_list);
- else
+ else {
list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+ group_leader->nr_siblings++;
+ }
list_add_rcu(&counter->event_entry, &ctx->event_list);
}
@@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *co
list_del_init(&counter->list_entry);
list_del_rcu(&counter->event_entry);
+ if (counter->group_leader != counter)
+ counter->group_leader->nr_siblings--;
+
/*
* If this was a group counter with sibling counters then
* upgrade the siblings to singleton counters by adding them
@@ -381,9 +386,11 @@ static int is_software_only_group(struct
if (!is_software_counter(leader))
return 0;
+
list_for_each_entry(counter, &leader->sibling_list, list_entry)
if (!is_software_counter(counter))
return 0;
+
return 1;
}
@@ -1473,6 +1480,9 @@ static void perf_output_copy(struct perf
handle->offset = offset;
}
+#define perf_output_put(handle, x) \
+ perf_output_copy((handle), &(x), sizeof(x))
+
static void perf_output_end(struct perf_output_handle *handle, int nmi)
{
if (handle->wakeup) {
@@ -1507,34 +1517,53 @@ out:
static void perf_output_simple(struct perf_counter *counter,
int nmi, struct pt_regs *regs)
{
- u64 entry;
+ struct {
+ struct perf_event_header header;
+ u64 ip;
+ } event;
+
+ event.header.type = PERF_EVENT_IP;
+ event.header.size = sizeof(event);
+ event.ip = instruction_pointer(regs);
- entry = instruction_pointer(regs);
-
- perf_output_write(counter, nmi, &entry, sizeof(entry));
+ perf_output_write(counter, nmi, &event, sizeof(event));
}
-struct group_entry {
- u64 event;
- u64 counter;
-};
-
static void perf_output_group(struct perf_counter *counter, int nmi)
{
+ struct perf_output_handle handle;
+ struct perf_event_header header;
struct perf_counter *leader, *sub;
+ unsigned int size;
+ struct {
+ u64 event;
+ u64 counter;
+ } entry;
+ int ret;
+
+ size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+
+ ret = perf_output_begin(&handle, counter, size);
+ if (ret)
+ return;
+
+ header.type = PERF_EVENT_GROUP;
+ header.size = size;
+
+ perf_output_put(&handle, header);
leader = counter->group_leader;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- struct group_entry entry;
-
if (sub != counter)
sub->hw_ops->read(sub);
entry.event = sub->hw_event.config;
entry.counter = atomic64_read(&sub->count);
- perf_output_write(counter, nmi, &entry, sizeof(entry));
+ perf_output_put(&handle, entry);
}
+
+ perf_output_end(&handle, nmi);
}
void perf_counter_output(struct perf_counter *counter,
--
^ permalink raw reply [flat|nested] 27+ messages in thread
* [tip:perfcounters/core] perf_counter: output objects
2009-03-25 11:30 ` [PATCH 2/6] perf_counter: output objects Peter Zijlstra
@ 2009-03-25 12:06 ` Peter Zijlstra
0 siblings, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:06 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: cc1383cf4762fa39ba7ca6864e150422eb405a33
Gitweb: http://git.kernel.org/tip/cc1383cf4762fa39ba7ca6864e150422eb405a33
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:23 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:51 +0100
perf_counter: output objects
Provide a {type,size} header for each output entry.
This should provide extensible output, and the ability to mix multiple streams.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113316.831607932@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_counter.h | 11 ++++++++
kernel/perf_counter.c | 53 ++++++++++++++++++++++++++++++++---------
2 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 48212c1..c256635 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -156,6 +156,16 @@ struct perf_counter_mmap_page {
__u32 data_head; /* head in the data section */
};
+struct perf_event_header {
+ __u32 type;
+ __u32 size;
+};
+
+enum perf_event_type {
+ PERF_EVENT_IP = 0,
+ PERF_EVENT_GROUP = 1,
+};
+
#ifdef __KERNEL__
/*
* Kernel-internal data types and definitions:
@@ -260,6 +270,7 @@ struct perf_counter {
struct list_head list_entry;
struct list_head event_entry;
struct list_head sibling_list;
+ int nr_siblings;
struct perf_counter *group_leader;
const struct hw_perf_counter_ops *hw_ops;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0422fd9..d76e311 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
*/
if (counter->group_leader == counter)
list_add_tail(&counter->list_entry, &ctx->counter_list);
- else
+ else {
list_add_tail(&counter->list_entry, &group_leader->sibling_list);
+ group_leader->nr_siblings++;
+ }
list_add_rcu(&counter->event_entry, &ctx->event_list);
}
@@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
list_del_init(&counter->list_entry);
list_del_rcu(&counter->event_entry);
+ if (counter->group_leader != counter)
+ counter->group_leader->nr_siblings--;
+
/*
* If this was a group counter with sibling counters then
* upgrade the siblings to singleton counters by adding them
@@ -381,9 +386,11 @@ static int is_software_only_group(struct perf_counter *leader)
if (!is_software_counter(leader))
return 0;
+
list_for_each_entry(counter, &leader->sibling_list, list_entry)
if (!is_software_counter(counter))
return 0;
+
return 1;
}
@@ -1480,6 +1487,9 @@ static void perf_output_copy(struct perf_output_handle *handle,
handle->offset = offset;
}
+#define perf_output_put(handle, x) \
+ perf_output_copy((handle), &(x), sizeof(x))
+
static void perf_output_end(struct perf_output_handle *handle, int nmi)
{
if (handle->wakeup) {
@@ -1514,34 +1524,53 @@ out:
static void perf_output_simple(struct perf_counter *counter,
int nmi, struct pt_regs *regs)
{
- u64 entry;
+ struct {
+ struct perf_event_header header;
+ u64 ip;
+ } event;
- entry = instruction_pointer(regs);
+ event.header.type = PERF_EVENT_IP;
+ event.header.size = sizeof(event);
+ event.ip = instruction_pointer(regs);
- perf_output_write(counter, nmi, &entry, sizeof(entry));
+ perf_output_write(counter, nmi, &event, sizeof(event));
}
-struct group_entry {
- u64 event;
- u64 counter;
-};
-
static void perf_output_group(struct perf_counter *counter, int nmi)
{
+ struct perf_output_handle handle;
+ struct perf_event_header header;
struct perf_counter *leader, *sub;
+ unsigned int size;
+ struct {
+ u64 event;
+ u64 counter;
+ } entry;
+ int ret;
+
+ size = sizeof(header) + counter->nr_siblings * sizeof(entry);
+
+ ret = perf_output_begin(&handle, counter, size);
+ if (ret)
+ return;
+
+ header.type = PERF_EVENT_GROUP;
+ header.size = size;
+
+ perf_output_put(&handle, header);
leader = counter->group_leader;
list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- struct group_entry entry;
-
if (sub != counter)
sub->hw_ops->read(sub);
entry.event = sub->hw_event.config;
entry.counter = atomic64_read(&sub->count);
- perf_output_write(counter, nmi, &entry, sizeof(entry));
+ perf_output_put(&handle, entry);
}
+
+ perf_output_end(&handle, nmi);
}
void perf_counter_output(struct perf_counter *counter,
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 3/6] perf_counter: sanity check on the output API
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
2009-03-25 11:30 ` [PATCH 1/6] perf_counter: more elaborate write API Peter Zijlstra
2009-03-25 11:30 ` [PATCH 2/6] perf_counter: output objects Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:06 ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 4/6] perf_counter: optionally provide the pid/tid of the sampled task Peter Zijlstra
` (3 subsequent siblings)
6 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: perf_counter-output-validate.patch --]
[-- Type: text/plain, Size: 988 bytes --]
Ensure we never write more than we said we would.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/perf_counter.c | 4 ++++
1 file changed, 4 insertions(+)
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1422,6 +1422,7 @@ struct perf_output_handle {
struct perf_counter *counter;
struct perf_mmap_data *data;
unsigned int offset;
+ unsigned int head;
int wakeup;
};
@@ -1447,6 +1448,7 @@ static int perf_output_begin(struct perf
handle->counter = counter;
handle->data = data;
handle->offset = offset;
+ handle->head = head;
handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
return 0;
@@ -1478,6 +1480,8 @@ static void perf_output_copy(struct perf
} while (len);
handle->offset = offset;
+
+ WARN_ON(handle->offset > handle->head);
}
#define perf_output_put(handle, x) \
--
^ permalink raw reply [flat|nested] 27+ messages in thread
* [tip:perfcounters/core] perf_counter: sanity check on the output API
2009-03-25 11:30 ` [PATCH 3/6] perf_counter: sanity check on the output API Peter Zijlstra
@ 2009-03-25 12:06 ` Peter Zijlstra
0 siblings, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:06 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: 91d876bd14c82826b69bf23f502cb283959ebb1f
Gitweb: http://git.kernel.org/tip/91d876bd14c82826b69bf23f502cb283959ebb1f
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:24 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:52 +0100
perf_counter: sanity check on the output API
Ensure we never write more than we said we would.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113316.921433024@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/perf_counter.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d76e311..7669afe 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1422,6 +1422,7 @@ struct perf_output_handle {
struct perf_counter *counter;
struct perf_mmap_data *data;
unsigned int offset;
+ unsigned int head;
int wakeup;
};
@@ -1447,6 +1448,7 @@ static int perf_output_begin(struct perf_output_handle *handle,
handle->counter = counter;
handle->data = data;
handle->offset = offset;
+ handle->head = head;
handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT);
return 0;
@@ -1485,6 +1487,8 @@ static void perf_output_copy(struct perf_output_handle *handle,
} while (len);
handle->offset = offset;
+
+ WARN_ON_ONCE(handle->offset > handle->head);
}
#define perf_output_put(handle, x) \
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 4/6] perf_counter: optionally provide the pid/tid of the sampled task
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
` (2 preceding siblings ...)
2009-03-25 11:30 ` [PATCH 3/6] perf_counter: sanity check on the output API Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:06 ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 11:30 ` [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument Peter Zijlstra
` (2 subsequent siblings)
6 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: perf_counter-tid.patch --]
[-- Type: text/plain, Size: 2321 bytes --]
Allow cpu-wide counters to profile userspace by reporting which process
the sample belongs to.
This raises the first issue with the output type: many of these
options (group, tid, callchain, etc.) are non-exclusive and could be
combined, suggesting a bitfield.
However, things like the mmap() data stream don't fit in that.
How to split the type field...
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_counter.h | 5 ++++-
kernel/perf_counter.c | 18 ++++++++++++++++--
2 files changed, 20 insertions(+), 3 deletions(-)
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -127,8 +127,9 @@ struct perf_counter_hw_event {
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
+ include_tid : 1, /* include the tid */
- __reserved_1 : 55;
+ __reserved_1 : 54;
__u32 extra_config_len;
__u32 __reserved_4;
@@ -164,6 +165,8 @@ struct perf_event_header {
enum perf_event_type {
PERF_EVENT_IP = 0,
PERF_EVENT_GROUP = 1,
+
+ __PERF_EVENT_TID = 0x100,
};
#ifdef __KERNEL__
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1521,16 +1521,30 @@ out:
static void perf_output_simple(struct perf_counter *counter,
int nmi, struct pt_regs *regs)
{
+ unsigned int size;
struct {
struct perf_event_header header;
u64 ip;
+ u32 pid, tid;
} event;
event.header.type = PERF_EVENT_IP;
- event.header.size = sizeof(event);
event.ip = instruction_pointer(regs);
- perf_output_write(counter, nmi, &event, sizeof(event));
+ size = sizeof(event);
+
+ if (counter->hw_event.include_tid) {
+ /* namespace issues */
+ event.pid = current->group_leader->pid;
+ event.tid = current->pid;
+
+ event.header.type |= __PERF_EVENT_TID;
+ } else
+ size -= sizeof(u64);
+
+ event.header.size = size;
+
+ perf_output_write(counter, nmi, &event, size);
}
static void perf_output_group(struct perf_counter *counter, int nmi)
--
^ permalink raw reply [flat|nested] 27+ messages in thread
* [tip:perfcounters/core] perf_counter: optionally provide the pid/tid of the sampled task
2009-03-25 11:30 ` [PATCH 4/6] perf_counter: optionally provide the pid/tid of the sampled task Peter Zijlstra
@ 2009-03-25 12:06 ` Peter Zijlstra
0 siblings, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:06 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: fd46efb85aea330c364ece28621c06e5845680e3
Gitweb: http://git.kernel.org/tip/fd46efb85aea330c364ece28621c06e5845680e3
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:25 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:52 +0100
perf_counter: optionally provide the pid/tid of the sampled task
Allow cpu-wide counters to profile userspace by reporting which process
the sample belongs to.
This raises the first issue with the output type: many of these
options (group, tid, callchain, etc.) are non-exclusive and could be
combined, suggesting a bitfield.
However, things like the mmap() data stream don't fit in that.
How to split the type field...
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113317.013775235@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/perf_counter.h | 5 ++++-
kernel/perf_counter.c | 18 ++++++++++++++++--
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index c256635..7fdbdf8 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -127,8 +127,9 @@ struct perf_counter_hw_event {
exclude_kernel : 1, /* ditto kernel */
exclude_hv : 1, /* ditto hypervisor */
exclude_idle : 1, /* don't count when idle */
+ include_tid : 1, /* include the tid */
- __reserved_1 : 55;
+ __reserved_1 : 54;
__u32 extra_config_len;
__u32 __reserved_4;
@@ -164,6 +165,8 @@ struct perf_event_header {
enum perf_event_type {
PERF_EVENT_IP = 0,
PERF_EVENT_GROUP = 1,
+
+ __PERF_EVENT_TID = 0x100,
};
#ifdef __KERNEL__
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 7669afe..f3e1b27 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1528,16 +1528,30 @@ out:
static void perf_output_simple(struct perf_counter *counter,
int nmi, struct pt_regs *regs)
{
+ unsigned int size;
struct {
struct perf_event_header header;
u64 ip;
+ u32 pid, tid;
} event;
event.header.type = PERF_EVENT_IP;
- event.header.size = sizeof(event);
event.ip = instruction_pointer(regs);
- perf_output_write(counter, nmi, &event, sizeof(event));
+ size = sizeof(event);
+
+ if (counter->hw_event.include_tid) {
+ /* namespace issues */
+ event.pid = current->group_leader->pid;
+ event.tid = current->pid;
+
+ event.header.type |= __PERF_EVENT_TID;
+ } else
+ size -= sizeof(u64);
+
+ event.header.size = size;
+
+ perf_output_write(counter, nmi, &event, size);
}
static void perf_output_group(struct perf_counter *counter, int nmi)
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
` (3 preceding siblings ...)
2009-03-25 11:30 ` [PATCH 4/6] perf_counter: optionally provide the pid/tid of the sampled task Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:07 ` [tip:perfcounters/core] " Peter Zijlstra
2009-03-25 12:18 ` [PATCH 5/6] " Ingo Molnar
2009-03-25 11:30 ` [PATCH 6/6] perf_counter: kerneltop: output event support Peter Zijlstra
2009-03-25 12:05 ` [PATCH 0/6] perf_counter: new output ABI Ingo Molnar
6 siblings, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: mike-perf_counter_tools-mmap-pages.patch --]
[-- Type: text/plain, Size: 3613 bytes --]
Provide a knob to set the number of mmap data pages.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
Documentation/perf_counter/kerneltop.c | 31 +++++++++++++++++++++++++------
1 file changed, 25 insertions(+), 6 deletions(-)
Index: linux-2.6/Documentation/perf_counter/kerneltop.c
===================================================================
--- linux-2.6.orig/Documentation/perf_counter/kerneltop.c
+++ linux-2.6/Documentation/perf_counter/kerneltop.c
@@ -178,6 +178,7 @@ static int nr_cpus = 0;
static int nmi = 1;
static int group = 0;
static unsigned int page_size;
+static unsigned int mmap_pages = 4;
static char *vmlinux;
@@ -326,6 +327,7 @@ static void display_help(void)
" -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
" -z --zero # zero counts after display\n"
" -D --dump_symtab # dump symbol table to stderr on startup\n"
+ " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
);
exit(0);
@@ -732,7 +734,9 @@ static int read_symbol(FILE *in, struct
/* Tag events to be skipped. */
if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
s->skip = 1;
- if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+ else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+ s->skip = 1;
+ else if (!strcmp("mwait_idle", s->sym))
s->skip = 1;
if (filter_match == 1) {
@@ -1042,9 +1046,10 @@ static void process_options(int argc, ch
{"symbol", required_argument, NULL, 's'},
{"stat", no_argument, NULL, 'S'},
{"zero", no_argument, NULL, 'z'},
+ {"mmap_pages", required_argument, NULL, 'm'},
{NULL, 0, NULL, 0 }
};
- int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
+ int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
long_options, &option_index);
if (c == -1)
break;
@@ -1081,6 +1086,7 @@ static void process_options(int argc, ch
case 'S': run_perfstat = 1; break;
case 'x': vmlinux = strdup(optarg); break;
case 'z': zero = 1; break;
+ case 'm': mmap_pages = atoi(optarg); break;
default: error = 1; break;
}
}
@@ -1134,17 +1140,30 @@ repeat:
return head;
}
+struct timeval last_read, this_read;
+
static void mmap_read(struct mmap_data *md)
{
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
+ gettimeofday(&this_read, NULL);
+
if (head - old > md->mask) {
- printf("ERROR: failed to keep up with mmap data\n");
- exit(-1);
+ struct timeval iv;
+ unsigned long msecs;
+
+ timersub(&this_read, &last_read, &iv);
+ msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+ fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
+
+ old = head;
}
+ last_read = this_read;
+
for (; old != head;) {
__u64 *ptr = (__u64 *)&data[old & md->mask];
old += sizeof(__u64);
@@ -1220,8 +1239,8 @@ int main(int argc, char *argv[])
mmap_array[i][counter].counter = counter;
mmap_array[i][counter].prev = 0;
- mmap_array[i][counter].mask = 2*page_size - 1;
- mmap_array[i][counter].base = mmap(NULL, 3*page_size,
+ mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+ mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
PROT_READ, MAP_SHARED, fd[i][counter], 0);
if (mmap_array[i][counter].base == MAP_FAILED) {
printf("kerneltop error: failed to mmap with %d (%s)\n",
--
^ permalink raw reply [flat|nested] 27+ messages in thread* [tip:perfcounters/core] perf_counter: kerneltop: mmap_pages argument
2009-03-25 11:30 ` [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument Peter Zijlstra
@ 2009-03-25 12:07 ` Peter Zijlstra
2009-03-25 12:18 ` [PATCH 5/6] " Ingo Molnar
1 sibling, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:07 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: a7aa0cab482525a039b7ce7b1c8cf62f57f441c9
Gitweb: http://git.kernel.org/tip/a7aa0cab482525a039b7ce7b1c8cf62f57f441c9
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:26 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:53 +0100
perf_counter: kerneltop: mmap_pages argument
provide a knob to set the number of mmap data pages.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113317.104545398@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
Documentation/perf_counter/kerneltop.c | 31 +++++++++++++++++++++++++------
1 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 7ebde7a..3e45bf6 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -178,6 +178,7 @@ static int nr_cpus = 0;
static int nmi = 1;
static int group = 0;
static unsigned int page_size;
+static unsigned int mmap_pages = 4;
static char *vmlinux;
@@ -326,6 +327,7 @@ static void display_help(void)
" -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
" -z --zero # zero counts after display\n"
" -D --dump_symtab # dump symbol table to stderr on startup\n"
+ " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
);
exit(0);
@@ -732,7 +734,9 @@ static int read_symbol(FILE *in, struct sym_entry *s)
/* Tag events to be skipped. */
if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
s->skip = 1;
- if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+ else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
+ s->skip = 1;
+ else if (!strcmp("mwait_idle", s->sym))
s->skip = 1;
if (filter_match == 1) {
@@ -1042,9 +1046,10 @@ static void process_options(int argc, char *argv[])
{"symbol", required_argument, NULL, 's'},
{"stat", no_argument, NULL, 'S'},
{"zero", no_argument, NULL, 'z'},
+ {"mmap_pages", required_argument, NULL, 'm'},
{NULL, 0, NULL, 0 }
};
- int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
+ int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z",
long_options, &option_index);
if (c == -1)
break;
@@ -1081,6 +1086,7 @@ static void process_options(int argc, char *argv[])
case 'S': run_perfstat = 1; break;
case 'x': vmlinux = strdup(optarg); break;
case 'z': zero = 1; break;
+ case 'm': mmap_pages = atoi(optarg); break;
default: error = 1; break;
}
}
@@ -1134,17 +1140,30 @@ repeat:
return head;
}
+struct timeval last_read, this_read;
+
static void mmap_read(struct mmap_data *md)
{
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
+ gettimeofday(&this_read, NULL);
+
if (head - old > md->mask) {
- printf("ERROR: failed to keep up with mmap data\n");
- exit(-1);
+ struct timeval iv;
+ unsigned long msecs;
+
+ timersub(&this_read, &last_read, &iv);
+ msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
+
+ fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
+
+ old = head;
}
+ last_read = this_read;
+
for (; old != head;) {
__u64 *ptr = (__u64 *)&data[old & md->mask];
old += sizeof(__u64);
@@ -1220,8 +1239,8 @@ int main(int argc, char *argv[])
mmap_array[i][counter].counter = counter;
mmap_array[i][counter].prev = 0;
- mmap_array[i][counter].mask = 2*page_size - 1;
- mmap_array[i][counter].base = mmap(NULL, 3*page_size,
+ mmap_array[i][counter].mask = mmap_pages*page_size - 1;
+ mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
PROT_READ, MAP_SHARED, fd[i][counter], 0);
if (mmap_array[i][counter].base == MAP_FAILED) {
printf("kerneltop error: failed to mmap with %d (%s)\n",
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 11:30 ` [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument Peter Zijlstra
2009-03-25 12:07 ` [tip:perfcounters/core] " Peter Zijlstra
@ 2009-03-25 12:18 ` Ingo Molnar
2009-03-25 12:27 ` Peter Zijlstra
1 sibling, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2009-03-25 12:18 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> provide a knob to set the number of mmap data pages.
> + " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
Btw., we really want this to be auto-tuning to a large degree. If
the kernel observes missed events, it should create a
PERF_EVENT_OVERFLOW==0x3 record, with the number of missed events -
or something like that.
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:18 ` [PATCH 5/6] " Ingo Molnar
@ 2009-03-25 12:27 ` Peter Zijlstra
2009-03-25 12:35 ` Ingo Molnar
2009-03-26 2:22 ` Paul Mackerras
0 siblings, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:27 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
On Wed, 2009-03-25 at 13:18 +0100, Ingo Molnar wrote:
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > provide a knob to set the number of mmap data pages.
>
> > + " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
>
> Btw., we really want this to be auto-tuning to a large degree. If
> the kernel observes missed events, it should create a
> PERF_EVENT_OVERFLOW==0x3 record, with the number of missed events -
> or something like that.
Well, who's to say we ever see that overflow record if we're having
trouble tracking the output as is?
How important is it for people to have accurate overflow information
other than the current -- we can't keep up -- kind?
One possible solution is making the control page writable and writing
the userspace read position to it, then the kernel can, on
perf_output_begin() detect the overflow and count the number of
overwritten events.
This overflow count could then be published back into the control page.
TBH I'm not much of a fan, making all these pages writable just opens a
whole can of worms, and that accurate overflow tracking will put more
code in the output path.
Also, when mixing streams (events, mmap), is a single "you missed 'n'
events" still good?
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:27 ` Peter Zijlstra
@ 2009-03-25 12:35 ` Ingo Molnar
2009-03-25 12:41 ` Peter Zijlstra
2009-03-26 2:22 ` Paul Mackerras
1 sibling, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2009-03-25 12:35 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Wed, 2009-03-25 at 13:18 +0100, Ingo Molnar wrote:
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > provide a knob to set the number of mmap data pages.
> >
> > > + " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
> >
> > Btw., we really want this to be auto-tuning to a large degree. If
> > the kernel observes missed events, it should create a
> > PERF_EVENT_OVERFLOW==0x3 record, with the number of missed events -
> > or something like that.
>
> Well, who's to say we ever see that overflow record if we're
> having trouble tracking the output as is?
it would overwrite the last (new) record - so it's deterministic and
the tail does not consume the head - just bites itself a bit.
But it would still be somewhat racy, if user-space _just_ managed to
process those records ...
> How important is it for people to have accurate overflow
> information other than the current -- we can't keep up -- kind?
it's somewhat important and could pave the way for the kernel to
react to overflow more intelligently (via interim buffering or
whatever future mechanism).
It's also a general quality of implementation principle for kernel
code: if we want to hide information we want to hide it from
_user-space_, not the kernel. Hiding information from the kernel
almost always causes trouble down the line.
> One possible solution is making the control page writable and
> writing the userspace read position to it, then the kernel can, on
> perf_output_begin() detect the overflow and count the number of
> overwritten events.
>
> This overflow count could then be published back into the control
> page.
Ok, that's a nice idea - it keeps the amount of dirty cachelines
minimal.
> TBH I'm not much of a fan, making all these pages writable just
> opens a whole can of worms, and that accurate overflow tracking
> will put more code in the output path.
What can of worms can you see there? (It would not be COW-ed - if
you share those pages without knowing that they are shared then
confused user-space will have to keep broken pieces of itself.)
> Also, when mixing streams (events,mmap) is a single: you missed
> 'n' events still good?
How would such mixing work? Multiple counters streaming into the
same mmap area?
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:35 ` Ingo Molnar
@ 2009-03-25 12:41 ` Peter Zijlstra
2009-03-25 12:54 ` Ingo Molnar
0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:41 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> > Also, when mixing streams (events,mmap) is a single: you missed
> > 'n' events still good?
>
> How would such mixing work? Multiple counters streaming into the
> same mmap area?
No, basically having overflow events and mmap-vma changed events in a
single output stream.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:41 ` Peter Zijlstra
@ 2009-03-25 12:54 ` Ingo Molnar
2009-03-25 12:57 ` Peter Zijlstra
0 siblings, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2009-03-25 12:54 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
>
> > > Also, when mixing streams (events,mmap) is a single: you missed
> > > 'n' events still good?
> >
> > How would such mixing work? Multiple counters streaming into the
> > same mmap area?
>
> No basically having overflow events and mmap-vma changed events in
> a single output stream.
ah, and i missed the impact of variable size records - that too
makes it somewhat impractical to emit overflow records in situ. (the
kernel does not really know the precise start of the previous
record, typically.)
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:54 ` Ingo Molnar
@ 2009-03-25 12:57 ` Peter Zijlstra
2009-03-25 14:52 ` Peter Zijlstra
0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:57 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
On Wed, 2009-03-25 at 13:54 +0100, Ingo Molnar wrote:
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> >
> > > > Also, when mixing streams (events,mmap) is a single: you missed
> > > > 'n' events still good?
> > >
> > > How would such mixing work? Multiple counters streaming into the
> > > same mmap area?
> >
> > No basically having overflow events and mmap-vma changed events in
> > a single output stream.
>
> ah, and i missed the impact of variable size records - that too
> makes it somewhat impractical to emit overflow records in situ. (the
> kernel does not really know the precise start of the previous
> record, typically.)
Alternatively, we could simply not emit new events until the read
position increases; that's much simpler.
Still don't like mapping the stuff writable though..
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:57 ` Peter Zijlstra
@ 2009-03-25 14:52 ` Peter Zijlstra
2009-03-25 17:16 ` Ingo Molnar
0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 14:52 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
On Wed, 2009-03-25 at 13:57 +0100, Peter Zijlstra wrote:
> On Wed, 2009-03-25 at 13:54 +0100, Ingo Molnar wrote:
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> > >
> > > > > Also, when mixing streams (events,mmap) is a single: you missed
> > > > > 'n' events still good?
> > > >
> > > > How would such mixing work? Multiple counters streaming into the
> > > > same mmap area?
> > >
> > > No basically having overflow events and mmap-vma changed events in
> > > a single output stream.
> >
> > ah, and i missed the impact of variable size records - that too
> > makes it somewhat impractical to emit overflow records in situ. (the
> > kernel does not really know the precise start of the previous
> > record, typically.)
>
> Alternatively, we could simply not emit new events until the read
> position increases,. that's much simpler.
>
> Still don't like mapping the stuff writable though..
This is what it would look like I suppose...
Any thoughts?
Not-signed-off-by: me
---
include/linux/perf_counter.h | 4 ++
kernel/perf_counter.c | 67 +++++++++++++++++++++++++++++++++++++----
2 files changed, 64 insertions(+), 7 deletions(-)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6bf67ce..d5a599c 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -165,6 +165,8 @@ struct perf_counter_mmap_page {
__s64 offset; /* add to hardware counter value */
__u32 data_head; /* head in the data section */
+ __u32 data_tail; /* user-space written tail */
+ __u32 overflow; /* number of lost events */
};
struct perf_event_header {
@@ -269,8 +271,10 @@ struct file;
struct perf_mmap_data {
struct rcu_head rcu_head;
int nr_pages;
+ int writable;
atomic_t wakeup;
atomic_t head;
+ atomic_t overflow;
struct perf_counter_mmap_page *user_page;
void *data_pages[0];
};
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 3b862a7..1f5c515 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1330,6 +1330,7 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
userpg->offset -= atomic64_read(&counter->hw.prev_count);
userpg->data_head = atomic_read(&data->head);
+ userpg->overflow = atomic_read(&data->overflow);
smp_wmb();
++userpg->lock;
preempt_enable();
@@ -1375,6 +1376,28 @@ unlock:
return ret;
}
+static int perf_mmap_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+ data = rcu_dereference(counter->data);
+ if (!data)
+ goto unlock;
+
+ /*
+ * Only allow writes to the control page.
+ */
+ if (page != virt_to_page(data->user_page))
+ goto unlock;
+
+ ret = 0;
+unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
{
struct perf_mmap_data *data;
@@ -1463,6 +1486,7 @@ static struct vm_operations_struct perf_mmap_vmops = {
.open = perf_mmap_open,
.close = perf_mmap_close,
.fault = perf_mmap_fault,
+ .page_mkwrite = perf_mmap_mkwrite,
};
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1473,7 +1497,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long locked, lock_limit;
int ret = 0;
- if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
+ if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
vma_size = vma->vm_end - vma->vm_start;
@@ -1503,16 +1527,19 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
mutex_lock(&counter->mmap_mutex);
if (atomic_inc_not_zero(&counter->mmap_count))
- goto out;
+ goto unlock;
WARN_ON(counter->data);
ret = perf_mmap_data_alloc(counter, nr_pages);
- if (!ret)
- atomic_set(&counter->mmap_count, 1);
-out:
+ if (ret)
+ goto unlock;
+
+ atomic_set(&counter->mmap_count, 1);
+ if (vma->vm_flags & VM_WRITE)
+ counter->data->writable = 1;
+unlock:
mutex_unlock(&counter->mmap_mutex);
- vma->vm_flags &= ~VM_MAYWRITE;
vma->vm_flags |= VM_RESERVED;
vma->vm_ops = &perf_mmap_vmops;
@@ -1540,6 +1567,28 @@ struct perf_output_handle {
int wakeup;
};
+static int perf_output_overflow(struct perf_mmap_data *data,
+ unsigned int offset, unsigned int head)
+{
+ unsigned int tail;
+ unsigned int mask;
+
+ if (!data->writable)
+ return 0;
+
+ mask = (data->nr_pages << PAGE_SHIFT) - 1;
+ smp_rmb();
+ tail = ACCESS_ONCE(data->user_page->data_tail);
+
+ offset = (offset - tail) & mask;
+ head = (head - tail) & mask;
+
+ if ((int)(head - offset) < 0)
+ return 1;
+
+ return 0;
+}
+
static int perf_output_begin(struct perf_output_handle *handle,
struct perf_counter *counter, unsigned int size)
{
@@ -1552,11 +1601,13 @@ static int perf_output_begin(struct perf_output_handle *handle,
goto out;
if (!data->nr_pages)
- goto out;
+ goto fail;
do {
offset = head = atomic_read(&data->head);
head += size;
+ if (unlikely(perf_output_overflow(data, offset, head)))
+ goto fail;
} while (atomic_cmpxchg(&data->head, offset, head) != offset);
handle->counter = counter;
@@ -1567,6 +1618,8 @@ static int perf_output_begin(struct perf_output_handle *handle,
return 0;
+fail:
+ atomic_inc(&data->overflow);
out:
rcu_read_unlock();
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 14:52 ` Peter Zijlstra
@ 2009-03-25 17:16 ` Ingo Molnar
2009-03-25 21:18 ` Peter Zijlstra
0 siblings, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2009-03-25 17:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
* Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, 2009-03-25 at 13:57 +0100, Peter Zijlstra wrote:
> > On Wed, 2009-03-25 at 13:54 +0100, Ingo Molnar wrote:
> > > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > >
> > > > On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> > > >
> > > > > > Also, when mixing streams (events,mmap) is a single: you missed
> > > > > > 'n' events still good?
> > > > >
> > > > > How would such mixing work? Multiple counters streaming into the
> > > > > same mmap area?
> > > >
> > > > No basically having overflow events and mmap-vma changed events in
> > > > a single output stream.
> > >
> > > ah, and i missed the impact of variable size records - that too
> > > makes it somewhat impractical to emit overflow records in situ. (the
> > > kernel does not really know the precise start of the previous
> > > record, typically.)
> >
> > Alternatively, we could simply not emit new events until the read
> > position increases,. that's much simpler.
> >
> > Still don't like mapping the stuff writable though..
>
> This is what it would look like I suppose...
>
> Any thoughts?
>
> Not-signed-off-by: me
(you don't like it?)
> ---
> include/linux/perf_counter.h | 4 ++
> kernel/perf_counter.c | 67 +++++++++++++++++++++++++++++++++++++----
> 2 files changed, 64 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
> index 6bf67ce..d5a599c 100644
> --- a/include/linux/perf_counter.h
> +++ b/include/linux/perf_counter.h
> @@ -165,6 +165,8 @@ struct perf_counter_mmap_page {
> __s64 offset; /* add to hardware counter value */
>
> __u32 data_head; /* head in the data section */
> + __u32 data_tail; /* user-space written tail */
> + __u32 overflow; /* number of lost events */
small detail: i'd suggest to always pad things up to 64 bits. In
case someone adds a new field with u64.
> };
>
> struct perf_event_header {
> @@ -269,8 +271,10 @@ struct file;
> struct perf_mmap_data {
> struct rcu_head rcu_head;
> int nr_pages;
> + int writable;
> atomic_t wakeup;
> atomic_t head;
> + atomic_t overflow;
> struct perf_counter_mmap_page *user_page;
> void *data_pages[0];
> };
> diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
> index 3b862a7..1f5c515 100644
> --- a/kernel/perf_counter.c
> +++ b/kernel/perf_counter.c
> @@ -1330,6 +1330,7 @@ static void __perf_counter_update_userpage(struct perf_counter *counter,
> userpg->offset -= atomic64_read(&counter->hw.prev_count);
>
> userpg->data_head = atomic_read(&data->head);
> + userpg->overflow = atomic_read(&data->overflow);
> smp_wmb();
> ++userpg->lock;
> preempt_enable();
> @@ -1375,6 +1376,28 @@ unlock:
> return ret;
> }
>
> +static int perf_mmap_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> + int ret = -EINVAL;
> +
> + rcu_read_lock();
> + data = rcu_dereference(counter->data);
> + if (!data)
> + goto unlock;
> +
> + /*
> + * Only allow writes to the control page.
> + */
> + if (page != virt_to_page(data->user_page))
> + goto unlock;
> +
> + ret = 0;
> +unlock:
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
I guess this:
rcu_read_lock();
data = rcu_dereference(counter->data);
/*
* Only allow writes to the control page.
*/
if (data && (page == virt_to_page(data->user_page)))
ret = 0;
rcu_read_unlock();
is more compact?
> static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
> {
> struct perf_mmap_data *data;
> @@ -1463,6 +1486,7 @@ static struct vm_operations_struct perf_mmap_vmops = {
> .open = perf_mmap_open,
> .close = perf_mmap_close,
> .fault = perf_mmap_fault,
> + .page_mkwrite = perf_mmap_mkwrite,
> };
(nit: this structure should align vertically)
>
> static int perf_mmap(struct file *file, struct vm_area_struct *vma)
> @@ -1473,7 +1497,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
> unsigned long locked, lock_limit;
> int ret = 0;
>
> - if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
> + if (!(vma->vm_flags & VM_SHARED))
> return -EINVAL;
>
> vma_size = vma->vm_end - vma->vm_start;
> @@ -1503,16 +1527,19 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
>
> mutex_lock(&counter->mmap_mutex);
> if (atomic_inc_not_zero(&counter->mmap_count))
> - goto out;
> + goto unlock;
>
> WARN_ON(counter->data);
> ret = perf_mmap_data_alloc(counter, nr_pages);
> - if (!ret)
> - atomic_set(&counter->mmap_count, 1);
> -out:
> + if (ret)
> + goto unlock;
> +
> + atomic_set(&counter->mmap_count, 1);
> + if (vma->vm_flags & VM_WRITE)
> + counter->data->writable = 1;
> +unlock:
> mutex_unlock(&counter->mmap_mutex);
>
> - vma->vm_flags &= ~VM_MAYWRITE;
does ->vm_flags have VM_MAYWRITE by default?
> vma->vm_flags |= VM_RESERVED;
> vma->vm_ops = &perf_mmap_vmops;
>
> @@ -1540,6 +1567,28 @@ struct perf_output_handle {
> int wakeup;
> };
>
> +static int perf_output_overflow(struct perf_mmap_data *data,
> + unsigned int offset, unsigned int head)
> +{
> + unsigned int tail;
> + unsigned int mask;
> +
> + if (!data->writable)
> + return 0;
so mmap()-ing it readonly turns off overflow detection
automatically? That's a nice touch i think - user-space can avoid
this overhead, if it does not care about overflows.
> + mask = (data->nr_pages << PAGE_SHIFT) - 1;
btw., we could have a data->mask.
> + smp_rmb();
> + tail = ACCESS_ONCE(data->user_page->data_tail);
> +
> + offset = (offset - tail) & mask;
> + head = (head - tail) & mask;
> +
> + if ((int)(head - offset) < 0)
> + return 1;
> +
> + return 0;
I guess it should use bool and return true/false.
> +}
> +
> static int perf_output_begin(struct perf_output_handle *handle,
> struct perf_counter *counter, unsigned int size)
> {
> @@ -1552,11 +1601,13 @@ static int perf_output_begin(struct perf_output_handle *handle,
> goto out;
>
> if (!data->nr_pages)
> - goto out;
> + goto fail;
>
> do {
> offset = head = atomic_read(&data->head);
> head += size;
> + if (unlikely(perf_output_overflow(data, offset, head)))
> + goto fail;
> } while (atomic_cmpxchg(&data->head, offset, head) != offset);
>
> handle->counter = counter;
> @@ -1567,6 +1618,8 @@ static int perf_output_begin(struct perf_output_handle *handle,
>
> return 0;
>
> +fail:
> + atomic_inc(&data->overflow);
data->user_page->overflow should be increased too - so that
user-space can see it.
And do we really need data->overflow?
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 17:16 ` Ingo Molnar
@ 2009-03-25 21:18 ` Peter Zijlstra
0 siblings, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 21:18 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
On Wed, 2009-03-25 at 18:16 +0100, Ingo Molnar wrote:
> * Peter Zijlstra <peterz@infradead.org> wrote:
>
> > On Wed, 2009-03-25 at 13:57 +0100, Peter Zijlstra wrote:
> > > On Wed, 2009-03-25 at 13:54 +0100, Ingo Molnar wrote:
> > > > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > > >
> > > > > On Wed, 2009-03-25 at 13:35 +0100, Ingo Molnar wrote:
> > > > >
> > > > > > > Also, when mixing streams (events,mmap) is a single: you missed
> > > > > > > 'n' events still good?
> > > > > >
> > > > > > How would such mixing work? Multiple counters streaming into the
> > > > > > same mmap area?
> > > > >
> > > > > No basically having overflow events and mmap-vma changed events in
> > > > > a single output stream.
> > > >
> > > > ah, and i missed the impact of variable size records - that too
> > > > makes it somewhat impractical to emit overflow records in situ. (the
> > > > kernel does not really know the precise start of the previous
> > > > record, typically.)
> > >
> > > Alternatively, we could simply not emit new events until the read
> > > position increases,. that's much simpler.
> > >
> > > Still don't like mapping the stuff writable though..
> >
> > This is what it would look like I suppose...
> >
> > Any thoughts?
> >
> > Not-signed-off-by: me
>
> (you dont like it?)
Yeah, I'm still unconvinced we need more than the 'we're losing data'
bit we already have and don't really like the extra code this
introduces, although it isn't nearly as bad as I initially thought it
would be.
> > ---
> > include/linux/perf_counter.h | 4 ++
> > kernel/perf_counter.c | 67 +++++++++++++++++++++++++++++++++++++----
> > 2 files changed, 64 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
> > index 6bf67ce..d5a599c 100644
> > --- a/include/linux/perf_counter.h
> > +++ b/include/linux/perf_counter.h
> > @@ -165,6 +165,8 @@ struct perf_counter_mmap_page {
> > __s64 offset; /* add to hardware counter value */
> >
> > __u32 data_head; /* head in the data section */
> > + __u32 data_tail; /* user-space written tail */
> > + __u32 overflow; /* number of lost events */
>
> small detail: i'd suggest to always pad things up to 64 bits. In
> case someone adds a new field with u64.
It's not a packed struct, so at worst it will result in a hole which can
later be filled. But sure.
> > };
> > +static int perf_mmap_mkwrite(struct vm_area_struct *vma, struct page *page)
> > +{
> > + int ret = -EINVAL;
> > +
> > + rcu_read_lock();
> > + data = rcu_dereference(counter->data);
> > + if (!data)
> > + goto unlock;
> > +
> > + /*
> > + * Only allow writes to the control page.
> > + */
> > + if (page != virt_to_page(data->user_page))
> > + goto unlock;
> > +
> > + ret = 0;
> > +unlock:
> > + rcu_read_unlock();
> > +
> > + return ret;
> > +}
> > +
>
> I guess this:
>
> rcu_read_lock();
> data = rcu_dereference(counter->data);
>
> /*
> * Only allow writes to the control page.
> */
> if (data && (page == virt_to_page(data->user_page))
> ret = 0;
>
> rcu_read_unlock();
>
> is more compact?
Ah, quite.
> >
> > - vma->vm_flags &= ~VM_MAYWRITE;
>
> does ->vm_fflags have VM_MAYWRITE by default?
do_mmap_pgoff() has:
vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
> > +static int perf_output_overflow(struct perf_mmap_data *data,
> > + unsigned int offset, unsigned int head)
> > +{
> > + unsigned int tail;
> > + unsigned int mask;
> > +
> > + if (!data->writable)
> > + return 0;
>
> so mmap()-ing it readonly turns off overflow detection
> automatically? That's a nice touch i think - user-space can avoid
> this overhead, if it does not care about overflows.
Yep.
> > + mask = (data->nr_pages << PAGE_SHIFT) - 1;
>
> btw., we could have a data->mask.
I thought about it, couldn't make up my mind about it; it's only 2
trivial integer ops.
> > + smp_rmb();
> > + tail = ACCESS_ONCE(data->user_page->data_tail);
> > +
> > + offset = (offset - tail) & mask;
> > + head = (head - tail) & mask;
> > +
> > + if ((int)(head - offset) < 0)
> > + return 1;
> > +
> > + return 0;
>
> I guess it should use bool and return true/false.
>
> > +}
Ah, right, we use this new-fangled C99 bool stuff these days ;-)
> > +fail:
> > + atomic_inc(&data->overflow);
>
> data->user_page->overflow should be increased too - so that
> user-space can see it.
Hmm, right, it would need to do that wake-up bit..
> And do we really need data->overflow?
atomic_t isn't really a user-exposed type; the assignment in
update_userpage() seemed like the best solution.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument
2009-03-25 12:27 ` Peter Zijlstra
2009-03-25 12:35 ` Ingo Molnar
@ 2009-03-26 2:22 ` Paul Mackerras
1 sibling, 0 replies; 27+ messages in thread
From: Paul Mackerras @ 2009-03-26 2:22 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, linux-kernel, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
Peter Zijlstra writes:
> One possible solution is making the control page writable and writing
> the userspace read position to it, then the kernel can, on
> perf_output_begin() detect the overflow and count the number of
> overwritten events.
>
> This overflow count could then be published back into the control page.
We could in principle have many different processes mmapping the same
counter and reading the ring buffer, couldn't we? So which process
gets to put its read position in the control page?
> TBH I'm not much of a fan, making all these pages writable just opens a
> whole can of worms, and that accurate overflow tracking will put more
> code in the output path.
I agree.
Paul.
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH 6/6] perf_counter: kerneltop: output event support
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
` (4 preceding siblings ...)
2009-03-25 11:30 ` [PATCH 5/6] perf_counter: kerneltop: mmap_pages argument Peter Zijlstra
@ 2009-03-25 11:30 ` Peter Zijlstra
2009-03-25 12:07 ` [tip:perfcounters/core] " Peter Zijlstra
2009-04-04 0:21 ` [PATCH 6/6] " Corey Ashford
2009-03-25 12:05 ` [PATCH 0/6] perf_counter: new output ABI Ingo Molnar
6 siblings, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 11:30 UTC (permalink / raw)
To: Ingo Molnar, linux-kernel
Cc: Paul Mackerras, Mike Galbraith, Arjan van de Ven, Wu Fengguang,
Peter Zijlstra
[-- Attachment #1: kerneltop-mmap-packets.patch --]
[-- Type: text/plain, Size: 3733 bytes --]
Teach kerneltop about the new output ABI.
XXX: anybody fancy integrating the PID/TID data into the output?
Bump the mmap_data pages a little because we bloated the output and
have to be more careful about overruns with structured data.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
Documentation/perf_counter/kerneltop.c | 65 +++++++++++++++++++++++++++++----
1 file changed, 59 insertions(+), 6 deletions(-)
Index: linux-2.6/Documentation/perf_counter/kerneltop.c
===================================================================
--- linux-2.6.orig/Documentation/perf_counter/kerneltop.c
+++ linux-2.6/Documentation/perf_counter/kerneltop.c
@@ -134,6 +134,11 @@
#endif
#define unlikely(x) __builtin_expect(!!(x), 0)
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
asmlinkage int sys_perf_counter_open(
struct perf_counter_hw_event *hw_event_uptr __user,
@@ -178,7 +183,7 @@ static int nr_cpus = 0;
static int nmi = 1;
static int group = 0;
static unsigned int page_size;
-static unsigned int mmap_pages = 4;
+static unsigned int mmap_pages = 16;
static char *vmlinux;
@@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
+ int diff;
gettimeofday(&this_read, NULL);
- if (head - old > md->mask) {
+ /*
+ * If we're further behind than half the buffer, there's a chance
+ * the writer will bite our tail and screw up the events under us.
+ *
+ * If we somehow ended up ahead of the head, we got messed up.
+ *
+ * In either case, truncate and restart at head.
+ */
+ diff = head - old;
+ if (diff > md->mask / 2 || diff < 0) {
struct timeval iv;
unsigned long msecs;
timersub(&this_read, &last_read, &iv);
msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
- fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
+ fprintf(stderr, "WARNING: failed to keep up with mmap data."
+ " Last read %lu msecs ago.\n", msecs);
+ /*
+ * head points to a known good entry, start there.
+ */
old = head;
}
last_read = this_read;
for (; old != head;) {
- __u64 *ptr = (__u64 *)&data[old & md->mask];
- old += sizeof(__u64);
+ struct event_struct {
+ struct perf_event_header header;
+ __u64 ip;
+ __u32 pid, tid;
+ } *event = (struct event_struct *)&data[old & md->mask];
+ struct event_struct event_copy;
+
+ unsigned int size = event->header.size;
- process_event(*ptr, md->counter);
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((old & md->mask) + size != ((old + size) & md->mask)) {
+ unsigned int offset = old;
+ unsigned int len = sizeof(*event), cpy;
+ void *dst = &event_copy;
+
+ do {
+ cpy = min(md->mask + 1 - (offset & md->mask), len);
+ memcpy(dst, &data[offset & md->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = &event_copy;
+ }
+
+ old += size;
+
+ switch (event->header.type) {
+ case PERF_EVENT_IP:
+ case PERF_EVENT_IP | __PERF_EVENT_TID:
+ process_event(event->ip, md->counter);
+ break;
+ }
}
md->prev = old;
@@ -1214,6 +1266,7 @@ int main(int argc, char *argv[])
hw_event.irq_period = event_count[counter];
hw_event.record_type = PERF_RECORD_IRQ;
hw_event.nmi = nmi;
+ hw_event.include_tid = 1;
fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
if (fd[i][counter] < 0) {
--
^ permalink raw reply [flat|nested] 27+ messages in thread
* [tip:perfcounters/core] perf_counter: kerneltop: output event support
2009-03-25 11:30 ` [PATCH 6/6] perf_counter: kerneltop: output event support Peter Zijlstra
@ 2009-03-25 12:07 ` Peter Zijlstra
2009-04-04 0:21 ` [PATCH 6/6] " Corey Ashford
1 sibling, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2009-03-25 12:07 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, paulus, hpa, mingo, a.p.zijlstra, efault, arjan,
tglx, fengguang.wu, mingo
Commit-ID: ee8ff2070444f88524ef89555e3996c8be44170d
Gitweb: http://git.kernel.org/tip/ee8ff2070444f88524ef89555e3996c8be44170d
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 25 Mar 2009 12:30:27 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 25 Mar 2009 13:02:53 +0100
perf_counter: kerneltop: output event support
Teach kerneltop about the new output ABI.
XXX: anybody fancy integrating the PID/TID data into the output?
Bump the mmap_data pages a little because we bloated the output and
have to be more careful about overruns with structured data.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20090325113317.192910290@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
Documentation/perf_counter/kerneltop.c | 65 +++++++++++++++++++++++++++++---
1 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c
index 3e45bf6..fda1438 100644
--- a/Documentation/perf_counter/kerneltop.c
+++ b/Documentation/perf_counter/kerneltop.c
@@ -134,6 +134,11 @@
#endif
#define unlikely(x) __builtin_expect(!!(x), 0)
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
asmlinkage int sys_perf_counter_open(
struct perf_counter_hw_event *hw_event_uptr __user,
@@ -178,7 +183,7 @@ static int nr_cpus = 0;
static int nmi = 1;
static int group = 0;
static unsigned int page_size;
-static unsigned int mmap_pages = 4;
+static unsigned int mmap_pages = 16;
static char *vmlinux;
@@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *md)
unsigned int head = mmap_read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
+ int diff;
gettimeofday(&this_read, NULL);
- if (head - old > md->mask) {
+ /*
+ * If we're further behind than half the buffer, there's a chance
+ * the writer will bite our tail and screw up the events under us.
+ *
+ * If we somehow ended up ahead of the head, we got messed up.
+ *
+ * In either case, truncate and restart at head.
+ */
+ diff = head - old;
+ if (diff > md->mask / 2 || diff < 0) {
struct timeval iv;
unsigned long msecs;
timersub(&this_read, &last_read, &iv);
msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
- fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
+ fprintf(stderr, "WARNING: failed to keep up with mmap data."
+ " Last read %lu msecs ago.\n", msecs);
+ /*
+ * head points to a known good entry, start there.
+ */
old = head;
}
last_read = this_read;
for (; old != head;) {
- __u64 *ptr = (__u64 *)&data[old & md->mask];
- old += sizeof(__u64);
+ struct event_struct {
+ struct perf_event_header header;
+ __u64 ip;
+ __u32 pid, tid;
+ } *event = (struct event_struct *)&data[old & md->mask];
+ struct event_struct event_copy;
+
+ unsigned int size = event->header.size;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((old & md->mask) + size != ((old + size) & md->mask)) {
+ unsigned int offset = old;
+ unsigned int len = sizeof(*event), cpy;
+ void *dst = &event_copy;
+
+ do {
+ cpy = min(md->mask + 1 - (offset & md->mask), len);
+ memcpy(dst, &data[offset & md->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = &event_copy;
+ }
- process_event(*ptr, md->counter);
+ old += size;
+
+ switch (event->header.type) {
+ case PERF_EVENT_IP:
+ case PERF_EVENT_IP | __PERF_EVENT_TID:
+ process_event(event->ip, md->counter);
+ break;
+ }
}
md->prev = old;
@@ -1214,6 +1266,7 @@ int main(int argc, char *argv[])
hw_event.irq_period = event_count[counter];
hw_event.record_type = PERF_RECORD_IRQ;
hw_event.nmi = nmi;
+ hw_event.include_tid = 1;
fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
if (fd[i][counter] < 0) {
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH 6/6] perf_counter: kerneltop: output event support
2009-03-25 11:30 ` [PATCH 6/6] perf_counter: kerneltop: output event support Peter Zijlstra
2009-03-25 12:07 ` [tip:perfcounters/core] " Peter Zijlstra
@ 2009-04-04 0:21 ` Corey Ashford
2009-04-04 12:17 ` Peter Zijlstra
1 sibling, 1 reply; 27+ messages in thread
From: Corey Ashford @ 2009-04-04 0:21 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, linux-kernel, Paul Mackerras, Mike Galbraith,
Arjan van de Ven, Wu Fengguang
As I was stealing code from kerneltop today to use in the PAPI profiling
implementation, I ran across the code below:
Peter Zijlstra wrote:
> Teach kerneltop about the new output ABI.
>
> XXX: anybody fancy integrating the PID/TID data into the output?
>
> Bump the mmap_data pages a little because we bloated the output and
> have to be more careful about overruns with structured data.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
[snip]
>
> @@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *
> unsigned int head = mmap_read_head(md);
> unsigned int old = md->prev;
> unsigned char *data = md->base + page_size;
> + int diff;
>
> gettimeofday(&this_read, NULL);
>
> - if (head - old > md->mask) {
> + /*
> + * If we're further behind than half the buffer, there's a chance
> + * the writer will bite our tail and screw up the events under us.
> + *
> + * If we somehow ended up ahead of the head, we got messed up.
> + *
> + * In either case, truncate and restart at head.
> + */
> + diff = head - old;
> + if (diff > md->mask / 2 || diff < 0) {
> struct timeval iv;
> unsigned long msecs;
>
> timersub(&this_read, &last_read, &iv);
> msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
>
> - fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
> + fprintf(stderr, "WARNING: failed to keep up with mmap data."
> + " Last read %lu msecs ago.\n", msecs);
>
[snip]
The test for diff < 0 looks incorrect to me. This shouldn't be an
error, because it will frequently be the case that the head has wrapped
around back to the beginning of the mmap'd pages, while old is near the end.
What it needs to find out, I think, is if the modulo distance between
old and head is greater than 1/2 of the span of the mmap'd pages.
Here's a suggested change:
- diff = head - old;
+ diff = (head - old) & md->mask;
- if (diff > md->mask / 2 || diff < 0) {
+ if (diff > md->mask / 2) {
What do you think?
- Corey
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 6/6] perf_counter: kerneltop: output event support
2009-04-04 0:21 ` [PATCH 6/6] " Corey Ashford
@ 2009-04-04 12:17 ` Peter Zijlstra
2009-04-04 18:10 ` Corey Ashford
0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2009-04-04 12:17 UTC (permalink / raw)
To: Corey Ashford
Cc: Ingo Molnar, linux-kernel, Paul Mackerras, Mike Galbraith,
Arjan van de Ven, Wu Fengguang
On Fri, 2009-04-03 at 17:21 -0700, Corey Ashford wrote:
> As I was stealing code from kerneltop today to use in the PAPI profiling
> implementation, I ran across the code below:
>
> Peter Zijlstra wrote:
> > Teach kerneltop about the new output ABI.
> >
> > XXX: anybody fancy integrating the PID/TID data into the output?
> >
> > Bump the mmap_data pages a little because we bloated the output and
> > have to be more careful about overruns with structured data.
> >
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > ---
> [snip]
>
> >
> > @@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *
> > unsigned int head = mmap_read_head(md);
> > unsigned int old = md->prev;
> > unsigned char *data = md->base + page_size;
> > + int diff;
> >
> > gettimeofday(&this_read, NULL);
> >
> > - if (head - old > md->mask) {
> > + /*
> > + * If we're further behind than half the buffer, there's a chance
> > + * the writer will bite our tail and screw up the events under us.
> > + *
> > + * If we somehow ended up ahead of the head, we got messed up.
> > + *
> > + * In either case, truncate and restart at head.
> > + */
> > + diff = head - old;
> > + if (diff > md->mask / 2 || diff < 0) {
> > struct timeval iv;
> > unsigned long msecs;
> >
> > timersub(&this_read, &last_read, &iv);
> > msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
> >
> > - fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
> > + fprintf(stderr, "WARNING: failed to keep up with mmap data."
> > + " Last read %lu msecs ago.\n", msecs);
> >
> [snip]
>
> The test for diff < 0 looks incorrect to me. This shouldn't be an
> error, because it will frequently be the case that the head has wrapped
> around back to the beginning of the mmap'd pages, while old is near the end.
>
> What it needs to find out, I think, is if the modulo distance between
> old and head is greater than 1/2 of the span of the mmap'd pages.
> Here's a suggested change:
>
> - diff = head - old;
> + diff = (head - old) & md->mask;
> - if (diff > md->mask / 2 || diff < 0) {
> + if (diff > md->mask / 2) {
>
>
> What do you think?
head and old are both u32 and are monotonically incremented, that means
that (s32)(head - old) < 0 will only be true if old is ahead (or more
than 2^31 behind) of head.
Since mask will be smaller than 2^31 this seemed like a reasonable
integrity test.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 6/6] perf_counter: kerneltop: output event support
2009-04-04 12:17 ` Peter Zijlstra
@ 2009-04-04 18:10 ` Corey Ashford
0 siblings, 0 replies; 27+ messages in thread
From: Corey Ashford @ 2009-04-04 18:10 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ingo Molnar, linux-kernel, Paul Mackerras, Mike Galbraith,
Arjan van de Ven, Wu Fengguang
Peter Zijlstra wrote:
> On Fri, 2009-04-03 at 17:21 -0700, Corey Ashford wrote:
>> As I was stealing code from kerneltop today to use in the PAPI profiling
>> implementation, I ran across the code below:
>>
>> Peter Zijlstra wrote:
>>> Teach kerneltop about the new output ABI.
>>>
>>> XXX: anybody fancy integrating the PID/TID data into the output?
>>>
>>> Bump the mmap_data pages a little because we bloated the output and
>>> have to be more careful about overruns with structured data.
>>>
>>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
>>> ---
>> [snip]
>>
>>> @@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *
>>> unsigned int head = mmap_read_head(md);
>>> unsigned int old = md->prev;
>>> unsigned char *data = md->base + page_size;
>>> + int diff;
>>>
>>> gettimeofday(&this_read, NULL);
>>>
>>> - if (head - old > md->mask) {
>>> + /*
>>> + * If we're further behind than half the buffer, there's a chance
>>> + * the writer will bite our tail and screw up the events under us.
>>> + *
>>> + * If we somehow ended up ahead of the head, we got messed up.
>>> + *
>>> + * In either case, truncate and restart at head.
>>> + */
>>> + diff = head - old;
>>> + if (diff > md->mask / 2 || diff < 0) {
>>> struct timeval iv;
>>> unsigned long msecs;
>>>
>>> timersub(&this_read, &last_read, &iv);
>>> msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
>>>
>>> - fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs);
>>> + fprintf(stderr, "WARNING: failed to keep up with mmap data."
>>> + " Last read %lu msecs ago.\n", msecs);
>>>
>> [snip]
>>
>> The test for diff < 0 looks incorrect to me. This shouldn't be an
>> error, because it will frequently be the case that the head has wrapped
>> around back to the beginning of the mmap'd pages, while old is near the end.
>>
>> What it needs to find out, I think, is if the modulo distance between
>> old and head is greater than 1/2 of the span of the mmap'd pages.
>> Here's a suggested change:
>>
>> - diff = head - old;
>> + diff = (head - old) & md->mask;
>> - if (diff > md->mask / 2 || diff < 0) {
>> + if (diff > md->mask / 2) {
>>
>>
>> What do you think?
>
> head and old are both u32 and are monotonically incremented, that means
> that (s32)(head - old) < 0 will only be true if old is ahead (or more
> than 2^31 behind) of head.
>
> Since mask will be smaller than 2^31 this seemed like a reasonable
> integrity test.
Ah, I see. I assumed (but did not check) that there was logic to wrap
the old and head back to the beginning when they hit the end of the
mmap'd pages.
Thanks for your reply.
- Corey
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 0/6] perf_counter: new output ABI
2009-03-25 11:30 [PATCH 0/6] perf_counter: new output ABI Peter Zijlstra
` (5 preceding siblings ...)
2009-03-25 11:30 ` [PATCH 6/6] perf_counter: kerneltop: output event support Peter Zijlstra
@ 2009-03-25 12:05 ` Ingo Molnar
6 siblings, 0 replies; 27+ messages in thread
From: Ingo Molnar @ 2009-03-25 12:05 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Paul Mackerras, Mike Galbraith, Arjan van de Ven,
Wu Fengguang
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> The following patch-set extends the perf_counter output ABI and
> updates kerneltop accordingly.
thanks Peter - these are really nice extensions.
Type/size records solves one of my worries about the event ABI: it
now becomes easily extensible in a backwards-compatible manner.
Applied them to tip:perfcounters/core with minor edits.
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread