From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>, Paul Mackerras <paulus@samba.org>,
stephane eranian <eranian@googlemail.com>
Cc: Corey J Ashford <cjashfor@us.ibm.com>,
LKML <linux-kernel@vger.kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 1/2] perf: rework the whole read vs group stuff
Date: Wed, 12 Aug 2009 17:35:30 +0200 [thread overview]
Message-ID: <20090812154118.044301415@chello.nl> (raw)
In-Reply-To: 20090812153529.716542680@chello.nl
[-- Attachment #1: perf-PERF_SAMPLE_READ.patch --]
[-- Type: text/plain, Size: 11588 bytes --]
Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce
PERF_FORMAT_GROUP to deal with group reads in a more generic way.
This allows you to get group reads out of read() as well.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/perf_counter.h | 36 +++++--
kernel/perf_counter.c | 213 +++++++++++++++++++++++++++++--------------
2 files changed, 172 insertions(+), 77 deletions(-)
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
- PERF_SAMPLE_GROUP = 1U << 4,
+ PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
@@ -130,13 +130,25 @@ enum perf_counter_sample_format {
* Bits that can be set in attr.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
+ *
+ * struct {
+ * { u64 nr; } && PERF_FORMAT_GROUP
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * };
+ *
+ * Where 'nr' defaults to 1 when !PERF_FORMAT_GROUP
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
- PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -343,10 +355,13 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, tid;
- * u64 value;
- * { u64 time_enabled; } && PERF_FORMAT_ENABLED
- * { u64 time_running; } && PERF_FORMAT_RUNNING
- * { u64 parent_id; } && PERF_FORMAT_ID
+ *
+ * { u64 nr; } && PERF_FORMAT_GROUP
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 parent_id; } && PERF_FORMAT_ID
+ * } cntr[nr];
* };
*/
PERF_EVENT_READ = 8,
@@ -364,11 +379,16 @@ enum perf_event_type {
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
- * { u64 nr;
- * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
+ * { { u64 nr; } && PERF_FORMAT_GROUP
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr]; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1692,7 +1692,32 @@ static int perf_release(struct inode *in
return 0;
}
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += counter->group_leader->nr_siblings;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+
+ return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
{
struct perf_counter *child;
u64 total = 0;
@@ -1704,14 +1729,34 @@ static u64 perf_counter_read_tree(struct
return total;
}
+static int perf_counter_read_entry(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ u64 values[2];
+ int n = 0, count = 0;
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ count = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, count))
+ return -EFAULT;
+
+ return count;
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
- u64 values[4];
- int n;
+ u64 read_format = counter->attr.read_format;
+ struct perf_counter *leader = counter, *sub;
+ int n = 0, size = 0, err = -EFAULT;
+ u64 values[3];
/*
* Return end-of-file for a read on a counter that is in
@@ -1721,28 +1766,52 @@ perf_read_hw(struct perf_counter *counte
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
+ if (count < perf_counter_read_size(counter))
+ return -ENOSPC;
+
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read_tree(counter);
- n = 1;
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = counter->total_time_enabled +
- atomic64_read(&counter->child_total_time_enabled);
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = counter->total_time_running +
- atomic64_read(&counter->child_total_time_running);
- if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = primary_counter_id(counter);
- mutex_unlock(&counter->child_mutex);
- if (count < n * sizeof(u64))
- return -EINVAL;
- count = n * sizeof(u64);
+ if (read_format & PERF_FORMAT_GROUP) {
+ leader = counter->group_leader;
+ values[n++] = 1 + leader->nr_siblings;
+ }
+ if (leader->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
+ if (leader->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
- if (copy_to_user(buf, values, count))
- return -EFAULT;
+ size = n * sizeof(u64);
- return count;
+ if (copy_to_user(buf, values, size))
+ goto unlock;
+
+ err = perf_counter_read_entry(leader, read_format, buf + size);
+ if (err < 0)
+ goto unlock;
+
+ size += err;
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ err = perf_counter_read_entry(counter, read_format,
+ buf + size);
+ if (err < 0)
+ goto unlock;
+
+ size += err;
+ }
+ }
+
+ err = size;
+unlock:
+ mutex_unlock(&counter->child_mutex);
+
+ return err;
}
static ssize_t
@@ -2631,6 +2700,49 @@ static u32 perf_counter_tid(struct perf_
return task_pid_nr_ns(p, counter->ns);
}
+void perf_output_read(struct perf_output_handle *handle, struct perf_counter *counter)
+{
+ struct perf_counter *leader = counter, *sub;
+ u64 read_format = counter->attr.read_format;
+ u64 values[5];
+ int n = 0;
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ leader = counter->group_leader;
+ values[n++] = 1 + leader->nr_siblings;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = leader->total_time_enabled;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = leader->total_time_running;
+
+ if (leader != counter)
+ leader->pmu->read(leader);
+ values[n++] = atomic64_read(&leader->count);
+
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(leader);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ n = 0;
+
+ if (sub != counter)
+ sub->pmu->read(sub);
+
+ values[n++] = atomic64_read(&sub->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(sub);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+ }
+ }
+}
+
void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
@@ -2642,10 +2754,6 @@ void perf_counter_output(struct perf_cou
struct {
u32 pid, tid;
} tid_entry;
- struct {
- u64 id;
- u64 counter;
- } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@@ -2700,10 +2808,8 @@ void perf_counter_output(struct perf_cou
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- if (sample_type & PERF_SAMPLE_GROUP) {
- header.size += sizeof(u64) +
- counter->nr_siblings * sizeof(group_entry);
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@@ -2761,25 +2867,10 @@ void perf_counter_output(struct perf_cou
perf_output_put(&handle, data->period);
/*
- * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
+ * XXX PERF_SAMPLE_READ vs inherited counters seems difficult.
*/
- if (sample_type & PERF_SAMPLE_GROUP) {
- struct perf_counter *leader, *sub;
- u64 nr = counter->nr_siblings;
-
- perf_output_put(&handle, nr);
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->pmu->read(sub);
-
- group_entry.id = primary_counter_id(sub);
- group_entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, group_entry);
- }
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@@ -2818,8 +2909,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
- u64 value;
- u64 format[3];
};
static void
@@ -2831,34 +2920,20 @@ perf_counter_read_event(struct perf_coun
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
- .size = sizeof(event) - sizeof(event.format),
+ .size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
- .value = atomic64_read(&counter->count),
};
- int ret, i = 0;
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_enabled;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_running;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_ID) {
- event.header.size += sizeof(u64);
- event.format[i++] = primary_counter_id(counter);
- }
+ int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
- perf_output_copy(&handle, &event, event.header.size);
+ perf_output_put(&handle, event);
+ perf_output_read(&handle, counter);
+
perf_output_end(&handle);
}
@@ -3929,9 +4004,9 @@ perf_counter_alloc(struct perf_counter_a
atomic64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+ * we currently do not support PERF_FORMAT_GROUP on inherited counters
*/
- if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+ if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto done;
switch (attr->type) {
--
next prev parent reply other threads:[~2009-08-12 15:42 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
2009-08-12 15:35 ` Peter Zijlstra [this message]
2009-08-12 15:35 ` [PATCH 2/2] perf_counter: Fix an ipi-deadlock Peter Zijlstra
2009-08-12 15:54 ` [PATCH 0/2] perf_counter: fix the group mess stephane eranian
2009-08-12 16:01 ` Peter Zijlstra
2009-08-13 7:51 ` [PATCH 3/2] perf tools: Fixup read ABI breakage Peter Zijlstra
2009-08-13 7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
2009-08-13 8:05 ` Frederic Weisbecker
2009-08-13 8:22 ` Peter Zijlstra
2009-08-13 8:29 ` Frederic Weisbecker
2009-08-13 10:21 ` [tip:perfcounters/urgent] " tip-bot for Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090812154118.044301415@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=cjashfor@us.ibm.com \
--cc=eranian@googlemail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.