public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] perf_counter: fix the group mess
@ 2009-08-12 15:35 Peter Zijlstra
  2009-08-12 15:35 ` [PATCH 1/2] perf: rework the whole read vs group stuff Peter Zijlstra
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-12 15:35 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, stephane eranian; +Cc: Corey J Ashford, LKML


With these two patches the below proglet gives:

# ./test
EVNT: 0xffffffff811c0f4c scale: nan ID: 37 CNT: 1006180 ID: 38 CNT: 1010230 ID: 39 CNT: 1010332 ID: 40 CNT: 1010420

I can't seem to catch the SIGIO thing, but at least all the counters report
more or less the same value.  Also, that scale stuff doesn't seem to quite work
yet.. I'll be poking at that too.



---
#include "perf.h"

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>

/*
 * Busy-loop for ~7e9 nops so the hardware cycle counters have
 * something to accumulate and overflow on.
 */
void work(void)
{
	int iter;

	for (iter = 0; iter < 1000000000; iter++) {
		__asm__ __volatile__(
			"nop\n\t"
			"nop\n\t"
			"nop\n\t"
			"nop\n\t"
			"nop\n\t"
			"nop\n\t"
			"nop");
	}
}

/* System page size; the mmap area below is sized in multiples of this. */
unsigned long page_size;
/* fd: group-leader counter fd; fd1: most recently opened sibling fd. */
int fd = -1, fd1 = 0;
/* Our own pid — the task the counters are attached to. */
pid_t me;
/* Base of the perf mmap area: one control page followed by data pages. */
void *output;

/*
 * SIGIO handler: note the wakeup and re-arm the leader for one more
 * overflow via PERF_COUNTER_IOC_REFRESH.
 *
 * printf()/fflush() are not async-signal-safe (they can deadlock if the
 * signal lands inside stdio); write(2) is, per signal-safety(7).
 */
void handle_sigio(int sig)
{
	ssize_t n;

	(void)sig;

	n = write(STDOUT_FILENO, "signal\n", sizeof("signal\n") - 1);
	(void)n;	/* nothing sensible to do on failure in a handler */

	ioctl(fd, PERF_COUNTER_IOC_REFRESH, 1);
}

/*
 * Fetch the kernel's producer position (data_head) from the mmap
 * control page.  The read barrier pairs with the kernel's write
 * barrier so that ring-buffer data written before data_head was
 * advanced is visible to us.
 */
static unsigned long mmap_read_head(void)
{
	struct perf_counter_mmap_page *control = output;
	long pos = control->data_head;

	rmb();
	return pos;
}

/* Start of the ring-buffer data: one control page past the mmap base. */
static void *mmap_read_base(void)
{
	return (char *)output + page_size;
}

/*
 * Layout of one sample as configured in main():
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ, with
 * read_format = TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING | ID | GROUP.
 */
struct event {
	struct perf_event_header header;

	u64 ip;			/* PERF_SAMPLE_IP */
	u64 nr;			/* entry count in cnt[] (PERF_FORMAT_GROUP) */
	u64 time_enabled;	/* PERF_FORMAT_TOTAL_TIME_ENABLED */
	u64 time_running;	/* PERF_FORMAT_TOTAL_TIME_RUNNING */
	struct {
		u64 val;	/* counter value */
		u64 id;		/* PERF_FORMAT_ID */
	} cnt[];		/* C99 flexible array member, not GNU cnt[0] */
};

int main(int argc, char **argv)
{
	struct perf_counter_attr attr;
	unsigned long offset = 0, head;
	int err, i;

	page_size = sysconf(_SC_PAGE_SIZE);
	me = getpid();

	/*
	 * Group leader: a sampling cycle counter.  PERF_SAMPLE_READ +
	 * PERF_FORMAT_GROUP makes each sample carry the values of all
	 * group members, plus the enabled/running times for scaling.
	 */
	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 1000000;
	attr.sample_type = PERF_SAMPLE_IP |
			   PERF_SAMPLE_READ;
	attr.read_format = PERF_FORMAT_TOTAL_TIME_RUNNING |
			   PERF_FORMAT_TOTAL_TIME_ENABLED |
			   PERF_FORMAT_ID |
			   PERF_FORMAT_GROUP;
	attr.disabled = 1;	/* armed later via PERF_COUNTER_IOC_REFRESH */
	attr.wakeup_events = 1;

	fd = sys_perf_counter_open(&attr, me, -1, -1, 0);
	if (fd < 0) {		/* fd 0 is valid; only -1 signals failure */
		perror("FAIL fd");	/* perror appends ": <errno msg>" */
		exit(-1);
	}

	/* Three non-sampling, always-on siblings in the leader's group. */
	attr.sample_period = 0;
	attr.disabled = 0;

	for (i = 0; i < 3; i++) {
		fd1 = sys_perf_counter_open(&attr, me, -1, fd, 0);
		if (fd1 < 0) {
			perror("FAIL fd1");
			exit(-1);
		}
	}

	/* Route overflow notifications to us as SIGIO. */
	signal(SIGIO, handle_sigio);
	err = fcntl(fd, F_SETOWN, me);
	if (err == -1) {
		perror("FAIL fcntl F_SETOWN");
		exit(-1);
	}
	/*
	 * F_SETOWN alone is not enough: without O_ASYNC the kernel never
	 * sends SIGIO at all, which is why the original version could not
	 * catch the signal (pointed out by Stephane Eranian in this thread).
	 */
	err = fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_ASYNC);
	if (err == -1) {
		perror("FAIL fcntl O_ASYNC");
		exit(-1);
	}

	/* One control page + two data pages (data must be 2^n pages). */
	output = mmap(NULL, page_size * 3, PROT_READ, MAP_SHARED, fd, 0);
	if (output == MAP_FAILED) {	/* MAP_FAILED, not a hand-rolled cast */
		perror("FAIL mmap");
		exit(-1);
	}

	/* Enable the leader for one overflow period and do the work. */
	ioctl(fd, PERF_COUNTER_IOC_REFRESH, 1);

	work();

	ioctl(fd, PERF_COUNTER_IOC_DISABLE, 0);

	head = mmap_read_head();

	/* Walk the ring buffer and dump every sample we collected. */
	while (offset < head) {
		struct event *evnt =
			(struct event *)((char *)mmap_read_base() + offset);

		offset += evnt->header.size;

		printf("EVNT: %p scale: %f ", (void *)evnt->ip,
				(double)evnt->time_running /
				(double)evnt->time_enabled);
		for (i = 0; i < evnt->nr; i++) {
			/* %llu is the portable spelling; %Lu is a GNUism */
			printf("ID: %llu CNT: %llu ",
					(unsigned long long)evnt->cnt[i].id,
					(unsigned long long)evnt->cnt[i].val);
		}
		printf("\n");
	}

	munmap(output, page_size * 3);
	close(fd);

	return 0;
}


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] perf: rework the whole read vs group stuff
  2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
@ 2009-08-12 15:35 ` Peter Zijlstra
  2009-08-12 15:35 ` [PATCH 2/2] perf_counter: Fix an ipi-deadlock Peter Zijlstra
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-12 15:35 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, stephane eranian
  Cc: Corey J Ashford, LKML, Peter Zijlstra

[-- Attachment #1: perf-PERF_SAMPLE_READ.patch --]
[-- Type: text/plain, Size: 11588 bytes --]

Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce
PERF_FORMAT_GROUP to deal with group reads in a more generic way.

This allows you to get group reads out of read() as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/perf_counter.h |   36 +++++--
 kernel/perf_counter.c        |  213 +++++++++++++++++++++++++++++--------------
 2 files changed, 172 insertions(+), 77 deletions(-)

Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_TID				= 1U << 1,
 	PERF_SAMPLE_TIME			= 1U << 2,
 	PERF_SAMPLE_ADDR			= 1U << 3,
-	PERF_SAMPLE_GROUP			= 1U << 4,
+	PERF_SAMPLE_READ			= 1U << 4,
 	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
 	PERF_SAMPLE_ID				= 1U << 6,
 	PERF_SAMPLE_CPU				= 1U << 7,
@@ -130,13 +130,25 @@ enum perf_counter_sample_format {
  * Bits that can be set in attr.read_format to request that
  * reads on the counter should return the indicated quantities,
  * in increasing order of bit value, after the counter value.
+ *
+ * struct {
+ * 	{ u64		nr; 		} && PERF_FORMAT_GROUP
+ * 	{ u64		time_enabled;	} && PERF_FORMAT_ENABLED
+ * 	{ u64		time_running;	} && PERF_FORMAT_RUNNING
+ * 	{ u64		value;
+ * 	  { u64		id;		} && PERF_FORMAT_ID
+ * 	} 		cntr[nr];
+ * };
+ *
+ * Where 'nr' defaults to 1 when !PERF_FORMAT_GROUP
  */
 enum perf_counter_read_format {
 	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
+	PERF_FORMAT_GROUP			= 1U << 3,
 
-	PERF_FORMAT_MAX = 1U << 3, 		/* non-ABI */
+	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
@@ -343,10 +355,13 @@ enum perf_event_type {
 	 * struct {
 	 * 	struct perf_event_header	header;
 	 * 	u32				pid, tid;
-	 * 	u64				value;
-	 * 	{ u64		time_enabled; 	} && PERF_FORMAT_ENABLED
-	 * 	{ u64		time_running; 	} && PERF_FORMAT_RUNNING
-	 * 	{ u64		parent_id;	} && PERF_FORMAT_ID
+	 *
+	 * 	{ u64		nr;		} && PERF_FORMAT_GROUP
+	 * 	{ u64		value;
+	 * 	  { u64		time_enabled; 	} && PERF_FORMAT_ENABLED
+	 * 	  { u64		time_running; 	} && PERF_FORMAT_RUNNING
+	 * 	  { u64		parent_id;	} && PERF_FORMAT_ID
+	 * 	} 				cntr[nr];
 	 * };
 	 */
 	PERF_EVENT_READ			= 8,
@@ -364,11 +379,16 @@ enum perf_event_type {
 	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
 	 * 	{ u64			period;   } && PERF_SAMPLE_PERIOD
 	 *
-	 *	{ u64			nr;
-	 *	  { u64 id, val; }	cnt[nr];  } && PERF_SAMPLE_GROUP
+	 *	{ { u64			nr;   } && PERF_FORMAT_GROUP
+	 *	  { u64		value;
+	 *	    { u64	time_enabled; } && PERF_FORMAT_ENABLED
+	 *	    { u64	time_running; } && PERF_FORMAT_RUNNING
+	 *	    { u64	id;           } && PERF_FORMAT_ID
+	 *	  }			cntr[nr]; } && PERF_SAMPLE_READ
 	 *
 	 *	{ u64			nr,
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+	 *
 	 *	{ u32			size;
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 * };
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1692,7 +1692,32 @@ static int perf_release(struct inode *in
 	return 0;
 }
 
-static u64 perf_counter_read_tree(struct perf_counter *counter)
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+	int entry = sizeof(u64); /* value */
+	int size = 0;
+	int nr = 1;
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		size += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		size += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_ID)
+		entry += sizeof(u64);
+
+	if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+		nr += counter->group_leader->nr_siblings;
+		size += sizeof(u64);
+	}
+
+	size += entry * nr;
+
+	return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
 {
 	struct perf_counter *child;
 	u64 total = 0;
@@ -1704,14 +1729,34 @@ static u64 perf_counter_read_tree(struct
 	return total;
 }
 
+static int perf_counter_read_entry(struct perf_counter *counter,
+				   u64 read_format, char __user *buf)
+{
+	u64 values[2];
+	int n = 0, count = 0;
+
+	values[n++] = perf_counter_read_value(counter);
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(counter);
+
+	count = n * sizeof(u64);
+
+	if (copy_to_user(buf, values, count))
+		return -EFAULT;
+
+	return count;
+}
+
 /*
  * Read the performance counter - simple non blocking version for now
  */
 static ssize_t
 perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 {
-	u64 values[4];
-	int n;
+	u64 read_format = counter->attr.read_format;
+	struct perf_counter *leader = counter, *sub;
+	int n = 0, size = 0, err = -EFAULT;
+	u64 values[3];
 
 	/*
 	 * Return end-of-file for a read on a counter that is in
@@ -1721,28 +1766,52 @@ perf_read_hw(struct perf_counter *counte
 	if (counter->state == PERF_COUNTER_STATE_ERROR)
 		return 0;
 
+	if (count < perf_counter_read_size(counter))
+		return -ENOSPC;
+
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
-	values[0] = perf_counter_read_tree(counter);
-	n = 1;
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		values[n++] = counter->total_time_enabled +
-			atomic64_read(&counter->child_total_time_enabled);
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		values[n++] = counter->total_time_running +
-			atomic64_read(&counter->child_total_time_running);
-	if (counter->attr.read_format & PERF_FORMAT_ID)
-		values[n++] = primary_counter_id(counter);
-	mutex_unlock(&counter->child_mutex);
 
-	if (count < n * sizeof(u64))
-		return -EINVAL;
-	count = n * sizeof(u64);
+	if (read_format & PERF_FORMAT_GROUP) {
+		leader = counter->group_leader;
+		values[n++] = 1 + leader->nr_siblings;
+	}
+	if (leader->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		values[n++] = leader->total_time_enabled +
+			atomic64_read(&leader->child_total_time_enabled);
+	}
+	if (leader->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		values[n++] = leader->total_time_running +
+			atomic64_read(&leader->child_total_time_running);
+	}
 
-	if (copy_to_user(buf, values, count))
-		return -EFAULT;
+	size = n * sizeof(u64);
 
-	return count;
+	if (copy_to_user(buf, values, size))
+		goto unlock;
+
+	err = perf_counter_read_entry(leader, read_format, buf + size);
+	if (err < 0)
+		goto unlock;
+
+	size += err;
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+			err = perf_counter_read_entry(sub, read_format,
+					buf + size);
+			if (err < 0)
+				goto unlock;
+
+			size += err;
+		}
+	}
+
+	err = size;
+unlock:
+	mutex_unlock(&counter->child_mutex);
+
+	return err;
 }
 
 static ssize_t
@@ -2631,6 +2700,49 @@ static u32 perf_counter_tid(struct perf_
 	return task_pid_nr_ns(p, counter->ns);
 }
 
+void perf_output_read(struct perf_output_handle *handle, struct perf_counter *counter)
+{
+	struct perf_counter *leader = counter, *sub;
+	u64 read_format = counter->attr.read_format;
+	u64 values[5];
+	int n = 0;
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		leader = counter->group_leader;
+		values[n++] = 1 + leader->nr_siblings;
+	}
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		values[n++] = leader->total_time_enabled;
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		values[n++] = leader->total_time_running;
+
+	if (leader != counter)
+		leader->pmu->read(leader);
+	values[n++] = atomic64_read(&leader->count);
+
+	if (read_format & PERF_FORMAT_ID)
+		values[n++] = primary_counter_id(leader);
+
+	perf_output_copy(handle, values, n * sizeof(u64));
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+			n = 0;
+
+			if (sub != counter)
+				sub->pmu->read(sub);
+
+			values[n++] = atomic64_read(&sub->count);
+			if (read_format & PERF_FORMAT_ID)
+				values[n++] = primary_counter_id(sub);
+
+			perf_output_copy(handle, values, n * sizeof(u64));
+		}
+	}
+}
+
 void perf_counter_output(struct perf_counter *counter, int nmi,
 				struct perf_sample_data *data)
 {
@@ -2642,10 +2754,6 @@ void perf_counter_output(struct perf_cou
 	struct {
 		u32 pid, tid;
 	} tid_entry;
-	struct {
-		u64 id;
-		u64 counter;
-	} group_entry;
 	struct perf_callchain_entry *callchain = NULL;
 	int callchain_size = 0;
 	u64 time;
@@ -2700,10 +2808,8 @@ void perf_counter_output(struct perf_cou
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		header.size += sizeof(u64);
 
-	if (sample_type & PERF_SAMPLE_GROUP) {
-		header.size += sizeof(u64) +
-			counter->nr_siblings * sizeof(group_entry);
-	}
+	if (sample_type & PERF_SAMPLE_READ)
+		header.size += perf_counter_read_size(counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		callchain = perf_callchain(data->regs);
@@ -2761,25 +2867,10 @@ void perf_counter_output(struct perf_cou
 		perf_output_put(&handle, data->period);
 
 	/*
-	 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
+	 * XXX PERF_SAMPLE_READ vs inherited counters seems difficult.
 	 */
-	if (sample_type & PERF_SAMPLE_GROUP) {
-		struct perf_counter *leader, *sub;
-		u64 nr = counter->nr_siblings;
-
-		perf_output_put(&handle, nr);
-
-		leader = counter->group_leader;
-		list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-			if (sub != counter)
-				sub->pmu->read(sub);
-
-			group_entry.id = primary_counter_id(sub);
-			group_entry.counter = atomic64_read(&sub->count);
-
-			perf_output_put(&handle, group_entry);
-		}
-	}
+	if (sample_type & PERF_SAMPLE_READ)
+		perf_output_read(&handle, counter);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		if (callchain)
@@ -2818,8 +2909,6 @@ struct perf_read_event {
 
 	u32				pid;
 	u32				tid;
-	u64				value;
-	u64				format[3];
 };
 
 static void
@@ -2831,34 +2920,20 @@ perf_counter_read_event(struct perf_coun
 		.header = {
 			.type = PERF_EVENT_READ,
 			.misc = 0,
-			.size = sizeof(event) - sizeof(event.format),
+			.size = sizeof(event) + perf_counter_read_size(counter),
 		},
 		.pid = perf_counter_pid(counter, task),
 		.tid = perf_counter_tid(counter, task),
-		.value = atomic64_read(&counter->count),
 	};
-	int ret, i = 0;
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = counter->total_time_enabled;
-	}
-
-	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = counter->total_time_running;
-	}
-
-	if (counter->attr.read_format & PERF_FORMAT_ID) {
-		event.header.size += sizeof(u64);
-		event.format[i++] = primary_counter_id(counter);
-	}
+	int ret;
 
 	ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
 	if (ret)
 		return;
 
-	perf_output_copy(&handle, &event, event.header.size);
+	perf_output_put(&handle, event);
+	perf_output_read(&handle, counter);
+
 	perf_output_end(&handle);
 }
 
@@ -3929,9 +4004,9 @@ perf_counter_alloc(struct perf_counter_a
 	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
-	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
+	 * we currently do not support PERF_FORMAT_GROUP on inherited counters
 	 */
-	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
+	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
 		goto done;
 
 	switch (attr->type) {

-- 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/2] perf_counter: Fix an ipi-deadlock
  2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
  2009-08-12 15:35 ` [PATCH 1/2] perf: rework the whole read vs group stuff Peter Zijlstra
@ 2009-08-12 15:35 ` Peter Zijlstra
  2009-08-12 15:54 ` [PATCH 0/2] perf_counter: fix the group mess stephane eranian
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-12 15:35 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, stephane eranian
  Cc: Corey J Ashford, LKML, Peter Zijlstra

[-- Attachment #1: perf-fix-pending.patch --]
[-- Type: text/plain, Size: 1369 bytes --]

perf_pending_counter() is called from IRQ context and will call
perf_counter_disable(), however perf_counter_disable() uses
smp_call_function_single() which doesn't fancy being used with IRQs
disabled due to IPI deadlocks.

Fix this by making it use the local __perf_counter_disable() call and
teaching the counter_sched_out() code about pending disables as well.

This should cover the case where a counter migrates before the pending
queue gets processed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/perf_counter.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -307,6 +307,10 @@ counter_sched_out(struct perf_counter *c
 		return;
 
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	if (counter->pending_disable) {
+		counter->pending_disable = 0;
+		counter->state = PERF_COUNTER_STATE_OFF;
+	}
 	counter->tstamp_stopped = ctx->time;
 	counter->pmu->disable(counter);
 	counter->oncpu = -1;
@@ -2315,7 +2319,7 @@ static void perf_pending_counter(struct 
 
 	if (counter->pending_disable) {
 		counter->pending_disable = 0;
-		perf_counter_disable(counter);
+		__perf_counter_disable(counter);
 	}
 
 	if (counter->pending_wakeup) {

-- 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/2] perf_counter: fix the group mess
  2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
  2009-08-12 15:35 ` [PATCH 1/2] perf: rework the whole read vs group stuff Peter Zijlstra
  2009-08-12 15:35 ` [PATCH 2/2] perf_counter: Fix an ipi-deadlock Peter Zijlstra
@ 2009-08-12 15:54 ` stephane eranian
  2009-08-12 16:01   ` Peter Zijlstra
  2009-08-13  7:51 ` [PATCH 3/2] perf tools: Fixup read ABI breakage Peter Zijlstra
  2009-08-13  7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
  4 siblings, 1 reply; 11+ messages in thread
From: stephane eranian @ 2009-08-12 15:54 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, Paul Mackerras, Corey J Ashford, LKML

On Wed, Aug 12, 2009 at 5:35 PM, Peter Zijlstra<a.p.zijlstra@chello.nl> wrote:
>
> With these two patches the below proglet gives:
>
> # ./test
> EVNT: 0xffffffff811c0f4c scale: nan ID: 37 CNT: 1006180 ID: 38 CNT: 1010230 ID: 39 CNT: 1010332 ID: 40 CNT: 1010420
>
> I can't seem to catch the SIGIO thing, but at least all the counters report

I think it's because you are missing this fcntl() call to get async
notification:

       ret = fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_ASYNC);
        if (ret == -1)
                err(1, "cannot set ASYNC");

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/2] perf_counter: fix the group mess
  2009-08-12 15:54 ` [PATCH 0/2] perf_counter: fix the group mess stephane eranian
@ 2009-08-12 16:01   ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-12 16:01 UTC (permalink / raw)
  To: eranian; +Cc: Ingo Molnar, Paul Mackerras, Corey J Ashford, LKML

On Wed, 2009-08-12 at 17:54 +0200, stephane eranian wrote:
> On Wed, Aug 12, 2009 at 5:35 PM, Peter Zijlstra<a.p.zijlstra@chello.nl> wrote:
> >
> > With these two patches the below proglet gives:
> >
> > # ./test
> > EVNT: 0xffffffff811c0f4c scale: nan ID: 37 CNT: 1006180 ID: 38 CNT: 1010230 ID: 39 CNT: 1010332 ID: 40 CNT: 1010420
> >
> > I can't seem to catch the SIGIO thing, but at least all the counters report
> 
> I think it's because you are missing this fcntl() call to get async
> notification:
> 
>        ret = fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_ASYNC);
>         if (ret == -1)
>                 err(1, "cannot set ASYNC");

Ah, indeed, thanks!

(with a reduced work function, or increased period)

# gcc -o test test.c; ./test
signal
signal
signal
signal
signal
signal
EVNT: 0x40091c scale: nan ID: 49 CNT: 1006676 ID: 50 CNT: 1010796 ID: 51 CNT: 1010960 ID: 52 CNT: 1010937
EVNT: 0x400928 scale: 1.000000 ID: 49 CNT: 2003019 ID: 50 CNT: 2010882 ID: 51 CNT: 2011475 ID: 52 CNT: 2012032
EVNT: 0x400928 scale: 1.000000 ID: 49 CNT: 3002707 ID: 50 CNT: 3015571 ID: 51 CNT: 3016714 ID: 52 CNT: 3017830
EVNT: 0x400916 scale: 1.000000 ID: 49 CNT: 4002677 ID: 50 CNT: 4020137 ID: 51 CNT: 4021880 ID: 52 CNT: 4023639
EVNT: 0x40091c scale: 1.000000 ID: 49 CNT: 5002749 ID: 50 CNT: 5024707 ID: 51 CNT: 5027078 ID: 52 CNT: 5029378
EVNT: 0x40091c scale: 1.000000 ID: 49 CNT: 6002799 ID: 50 CNT: 6029452 ID: 51 CNT: 6032339 ID: 52 CNT: 6035277



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/2] perf tools: Fixup read ABI breakage
  2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
                   ` (2 preceding siblings ...)
  2009-08-12 15:54 ` [PATCH 0/2] perf_counter: fix the group mess stephane eranian
@ 2009-08-13  7:51 ` Peter Zijlstra
  2009-08-13  7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
  4 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-13  7:51 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, stephane eranian, Corey J Ashford, LKML

Subject: perf tools: Fixup read ABI breakage
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu Aug 13 09:03:33 CEST 2009

Update the perf tool to the new read output format.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 tools/perf/builtin-record.c |    2 +-
 tools/perf/builtin-stat.c   |    8 +++++---
 tools/perf/util/event.h     |   10 ++++++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

Index: linux-2.6/tools/perf/builtin-record.c
===================================================================
--- linux-2.6.orig/tools/perf/builtin-record.c
+++ linux-2.6/tools/perf/builtin-record.c
@@ -370,9 +370,9 @@ static void create_counter(int counter, 
 	struct perf_header_attr *h_attr;
 	int track = !counter; /* only the first counter needs these */
 	struct {
-		u64 count;
 		u64 time_enabled;
 		u64 time_running;
+		u64 count;
 		u64 id;
 	} read_data;
 
Index: linux-2.6/tools/perf/builtin-stat.c
===================================================================
--- linux-2.6.orig/tools/perf/builtin-stat.c
+++ linux-2.6/tools/perf/builtin-stat.c
@@ -168,10 +168,12 @@ static void read_counter(int counter)
 		close(fd[cpu][counter]);
 		fd[cpu][counter] = -1;
 
-		count[0] += single_count[0];
 		if (scale) {
-			count[1] += single_count[1];
-			count[2] += single_count[2];
+			count[0] += single_count[2]; /* value   */
+			count[1] += single_count[0]; /* enabled */
+			count[2] += single_count[1]; /* running */
+		} else {
+			count[0] += single_count[0]; /* value   */
 		}
 	}
 
Index: linux-2.6/tools/perf/util/event.h
===================================================================
--- linux-2.6.orig/tools/perf/util/event.h
+++ linux-2.6/tools/perf/util/event.h
@@ -4,6 +4,9 @@
 #include "util.h"
 #include <linux/list.h>
 
+/*
+ * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
+ */
 struct ip_event {
 	struct perf_event_header header;
 	u64 ip;
@@ -38,12 +41,15 @@ struct lost_event {
 	u64 lost;
 };
 
-struct read_event {
+/*
+ * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
+ */
+struct read_event {
 	struct perf_event_header header;
 	u32 pid,tid;
-	u64 value;
 	u64 time_enabled;
 	u64 time_running;
+	u64 value;
 	u64 id;
 };
 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 4/2] perf_counter: Fix swcounter context invariance
  2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
                   ` (3 preceding siblings ...)
  2009-08-13  7:51 ` [PATCH 3/2] perf tools: Fixup read ABI breakage Peter Zijlstra
@ 2009-08-13  7:51 ` Peter Zijlstra
  2009-08-13  8:05   ` Frederic Weisbecker
  2009-08-13 10:21   ` [tip:perfcounters/urgent] " tip-bot for Peter Zijlstra
  4 siblings, 2 replies; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-13  7:51 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, stephane eranian, Corey J Ashford, LKML

Not really related to this topic, but it needs posting anyway.

---

Subject: perf_counter: Fix swcounter context invariance
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri Aug 07 13:29:13 CEST 2009

perf_swcounter_is_counting() uses a lock, which means we cannot use
swcounters from NMI or when holding that particular lock, this is
unintended.

The below removes the lock, this opens up race window, but not worse
than the swcounters already experience due to RCU traversal of the
context in perf_swcounter_ctx_event().

Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/perf_counter.c |   44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -3519,40 +3519,32 @@ static void perf_swcounter_add(struct pe
 
 static int perf_swcounter_is_counting(struct perf_counter *counter)
 {
-	struct perf_counter_context *ctx;
-	unsigned long flags;
-	int count;
-
+	/*
+	 * The counter is active, we're good!
+	 */
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		return 1;
 
+	/*
+	 * The counter is off/error, not counting.
+	 */
 	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
 		return 0;
 
 	/*
-	 * If the counter is inactive, it could be just because
-	 * its task is scheduled out, or because it's in a group
-	 * which could not go on the PMU.  We want to count in
-	 * the first case but not the second.  If the context is
-	 * currently active then an inactive software counter must
-	 * be the second case.  If it's not currently active then
-	 * we need to know whether the counter was active when the
-	 * context was last active, which we can determine by
-	 * comparing counter->tstamp_stopped with ctx->time.
-	 *
-	 * We are within an RCU read-side critical section,
-	 * which protects the existence of *ctx.
+	 * The counter is inactive, if the context is active
+	 * we're part of a group that didn't make it on the 'pmu',
+	 * not counting.
 	 */
-	ctx = counter->ctx;
-	spin_lock_irqsave(&ctx->lock, flags);
-	count = 1;
-	/* Re-check state now we have the lock */
-	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
-	    counter->ctx->is_active ||
-	    counter->tstamp_stopped < ctx->time)
-		count = 0;
-	spin_unlock_irqrestore(&ctx->lock, flags);
-	return count;
+	if (counter->ctx->is_active)
+		return 0;
+
+	/*
+	 * We're inactive and the context is too, this means the
+	 * task is scheduled out, we're counting events that happen
+	 * to us, like migration events.
+	 */
+	return 1;
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/2] perf_counter: Fix swcounter context invariance
  2009-08-13  7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
@ 2009-08-13  8:05   ` Frederic Weisbecker
  2009-08-13  8:22     ` Peter Zijlstra
  2009-08-13 10:21   ` [tip:perfcounters/urgent] " tip-bot for Peter Zijlstra
  1 sibling, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2009-08-13  8:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Paul Mackerras, stephane eranian, Corey J Ashford,
	LKML

On Thu, Aug 13, 2009 at 09:51:55AM +0200, Peter Zijlstra wrote:
> Not really related to this topic, but it needs posting anyway.
> 
> ---
> 
> Subject: perf_counter: Fix swcounter context invariance
> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date: Fri Aug 07 13:29:13 CEST 2009
> 
> perf_swcounter_is_counting() uses a lock, which means we cannot use
> swcounters from NMI or when holding that particular lock, this is
> unintended.
> 
> The below removes the lock, this opens up race window, but not worse
> than the swcounters already experience due to RCU traversal of the
> context in perf_swcounter_ctx_event().
> 
> Cc: Paul Mackerras <paulus@samba.org>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>



As a side effect, it's possible this also fixes the hard lockups
while opening a lockdep tracepoint counter.


> ---
>  kernel/perf_counter.c |   44 ++++++++++++++++++--------------------------
>  1 file changed, 18 insertions(+), 26 deletions(-)
> 
> Index: linux-2.6/kernel/perf_counter.c
> ===================================================================
> --- linux-2.6.orig/kernel/perf_counter.c
> +++ linux-2.6/kernel/perf_counter.c
> @@ -3519,40 +3519,32 @@ static void perf_swcounter_add(struct pe
>  
>  static int perf_swcounter_is_counting(struct perf_counter *counter)
>  {
> -	struct perf_counter_context *ctx;
> -	unsigned long flags;
> -	int count;
> -
> +	/*
> +	 * The counter is active, we're good!
> +	 */
>  	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
>  		return 1;
>  
> +	/*
> +	 * The counter is off/error, not counting.
> +	 */
>  	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
>  		return 0;
>  
>  	/*
> -	 * If the counter is inactive, it could be just because
> -	 * its task is scheduled out, or because it's in a group
> -	 * which could not go on the PMU.  We want to count in
> -	 * the first case but not the second.  If the context is
> -	 * currently active then an inactive software counter must
> -	 * be the second case.  If it's not currently active then
> -	 * we need to know whether the counter was active when the
> -	 * context was last active, which we can determine by
> -	 * comparing counter->tstamp_stopped with ctx->time.
> -	 *
> -	 * We are within an RCU read-side critical section,
> -	 * which protects the existence of *ctx.
> +	 * The counter is inactive, if the context is active
> +	 * we're part of a group that didn't make it on the 'pmu',
> +	 * not counting.
>  	 */
> -	ctx = counter->ctx;
> -	spin_lock_irqsave(&ctx->lock, flags);
> -	count = 1;
> -	/* Re-check state now we have the lock */
> -	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
> -	    counter->ctx->is_active ||
> -	    counter->tstamp_stopped < ctx->time)
> -		count = 0;
> -	spin_unlock_irqrestore(&ctx->lock, flags);
> -	return count;
> +	if (counter->ctx->is_active)
> +		return 0;
> +
> +	/*
> +	 * We're inactive and the context is too, this means the
> +	 * task is scheduled out, we're counting events that happen
> +	 * to us, like migration events.
> +	 */
> +	return 1;
>  }
>  
>  static int perf_swcounter_match(struct perf_counter *counter,
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/2] perf_counter: Fix swcounter context invariance
  2009-08-13  8:05   ` Frederic Weisbecker
@ 2009-08-13  8:22     ` Peter Zijlstra
  2009-08-13  8:29       ` Frederic Weisbecker
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2009-08-13  8:22 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Paul Mackerras, stephane eranian, Corey J Ashford,
	LKML

On Thu, 2009-08-13 at 10:05 +0200, Frederic Weisbecker wrote:
> On Thu, Aug 13, 2009 at 09:51:55AM +0200, Peter Zijlstra wrote:
> > Not really related to this topic, but it needs posting anyway.
> > 
> > ---
> > 
> > Subject: perf_counter: Fix swcounter context invariance
> > From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > Date: Fri Aug 07 13:29:13 CEST 2009
> > 
> > perf_swcounter_is_counting() uses a lock, which means we cannot use
> > swcounters from NMI or when holding that particular lock, this is
> > unintended.
> > 
> > The below removes the lock; this opens up a race window, but not worse
> > than the swcounters already experience due to RCU traversal of the
> > context in perf_swcounter_ctx_event().
> > 
> > Cc: Paul Mackerras <paulus@samba.org>
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> 
> 
> 
> As a side effect, it's possible this also fixes the hard lockups
> while opening a lockdep tracepoint counter.

It will -- not a side-effect at all, intended consequence :-)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/2] perf_counter: Fix swcounter context invariance
  2009-08-13  8:22     ` Peter Zijlstra
@ 2009-08-13  8:29       ` Frederic Weisbecker
  0 siblings, 0 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2009-08-13  8:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Paul Mackerras, stephane eranian, Corey J Ashford,
	LKML

On Thu, Aug 13, 2009 at 10:22:58AM +0200, Peter Zijlstra wrote:
> On Thu, 2009-08-13 at 10:05 +0200, Frederic Weisbecker wrote:
> > On Thu, Aug 13, 2009 at 09:51:55AM +0200, Peter Zijlstra wrote:
> > > Not really related to this topic, but it needs posting anyway.
> > > 
> > > ---
> > > 
> > > Subject: perf_counter: Fix swcounter context invariance
> > > From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > > Date: Fri Aug 07 13:29:13 CEST 2009
> > > 
> > > perf_swcounter_is_counting() uses a lock, which means we cannot use
> > > swcounters from NMI or when holding that particular lock, this is
> > > unintended.
> > > 
> > > The below removes the lock; this opens up a race window, but not worse
> > > than the swcounters already experience due to RCU traversal of the
> > > context in perf_swcounter_ctx_event().
> > > 
> > > Cc: Paul Mackerras <paulus@samba.org>
> > > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > 
> > 
> > 
> > As a side effect, it's possible this also fixes the hard lockups
> > while opening a lockdep tracepoint counter.
> 
> It will -- not a side-effect at all, intended consequence :-)


Great then! Important fix for .31 :-)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tip:perfcounters/urgent] perf_counter: Fix swcounter context invariance
  2009-08-13  7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
  2009-08-13  8:05   ` Frederic Weisbecker
@ 2009-08-13 10:21   ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-08-13 10:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, paulus, hpa, mingo, eranian, cjashfor, a.p.zijlstra,
	peterz, fweisbec, tglx, mingo

Commit-ID:  bcfc2602e8541ac13b1def38e2591dca072cff7a
Gitweb:     http://git.kernel.org/tip/bcfc2602e8541ac13b1def38e2591dca072cff7a
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 13 Aug 2009 09:51:55 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Thu, 13 Aug 2009 12:18:43 +0200

perf_counter: Fix swcounter context invariance

perf_swcounter_is_counting() uses a lock, which means we cannot
use swcounters from NMI or when holding that particular lock,
this is unintended.

The below removes the lock; this opens up a race window, but not
worse than the swcounters already experience due to RCU
traversal of the context in perf_swcounter_ctx_event().

This also fixes the hard lockups while opening a lockdep
tracepoint counter.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Corey J Ashford <cjashfor@us.ibm.com>
LKML-Reference: <1250149915.10001.66.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/perf_counter.c |   44 ++++++++++++++++++--------------------------
 1 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e26d2fc..3dd4339 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3444,40 +3444,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 
 static int perf_swcounter_is_counting(struct perf_counter *counter)
 {
-	struct perf_counter_context *ctx;
-	unsigned long flags;
-	int count;
-
+	/*
+	 * The counter is active, we're good!
+	 */
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		return 1;
 
+	/*
+	 * The counter is off/error, not counting.
+	 */
 	if (counter->state != PERF_COUNTER_STATE_INACTIVE)
 		return 0;
 
 	/*
-	 * If the counter is inactive, it could be just because
-	 * its task is scheduled out, or because it's in a group
-	 * which could not go on the PMU.  We want to count in
-	 * the first case but not the second.  If the context is
-	 * currently active then an inactive software counter must
-	 * be the second case.  If it's not currently active then
-	 * we need to know whether the counter was active when the
-	 * context was last active, which we can determine by
-	 * comparing counter->tstamp_stopped with ctx->time.
-	 *
-	 * We are within an RCU read-side critical section,
-	 * which protects the existence of *ctx.
+	 * The counter is inactive, if the context is active
+	 * we're part of a group that didn't make it on the 'pmu',
+	 * not counting.
 	 */
-	ctx = counter->ctx;
-	spin_lock_irqsave(&ctx->lock, flags);
-	count = 1;
-	/* Re-check state now we have the lock */
-	if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
-	    counter->ctx->is_active ||
-	    counter->tstamp_stopped < ctx->time)
-		count = 0;
-	spin_unlock_irqrestore(&ctx->lock, flags);
-	return count;
+	if (counter->ctx->is_active)
+		return 0;
+
+	/*
+	 * We're inactive and the context is too, this means the
+	 * task is scheduled out, we're counting events that happen
+	 * to us, like migration events.
+	 */
+	return 1;
 }
 
 static int perf_swcounter_match(struct perf_counter *counter,

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2009-08-13 10:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-12 15:35 [PATCH 0/2] perf_counter: fix the group mess Peter Zijlstra
2009-08-12 15:35 ` [PATCH 1/2] perf: rework the whole read vs group stuff Peter Zijlstra
2009-08-12 15:35 ` [PATCH 2/2] perf_counter: Fix an ipi-deadlock Peter Zijlstra
2009-08-12 15:54 ` [PATCH 0/2] perf_counter: fix the group mess stephane eranian
2009-08-12 16:01   ` Peter Zijlstra
2009-08-13  7:51 ` [PATCH 3/2] perf tools: Fixup read ABI breakage Peter Zijlstra
2009-08-13  7:51 ` [PATCH 4/2] perf_counter: Fix swcounter context invariance Peter Zijlstra
2009-08-13  8:05   ` Frederic Weisbecker
2009-08-13  8:22     ` Peter Zijlstra
2009-08-13  8:29       ` Frederic Weisbecker
2009-08-13 10:21   ` [tip:perfcounters/urgent] " tip-bot for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox