public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/1] perf: Fix race in sample output
@ 2014-06-25 18:44 Jiri Olsa
  2014-06-25 18:44 ` [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format " Jiri Olsa
  0 siblings, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2014-06-25 18:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Arnaldo Carvalho de Melo, Corey Ashford, Frederic Weisbecker,
	Ingo Molnar, Paul Mackerras, Peter Zijlstra, Jiri Olsa

hi,
the perf test patch below is not to be merged. It's
just to show how to hit the issue fixed by patch 1/1.

thanks,
jirka


Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/perf/Makefile.perf          |   1 +
 tools/perf/tests/builtin-test.c   |   4 +
 tools/perf/tests/group-read-bug.c | 169 ++++++++++++++++++++++++++++++++++++++
 tools/perf/tests/tests.h          |   1 +
 4 files changed, 175 insertions(+)
 create mode 100644 tools/perf/tests/group-read-bug.c

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 9670a16..909084f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -419,6 +419,7 @@ endif
 endif
 LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
 LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
+LIB_OBJS += $(OUTPUT)tests/group-read-bug.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 6f8b01b..2b2d544 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -154,6 +154,10 @@ static struct test {
 		.func = test__hists_cumulate,
 	},
 	{
+		.desc = "Test group read bug",
+		.func = test__group_read_bug,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/group-read-bug.c b/tools/perf/tests/group-read-bug.c
new file mode 100644
index 0000000..d69fabf
--- /dev/null
+++ b/tools/perf/tests/group-read-bug.c
@@ -0,0 +1,169 @@
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <pthread.h>
+#include "tests.h"
+#include "perf.h"
+#include "debug.h"
+#include "trace-event.h"
+
+static int fd_leader;
+static bool done;
+static int pid;
+
+static int create_event(int group_fd)
+{
+	struct perf_event_attr pe;
+	struct event_format* format;
+	int fd;
+
+	memset(&pe, 0, sizeof(struct perf_event_attr));
+	pe.type = PERF_TYPE_TRACEPOINT;
+	pe.size = sizeof(struct perf_event_attr);
+
+	format = trace_event__tp_format("syscalls", "sys_enter_read");
+	if (!format)
+		return TEST_FAIL;
+
+	pe.config = format->id;
+	pe.sample_period = 1;
+	pe.sample_type = PERF_SAMPLE_ID|PERF_SAMPLE_READ;
+	pe.read_format = PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+	if (group_fd == -1)
+		pe.disabled = 1;
+
+	fd = sys_perf_event_open(&pe, pid, -1, group_fd, 0);
+	if (fd < 0) {
+		pr_debug("failed opening event %llx, errno %d\n",
+			 pe.config, errno);
+		return TEST_FAIL;
+	}
+
+	return fd;
+}
+
+static __u64 read_head(struct perf_event_mmap_page *pc)
+{
+	__u64 head = ACCESS_ONCE(pc->data_head);
+	rmb();
+	return head;
+}
+
+static void write_tail(struct perf_event_mmap_page *pc, __u64 tail)
+{
+	mb();
+	pc->data_tail = tail;
+}
+
+static void mmap_read(struct perf_event_mmap_page *pc, int mask)
+{
+	__u64 old = 0;
+	__u64 empty = 0;
+
+	while (!done) {
+		__u64 size, head;
+
+		head = read_head(pc);
+
+		if (old == head) {
+			empty++;
+			continue;
+		}
+
+		size = head - old;
+
+		pr_debug("empty %llu, head = %llu, size %llu\n", empty, head, size);
+		empty = 0;
+
+		if ((old & mask) + size != (head & mask)) {
+			size = mask + 1 - (old & mask);
+			old += size;
+		}
+
+		size = head - old;
+		old += size;
+		write_tail(pc, old);
+	}
+}
+
+static void *worker_thread(void *data __maybe_unused)
+{
+#define CNT 1000
+	int fds[CNT];
+
+	while (!done) {
+		int i;
+
+		for (i = 0; i < CNT; i++)
+			fds[i] = create_event(fd_leader);
+
+		for (i = 0; i < CNT; i++)
+			close(fds[i]);
+	}
+
+	return NULL;
+}
+
+static void *gen_thread(void *data __maybe_unused)
+{
+	pid = syscall(SYS_gettid);
+
+	while (!done) {
+		int i;
+
+		i = read(300, &i, sizeof(i));
+	}
+
+	return NULL;
+}
+
+static void signal_fn(int signo __maybe_unused)
+{
+	done = 1;
+}
+
+int test__group_read_bug(void)
+{
+	pthread_t pthread, pthread_gen;
+	int mmap_len  = page_size + (page_size  << 5);
+	int mmap_mask = mmap_len - page_size - 1;
+	void *buf;
+	int err;
+
+	signal(SIGINT,  signal_fn);
+
+	err = pthread_create(&pthread, NULL, gen_thread, NULL);
+	TEST_ASSERT_VAL("create gen", !err);
+
+	while(!pid) {
+		pr_debug("waiting for pid\n");
+		sleep(1);
+	}
+
+	fd_leader = create_event(-1);
+	TEST_ASSERT_VAL("create a leader", fd_leader >= 0);
+
+	buf = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, MAP_SHARED, fd_leader, 0);
+	TEST_ASSERT_VAL("mmap leader", buf != MAP_FAILED);
+
+	err = ioctl(fd_leader, PERF_EVENT_IOC_ENABLE, 0);
+	TEST_ASSERT_VAL("enable leader", !err);
+
+	err = pthread_create(&pthread_gen, NULL, worker_thread, NULL);
+	TEST_ASSERT_VAL("create worker", !err);
+
+	mmap_read(buf, mmap_mask);
+
+	munmap(buf, mmap_len);
+	close(fd_leader);
+
+	pthread_join(pthread, NULL);
+	pthread_join(pthread_gen, NULL);
+	return 0;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index ed64790..af43c47 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -48,6 +48,7 @@ int test__mmap_thread_lookup(void);
 int test__thread_mg_share(void);
 int test__hists_output(void);
 int test__hists_cumulate(void);
+int test__group_read_bug(void);
 
 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format sample output
  2014-06-25 18:44 [PATCH 0/1] perf: Fix race in sample output Jiri Olsa
@ 2014-06-25 18:44 ` Jiri Olsa
  2014-07-02  7:56   ` Jiri Olsa
  2014-07-07  9:04   ` Peter Zijlstra
  0 siblings, 2 replies; 6+ messages in thread
From: Jiri Olsa @ 2014-06-25 18:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jiri Olsa, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Ingo Molnar, Paul Mackerras, Peter Zijlstra,
	Jiri Olsa

From: Jiri Olsa <jolsa@redhat.com>

While iterating siblings in perf_output_read_group we could
race with addition and removal of a sibling in perf_group_attach
and perf_group_detach respectively.

While in perf_output_read_group we are under active context,
so the only sibling_list modification could come via IPI in:
  perf_install_in_context or perf_remove_from_context

Disable interrupts before iterating siblings to prevent
this race.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 kernel/events/core.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a33d9a2b..66649d3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4509,6 +4509,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
+	unsigned long flags;
 	u64 values[5];
 	int n = 0;
 
@@ -4529,6 +4530,15 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
 	__output_copy(handle, values, n * sizeof(u64));
 
+	/*
+	 * We are now under active context, so the only sibling_list
+	 * modification could come via IPI in:
+	 *   perf_install_in_context and perf_remove_from_context
+	 *
+	 * Disable interrupts to prevent this race.
+	 */
+	local_irq_save(flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
@@ -4542,6 +4552,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
 		__output_copy(handle, values, n * sizeof(u64));
 	}
+	local_irq_restore(flags);
 }
 
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format sample output
  2014-06-25 18:44 ` [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format " Jiri Olsa
@ 2014-07-02  7:56   ` Jiri Olsa
  2014-07-07  9:04   ` Peter Zijlstra
  1 sibling, 0 replies; 6+ messages in thread
From: Jiri Olsa @ 2014-07-02  7:56 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Ingo Molnar, Paul Mackerras, Peter Zijlstra

hi,
any feedback?

thanks,
jirka

On Wed, Jun 25, 2014 at 08:44:35PM +0200, Jiri Olsa wrote:
> From: Jiri Olsa <jolsa@redhat.com>
> 
> While iterating siblings in perf_output_read_group we could
> race with addition and removal of sibling in perf_group_attach
> and perf_group_detach respective.
> 
> While in perf_output_read_group we are under active context,
> so the only sibling_list modification could come via IPI in:
>   perf_install_in_context or perf_remove_from_context
> 
> Disable interrupts before iterating siblings to prevent
> this race.
> 
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  kernel/events/core.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index a33d9a2b..66649d3 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4509,6 +4509,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  {
>  	struct perf_event *leader = event->group_leader, *sub;
>  	u64 read_format = event->attr.read_format;
> +	unsigned long flags;
>  	u64 values[5];
>  	int n = 0;
>  
> @@ -4529,6 +4530,15 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  
>  	__output_copy(handle, values, n * sizeof(u64));
>  
> +	/*
> +	 * We are now under active context, so the only sibling_list
> +	 * modification could come via IPI in:
> +	 *   perf_install_in_context and perf_remove_from_context
> +	 *
> +	 * Disable interrupts to prevent this race.
> +	 */
> +	local_irq_save(flags);
> +
>  	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
>  		n = 0;
>  
> @@ -4542,6 +4552,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  
>  		__output_copy(handle, values, n * sizeof(u64));
>  	}
> +	local_irq_restore(flags);
>  }
>  
>  #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format sample output
  2014-06-25 18:44 ` [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format " Jiri Olsa
  2014-07-02  7:56   ` Jiri Olsa
@ 2014-07-07  9:04   ` Peter Zijlstra
  2014-07-07 10:20     ` Jiri Olsa
  1 sibling, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2014-07-07  9:04 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, Jiri Olsa, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Ingo Molnar, Paul Mackerras

[-- Attachment #1: Type: text/plain, Size: 2199 bytes --]

On Wed, Jun 25, 2014 at 08:44:35PM +0200, Jiri Olsa wrote:
> From: Jiri Olsa <jolsa@redhat.com>
> 
> While iterating siblings in perf_output_read_group we could
> race with addition and removal of sibling in perf_group_attach
> and perf_group_detach respective.

So why would anybody do this?

> While in perf_output_read_group we are under active context,
> so the only sibling_list modification could come via IPI in:
>   perf_install_in_context or perf_remove_from_context
> 
> Disable interrupts before iterating siblings to prevent
> this race.
> 
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> Cc: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  kernel/events/core.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index a33d9a2b..66649d3 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4509,6 +4509,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  {
>  	struct perf_event *leader = event->group_leader, *sub;
>  	u64 read_format = event->attr.read_format;
> +	unsigned long flags;
>  	u64 values[5];
>  	int n = 0;
>  
> @@ -4529,6 +4530,15 @@ static void perf_output_read_group(struct perf_output_handle *handle,
>  
>  	__output_copy(handle, values, n * sizeof(u64));
>  
> +	/*
> +	 * We are now under active context, so the only sibling_list
> +	 * modification could come via IPI in:
> +	 *   perf_install_in_context and perf_remove_from_context
> +	 *
> +	 * Disable interrupts to prevent this race.
> +	 */
> +	local_irq_save(flags);

I think this is too late; you want it right at the beginning, before we
read ->nr_siblings, as that is also changed by
add_event_to_ctx()->perf_group_attach().

That said; it would be nice not to have to poke at the interrupt flag,
it's expensive.

So is this really a problem, or just a case of: if you do silly things,
you get silly results?

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format sample output
  2014-07-07  9:04   ` Peter Zijlstra
@ 2014-07-07 10:20     ` Jiri Olsa
  2014-07-07 11:43       ` Peter Zijlstra
  0 siblings, 1 reply; 6+ messages in thread
From: Jiri Olsa @ 2014-07-07 10:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jiri Olsa, linux-kernel, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Ingo Molnar, Paul Mackerras

On Mon, Jul 07, 2014 at 11:04:28AM +0200, Peter Zijlstra wrote:
> On Wed, Jun 25, 2014 at 08:44:35PM +0200, Jiri Olsa wrote:
> > From: Jiri Olsa <jolsa@redhat.com>
> > 
> > While iterating siblings in perf_output_read_group we could
> > race with addition and removal of sibling in perf_group_attach
> > and perf_group_detach respective.
> 
> So why would anybody do this?

the test program from the 0/1 email hangs my server,
but there is no standard reason to do this AFAICS

> 
> > While in perf_output_read_group we are under active context,
> > so the only sibling_list modification could come via IPI in:
> >   perf_install_in_context or perf_remove_from_context
> > 
> > Disable interrupts before iterating siblings to prevent
> > this race.
> > 
> > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> > Cc: Frederic Weisbecker <fweisbec@gmail.com>
> > Cc: Ingo Molnar <mingo@kernel.org>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  kernel/events/core.c | 11 +++++++++++
> >  1 file changed, 11 insertions(+)
> > 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index a33d9a2b..66649d3 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -4509,6 +4509,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> >  {
> >  	struct perf_event *leader = event->group_leader, *sub;
> >  	u64 read_format = event->attr.read_format;
> > +	unsigned long flags;
> >  	u64 values[5];
> >  	int n = 0;
> >  
> > @@ -4529,6 +4530,15 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> >  
> >  	__output_copy(handle, values, n * sizeof(u64));
> >  
> > +	/*
> > +	 * We are now under active context, so the only sibling_list
> > +	 * modification could come via IPI in:
> > +	 *   perf_install_in_context and perf_remove_from_context
> > +	 *
> > +	 * Disable interrupts to prevent this race.
> > +	 */
> > +	local_irq_save(flags);
> 
> I think this is too late; you want it right at the beginning, before we
> read ->nr_siblings, as that is also changed by
> add_event_to_ctx()->perf_group_attach().
> 
> That said; it would be nice not to have to poke at the interrupt flag,
> its expensive.

right.. I'll check if we could use the rcu loop/locking here

> 
> So is this really a problem, or just a case of: if you do silly things,
> you get silly results?

I've got soft lockups, sometimes ended up with an unkillable perf process,
and also a few total server hangs

jirka

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format sample output
  2014-07-07 10:20     ` Jiri Olsa
@ 2014-07-07 11:43       ` Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2014-07-07 11:43 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jiri Olsa, linux-kernel, Arnaldo Carvalho de Melo, Corey Ashford,
	Frederic Weisbecker, Ingo Molnar, Paul Mackerras

[-- Attachment #1: Type: text/plain, Size: 822 bytes --]

On Mon, Jul 07, 2014 at 12:20:20PM +0200, Jiri Olsa wrote:
> On Mon, Jul 07, 2014 at 11:04:28AM +0200, Peter Zijlstra wrote:
> > On Wed, Jun 25, 2014 at 08:44:35PM +0200, Jiri Olsa wrote:
> > > From: Jiri Olsa <jolsa@redhat.com>
> > > 
> > > While iterating siblings in perf_output_read_group we could
> > > race with addition and removal of sibling in perf_group_attach
> > > and perf_group_detach respective.
> > 
> > So why would anybody do this?
> 
> the test program from 0/1 email hangs up my server
> but no standard reason AFAICS
> 
> I've got soft lockup, sometimes ended up with unkillable perf process
> also few total server hangs

OK, so that's useful information to have in a Changelog ;-)

Not immediately obvious how that can happen though. I can see the output
getting scrambled.



[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-07-07 11:44 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-25 18:44 [PATCH 0/1] perf: Fix race in sample output Jiri Olsa
2014-06-25 18:44 ` [PATCH 1/1] perf: Prevent race in PERF_SAMPLE_READ group format " Jiri Olsa
2014-07-02  7:56   ` Jiri Olsa
2014-07-07  9:04   ` Peter Zijlstra
2014-07-07 10:20     ` Jiri Olsa
2014-07-07 11:43       ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox