public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: vatsa@in.ibm.com
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>,
	Ingo Molnar <mingo@elte.hu>,
	linux-kernel@vger.kernel.org, Mike Galbraith <efault@gmx.de>,
	Thomas Gleixner <tglx@linutronix.de>,
	Paul Mackerras <paulus@samba.org>,
	Corey Ashford <cjashfor@linux.vnet.ibm.com>,
	Oleg Nesterov <oleg@redhat.com>
Subject: Re: perf counter issue - WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
Date: Fri, 15 May 2009 22:27:08 +0200	[thread overview]
Message-ID: <1242419228.32543.360.camel@laptop> (raw)
In-Reply-To: <1242409073.32543.187.camel@laptop>

On Fri, 2009-05-15 at 19:37 +0200, Peter Zijlstra wrote:
> On Fri, 2009-05-15 at 18:13 +0200, Peter Zijlstra wrote:
> > On Fri, 2009-05-15 at 21:28 +0530, Srivatsa Vaddagiri wrote:
> > > On Fri, May 15, 2009 at 11:51:44AM -0300, Arnaldo Carvalho de Melo wrote:
> > > > > hm, is there a reproducer perhaps? Is there some class file i could 
> > > > > run with specific parameters to reproduce it?
> > > > 
> > > > I'll try this with some java benchmarks we have, AMQP related, lets see
> > > > if I can reproduce it.
> > > 
> > > I tried this with SPECJbb - which I am not at liberty to distribute
> > > unfortunately. I will try and recreate it with volanomark or other open
> > > source benchmarks.
> > 
> > I could indeed reproduce with vmark. Am poking at it.. still clueless
> > though ;-)
> 
> [root@opteron tmp]# cat foo.c
> #include <pthread.h>
> #include <unistd.h>
> 
> void *thread(void *arg)
> {
>         sleep(5);
>         return NULL;
> }
> 
> void main(void)
> {
>         pthread_t thr;
>         pthread_create(&thr, NULL, thread, NULL);
> }
> 
> The above instantly triggers it. It appears we fail to cleanup on the
> reparent path. I'll go root around in exit.c.

OK, so the cleanup isn't solid.. I've been poking at things, and below
is the current state of my tinkering, but it seems to make things
worse...

With only the callback in do_exit() the above test works but hackbench
fails, with only the call in wait_task_zombie() hackbench works and the
above fails.

With both, we segfault the kernel on a list op on either :-)

Will continue poking tomorrow and such, unless someone beats me to it.


---
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -115,6 +115,7 @@ list_add_counter(struct perf_counter *co
 	}
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
+	ctx->nr_counters++;
 }
 
 static void
@@ -122,6 +123,8 @@ list_del_counter(struct perf_counter *co
 {
 	struct perf_counter *sibling, *tmp;
 
+	ctx->nr_counters--;
+
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
 
@@ -209,7 +212,6 @@ static void __perf_counter_remove_from_c
 	counter_sched_out(counter, cpuctx, ctx);
 
 	counter->task = NULL;
-	ctx->nr_counters--;
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -276,7 +278,6 @@ retry:
 	 * succeed.
 	 */
 	if (!list_empty(&counter->list_entry)) {
-		ctx->nr_counters--;
 		list_del_counter(counter, ctx);
 		counter->task = NULL;
 	}
@@ -544,7 +545,6 @@ static void add_counter_to_ctx(struct pe
 			       struct perf_counter_context *ctx)
 {
 	list_add_counter(counter, ctx);
-	ctx->nr_counters++;
 	counter->prev_state = PERF_COUNTER_STATE_OFF;
 	counter->tstamp_enabled = ctx->time;
 	counter->tstamp_running = ctx->time;
@@ -3252,8 +3252,8 @@ __perf_counter_exit_task(struct task_str
 	 */
 	if (child != current) {
 		wait_task_inactive(child, 0);
-		list_del_init(&child_counter->list_entry);
 		update_counter_times(child_counter);
+		list_del_counter(child_counter, child_ctx);
 	} else {
 		struct perf_cpu_context *cpuctx;
 		unsigned long flags;
@@ -3272,9 +3272,7 @@ __perf_counter_exit_task(struct task_str
 		group_sched_out(child_counter, cpuctx, child_ctx);
 		update_counter_times(child_counter);
 
-		list_del_init(&child_counter->list_entry);
-
-		child_ctx->nr_counters--;
+		list_del_counter(child_counter, child_ctx);
 
 		perf_enable();
 		local_irq_restore(flags);
@@ -3288,13 +3286,6 @@ __perf_counter_exit_task(struct task_str
 	 */
 	if (parent_counter) {
 		sync_child_counter(child_counter, parent_counter);
-		list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
-					 list_entry) {
-			if (sub->parent) {
-				sync_child_counter(sub, sub->parent);
-				free_counter(sub);
-			}
-		}
 		free_counter(child_counter);
 	}
 }
@@ -3315,9 +3306,18 @@ void perf_counter_exit_task(struct task_
 	if (likely(!child_ctx->nr_counters))
 		return;
 
+again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
 		__perf_counter_exit_task(child, child_counter, child_ctx);
+
+	/*
+	 * If the last counter was a group counter, it will have appended all
+	 * its siblings to the list, but we obtained 'tmp' before that which
+	 * will still point to the list head terminating the iteration.
+	 */
+	if (!list_empty(&child_ctx->counter_list))
+		goto again;
 }
 
 /*
Index: linux-2.6/kernel/exit.c
===================================================================
--- linux-2.6.orig/kernel/exit.c
+++ linux-2.6/kernel/exit.c
@@ -155,7 +155,7 @@ static void delayed_put_task_struct(stru
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
 #ifdef CONFIG_PERF_COUNTERS
-	WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+	WARN_ON(!list_empty(&tsk->perf_counter_ctx.counter_list));
 #endif
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
@@ -1002,6 +1002,8 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;



  reply	other threads:[~2009-05-15 20:27 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-13 16:54 perf counter issue - WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); Srivatsa Vaddagiri
2009-05-13 16:57 ` Srivatsa Vaddagiri
2009-05-15 13:56   ` Ingo Molnar
2009-05-15 14:51     ` Arnaldo Carvalho de Melo
2009-05-15 15:58       ` Srivatsa Vaddagiri
2009-05-15 16:13         ` Peter Zijlstra
2009-05-15 17:37           ` Peter Zijlstra
2009-05-15 20:27             ` Peter Zijlstra [this message]
2009-05-18  4:45               ` Paul Mackerras
2009-05-22  6:25               ` Srivatsa Vaddagiri

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1242419228.32543.360.camel@laptop \
    --to=peterz@infradead.org \
    --cc=acme@redhat.com \
    --cc=cjashfor@linux.vnet.ibm.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    --cc=paulus@samba.org \
    --cc=tglx@linutronix.de \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox