[PATCH 3/4] kthreads: rework kthread

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 3/4] kthreads: rework kthread_stop()
@ 2009-01-30 12:33 Oleg Nesterov
  2009-01-30 12:50 ` Oleg Nesterov
  2009-01-30 21:47 ` Andrew Morton
  0 siblings, 2 replies; 15+ messages in thread
From: Oleg Nesterov @ 2009-01-30 12:33 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Eric W. Biederman, Ingo Molnar,
	Pavel Emelyanov, Rusty Russell, Vitaliy Gusev, linux-kernel

Based on Eric's patch which in turn was based on my patch.

kthread_stop() has the nasty problems:

- it runs unpredictably long with the global semaphore held.

- it deadlocks if kthread itself does kthread_stop() before it obeys
  the kthread_should_stop() request.

- it is not useable if kthread exits on its own, see for example the
  ugly "wait_to_die:" hack in migration_thread()

- it is not possible to just tell kthread it should stop, we must always
  wait for its exit.

With this patch kthread() allocates all neccesary data (struct kthread)
on its own stack, globals kthread_stop_xxx are deleted. ->vfork_done
is used as a pointer into "struct kthread", this means kthread_stop()
can easily wait for kthread's exit.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>

--- 6.29-rc3/kernel/kthread.c~3_EXIT	2009-01-30 10:45:18.000000000 +0100
+++ 6.29-rc3/kernel/kthread.c	2009-01-30 12:08:52.000000000 +0100
@@ -37,17 +37,13 @@ struct kthread_create_info
 	struct list_head list;
 };
 
-struct kthread_stop_info
-{
-	struct task_struct *k;
-	int err;
-	struct completion done;
+struct kthread {
+	int should_stop;
+	struct completion exited;
 };
 
-/* Thread stopping is done by setthing this var: lock serializes
- * multiple kthread_stop calls. */
-static DEFINE_MUTEX(kthread_stop_lock);
-static struct kthread_stop_info kthread_stop_info;
+#define to_kthread(tsk)	\
+	container_of((tsk)->vfork_done, struct kthread, exited)
 
 /**
  * kthread_should_stop - should this kthread return now?
@@ -58,20 +54,22 @@ static struct kthread_stop_info kthread_
  */
 int kthread_should_stop(void)
 {
-	return (kthread_stop_info.k == current);
+	return to_kthread(current)->should_stop;
 }
 EXPORT_SYMBOL(kthread_should_stop);
 
 static int kthread(void *_create)
 {
+	/* Copy data: it's on kthread's stack */
 	struct kthread_create_info *create = _create;
-	int (*threadfn)(void *data);
-	void *data;
-	int ret = -EINTR;
+	int (*threadfn)(void *data) = create->threadfn;
+	void *data = create->data;
+	struct kthread self;
+	int ret;
 
-	/* Copy data: it's on kthread's stack */
-	threadfn = create->threadfn;
-	data = create->data;
+	self.should_stop = 0;
+	init_completion(&self.exited);
+	current->vfork_done = &self.exited;
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -79,15 +77,12 @@ static int kthread(void *_create)
 	complete(&create->done);
 	schedule();
 
-	if (!kthread_should_stop())
+	ret = -EINTR;
+	if (!self.should_stop)
 		ret = threadfn(data);
 
-	/* It might have exited on its own, w/o kthread_stop.  Check. */
-	if (kthread_should_stop()) {
-		kthread_stop_info.err = ret;
-		complete(&kthread_stop_info.done);
-	}
-	return 0;
+	/* we can't just return, we must preserve "self" on stack */
+	do_exit(ret);
 }
 
 static void create_kthread(struct kthread_create_info *create)
@@ -197,30 +192,22 @@ EXPORT_SYMBOL(kthread_bind);
  */
 int kthread_stop(struct task_struct *k)
 {
+	struct kthread *kthread;
 	int ret;
 
-	mutex_lock(&kthread_stop_lock);
-
-	/* It could exit after stop_info.k set, but before wake_up_process. */
-	get_task_struct(k);
-
 	trace_sched_kthread_stop(k);
+	get_task_struct(k);
 
-	/* Must init completion *before* thread sees kthread_stop_info.k */
-	init_completion(&kthread_stop_info.done);
-	smp_wmb();
+	kthread = to_kthread(k);
+	barrier(); /* it might have exited */
+	if (k->vfork_done != NULL) {
+		kthread->should_stop = 1;
+		wake_up_process(k);
+		wait_for_completion(&kthread->exited);
+	}
+	ret = k->exit_code;
 
-	/* Now set kthread_should_stop() to true, and wake it up. */
-	kthread_stop_info.k = k;
-	wake_up_process(k);
 	put_task_struct(k);
-
-	/* Once it dies, reset stop ptr, gather result and we're done. */
-	wait_for_completion(&kthread_stop_info.done);
-	kthread_stop_info.k = NULL;
-	ret = kthread_stop_info.err;
-	mutex_unlock(&kthread_stop_lock);
-
 	trace_sched_kthread_stop_ret(ret);
 
 	return ret;


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-01-30 12:33 [PATCH 3/4] kthreads: rework kthread_stop() Oleg Nesterov
@ 2009-01-30 12:50 ` Oleg Nesterov
  2009-01-31 12:16   ` Rusty Russell
  2009-01-30 21:47 ` Andrew Morton
  1 sibling, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2009-01-30 12:50 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Hellwig, Eric W. Biederman, Ingo Molnar,
	Pavel Emelyanov, Rusty Russell, Vitaliy Gusev, linux-kernel

On 01/30, Oleg Nesterov wrote:
>
> With this patch kthread() allocates all neccesary data (struct kthread)
> on its own stack, globals kthread_stop_xxx are deleted. ->vfork_done
> is used as a pointer into "struct kthread", this means kthread_stop()
> can easily wait for kthread's exit.

To simplify the review, please see the code with the patch applied.

Oleg.

struct kthread {
	int should_stop;
	struct completion exited;
};

#define to_kthread(tsk)	\
	container_of((tsk)->vfork_done, struct kthread, exited)

int kthread_should_stop(void)
{
	return to_kthread(current)->should_stop;
}

static int kthread(void *_create)
{
	/* Copy data: it's on kthread's stack */
	struct kthread_create_info *create = _create;
	int (*threadfn)(void *data) = create->threadfn;
	void *data = create->data;
	struct kthread self;
	int ret;

	self.should_stop = 0;
	init_completion(&self.exited);
	current->vfork_done = &self.exited;

	/* OK, tell user we're spawned, wait for stop or wakeup */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	create->result = current;
	complete(&create->done);
	schedule();

	ret = -EINTR;
	if (!self.should_stop)
		ret = threadfn(data);

	/* we can't just return, we must preserve "self" on stack */
	do_exit(ret);
}

int kthread_stop(struct task_struct *k)
{
	struct kthread *kthread;
	int ret;

	trace_sched_kthread_stop(k);
	get_task_struct(k);

	kthread = to_kthread(k);
	barrier(); /* it might have exited */
	if (k->vfork_done != NULL) {
		kthread->should_stop = 1;
		wake_up_process(k);
		wait_for_completion(&kthread->exited);
	}
	ret = k->exit_code;

	put_task_struct(k);
	trace_sched_kthread_stop_ret(ret);

	return ret;
}


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-01-30 12:50 ` Oleg Nesterov
@ 2009-01-31 12:16   ` Rusty Russell
  2009-02-01 10:21     ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Rusty Russell @ 2009-01-31 12:16 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Christoph Hellwig, Eric W. Biederman, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On Friday 30 January 2009 23:20:58 Oleg Nesterov wrote:
> On 01/30, Oleg Nesterov wrote:
> >
> > With this patch kthread() allocates all neccesary data (struct kthread)
> > on its own stack, globals kthread_stop_xxx are deleted. ->vfork_done
> > is used as a pointer into "struct kthread", this means kthread_stop()
> > can easily wait for kthread's exit.
> 
> To simplify the review, please see the code with the patch applied.

Hmm, I thought about using the parent-child relationship and fairly normal
wait() semantics, but this looks simpler.

> struct kthread {
> 	int should_stop;
> 	struct completion exited;
> };

Mildly prefer bool in new code.

> #define to_kthread(tsk)	\
> 	container_of((tsk)->vfork_done, struct kthread, exited)

This needs a comment.  Especially since to_xxx(yyy) is usually simply a
container_of(yyy, xxx, member).  This one is special.

> int kthread_stop(struct task_struct *k)
> {
> 	struct kthread *kthread;
> 	int ret;
> 
> 	trace_sched_kthread_stop(k);
> 	get_task_struct(k);
> 
> 	kthread = to_kthread(k);
> 	barrier(); /* it might have exited */
> 	if (k->vfork_done != NULL) {
> 		kthread->should_stop = 1;
> 		wake_up_process(k);
> 		wait_for_completion(&kthread->exited);
> 	}
> 	ret = k->exit_code;

I don't think this works.  How does do_exit() preserve a stack var, other
than for a few cycles longer?  Sure, the vfork_done will be OK, but this code
here will not be.  I think you'd need a get_task_struct(current) before the
do_exit(ret) (the case where the kthread fn calls do_exit() is fine: you're
not allowed to call kthread stop on such threads).

In which case using vfork_done is really just a convenience pointer inside
struct task_struct to stash the struct kthread.  And that's horribly ugly, which is why I stuck with a simple global.  Changing to a linked-list of things to stop would avoid the deadlock you mentioned where a kthread stops another kthread.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-01-31 12:16   ` Rusty Russell
@ 2009-02-01 10:21     ` Oleg Nesterov
  2009-02-02 17:57       ` Eric W. Biederman
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2009-02-01 10:21 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Andrew Morton, Christoph Hellwig, Eric W. Biederman, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On 01/31, Rusty Russell wrote:
>
> On Friday 30 January 2009 23:20:58 Oleg Nesterov wrote:
> > On 01/30, Oleg Nesterov wrote:
> > >
> > > With this patch kthread() allocates all neccesary data (struct kthread)
> > > on its own stack, globals kthread_stop_xxx are deleted. ->vfork_done
> > > is used as a pointer into "struct kthread", this means kthread_stop()
> > > can easily wait for kthread's exit.
> 
> > struct kthread {
> > 	int should_stop;
> > 	struct completion exited;
> > };
> 
> Mildly prefer bool in new code.

OK, and

> > #define to_kthread(tsk)	\
> > 	container_of((tsk)->vfork_done, struct kthread, exited)
> 
> This needs a comment.  Especially since to_xxx(yyy) is usually simply a
> container_of(yyy, xxx, member).  This one is special.

OK, I'll send the cleanup patch.

> > int kthread_stop(struct task_struct *k)
> > {
> > 	struct kthread *kthread;
> > 	int ret;
> >
> > 	trace_sched_kthread_stop(k);
> > 	get_task_struct(k);
> >
> > 	kthread = to_kthread(k);
> > 	barrier(); /* it might have exited */
> > 	if (k->vfork_done != NULL) {
> > 		kthread->should_stop = 1;
> > 		wake_up_process(k);
> > 		wait_for_completion(&kthread->exited);
> > 	}
> > 	ret = k->exit_code;
>
> I don't think this works.  How does do_exit() preserve a stack var, other
> than for a few cycles longer?  Sure, the vfork_done will be OK, but this code
> here will not be.  I think you'd need a get_task_struct(current) before the
> do_exit(ret)

I think this works ;)

This stack frame can't disappear until __put_task_struct()->...->free_thread_info().
So, if you have a reference to task_struct, then it it is safe to dereference
to_kthread(task).

Before this patch, kthread_stop() can only be used when we know that kthread
must not exit by its own. And with this patch we are safe in this case, note
that kthread_stop() does get_task_struct() before it sets ->should_stop = 1.
And this also pins the memory pointed by to_kthread().

> (the case where the kthread fn calls do_exit() is fine: you're
> not allowed to call kthread stop on such threads).

This was not allowed, but now this is fine. Please look at the 4/4 patch.
But, in that case you must pin the task_struct after kthread_create(),
otherwise (with or without this patch) you just can't use this task_struct
in any way.

> In which case using vfork_done is really just a convenience pointer inside
> struct task_struct to stash the struct kthread.  And that's horribly ugly,
> which is why I stuck with a simple global.  Changing to a linked-list of things
> to stop would avoid the deadlock you mentioned where a kthread stops another
> kthread.

Well, this patch overloads ->vfork_done, and I agree this is a bit ugly.
But what you suggest (if I undestand correctly) is more complex, and doesn't
have any advantages, imho.

What do you think?

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-01 10:21     ` Oleg Nesterov
@ 2009-02-02 17:57       ` Eric W. Biederman
  2009-02-02 19:41         ` Oleg Nesterov
  0 siblings, 1 reply; 15+ messages in thread
From: Eric W. Biederman @ 2009-02-02 17:57 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Rusty Russell, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

Oleg Nesterov <oleg@redhat.com> writes:

> On 01/31, Rusty Russell wrote:
>>
>> On Friday 30 January 2009 23:20:58 Oleg Nesterov wrote:
>> > On 01/30, Oleg Nesterov wrote:
>> > >
>> > > With this patch kthread() allocates all neccesary data (struct kthread)
>> > > on its own stack, globals kthread_stop_xxx are deleted. ->vfork_done
>> > > is used as a pointer into "struct kthread", this means kthread_stop()
>> > > can easily wait for kthread's exit.
>> 
>> > struct kthread {
>> > 	int should_stop;
>> > 	struct completion exited;
>> > };
>> 
>> Mildly prefer bool in new code.
>
> OK, and
>
>> > #define to_kthread(tsk)	\
>> > 	container_of((tsk)->vfork_done, struct kthread, exited)
>> 
>> This needs a comment.  Especially since to_xxx(yyy) is usually simply a
>> container_of(yyy, xxx, member).  This one is special.
>
> OK, I'll send the cleanup patch.
>
>> > int kthread_stop(struct task_struct *k)
>> > {
>> > 	struct kthread *kthread;
>> > 	int ret;
>> >
>> > 	trace_sched_kthread_stop(k);
>> > 	get_task_struct(k);
>> >
>> > 	kthread = to_kthread(k);
>> > 	barrier(); /* it might have exited */
>> > 	if (k->vfork_done != NULL) {
>> > 		kthread->should_stop = 1;
>> > 		wake_up_process(k);
>> > 		wait_for_completion(&kthread->exited);
>> > 	}
>> > 	ret = k->exit_code;
>>
>> I don't think this works.  How does do_exit() preserve a stack var, other
>> than for a few cycles longer?  Sure, the vfork_done will be OK, but this code
>> here will not be.  I think you'd need a get_task_struct(current) before the
>> do_exit(ret)
>
> I think this works ;)
>
> This stack frame can't disappear until
> __put_task_struct()->...->free_thread_info().
> So, if you have a reference to task_struct, then it it is safe to dereference
> to_kthread(task).

Correct.

> Before this patch, kthread_stop() can only be used when we know that kthread
> must not exit by its own. And with this patch we are safe in this case, note
> that kthread_stop() does get_task_struct() before it sets ->should_stop = 1.
> And this also pins the memory pointed by to_kthread().

>> (the case where the kthread fn calls do_exit() is fine: you're
>> not allowed to call kthread stop on such threads).
>
> This was not allowed, but now this is fine. Please look at the 4/4 patch.
> But, in that case you must pin the task_struct after kthread_create(),
> otherwise (with or without this patch) you just can't use this task_struct
> in any way.

To finish the conversion of everything to kthreads from kernel_thread
we need to be able to call kthread_stop on threads that call do_exit.
That kthread_stop cannot be called on such threads is currently a major
deficiency of the kthread api.

>> In which case using vfork_done is really just a convenience pointer inside
>> struct task_struct to stash the struct kthread.  And that's horribly ugly,
>> which is why I stuck with a simple global.  Changing to a linked-list of
> things
>> to stop would avoid the deadlock you mentioned where a kthread stops another
>> kthread.
>
> Well, this patch overloads ->vfork_done, and I agree this is a bit ugly.
> But what you suggest (if I undestand correctly) is more complex, and doesn't
> have any advantages, imho.

No.  This patch does not overload or really abuse vfork_done.
vfork_done is named a bit misleadingly.  It should arguably be called
mm_done.  vfork_done (if set) always points to a completion that will
be completed when do_exit() -> mm_release() is called.

What we want is some completion called from inside of do_exit.
mm_release happens to be such a completion and the code already
exists.

If mm_release did not do: tsk->vfork_done = NULL then the entire
test of if (k->vfork_done != NULL) would be unnecessary.

Oleg on that note we should not need a barrier at all. We should be
able to simply say: 

cmplp = k->vfork_done;
if (cmplp){
	/* if vfork_done is NULL we have passed mm_release */
	kthread = container_of(cmplp, struct kthread, exited);
	kthread->should_stop = 1;
	wake_up_process(k);
	wait_for_completion(&kthread->exited);
}

Thinking of it I wish we had someplace we could store a pointer
that would not be cleared so we could remove that whole confusing
conditional.  I just looked through task_struct and there doesn't
appear to be anything promising.

Perhaps we could rename vfork_done mm_done and not clear it in
mm_release.

Eric

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-02 17:57       ` Eric W. Biederman
@ 2009-02-02 19:41         ` Oleg Nesterov
  2009-02-03  3:25           ` Eric W. Biederman
  0 siblings, 1 reply; 15+ messages in thread
From: Oleg Nesterov @ 2009-02-02 19:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Rusty Russell, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On 02/02, Eric W. Biederman wrote:
>
> Oleg on that note we should not need a barrier at all. We should be
> able to simply say:
>
> cmplp = k->vfork_done;
> if (cmplp){
> 	/* if vfork_done is NULL we have passed mm_release */
> 	kthread = container_of(cmplp, struct kthread, exited);
> 	kthread->should_stop = 1;
> 	wake_up_process(k);
> 	wait_for_completion(&kthread->exited);
> }

Yes, but the compiler can read ->vfork_done twice, and turn this code
into

	cmplp = k->vfork_done;
	if (cmplp){
		kthread = container_of(k->vfork_done, struct kthread, exited);
		...

and when we read k->vfork_done again it can be already NULL.
Probably we could use ACCESS_ONCE() instead.

Perhaps this barrier() is not needed in practice, but just to be safe.
And in fact I saw the bug report with this code:

	ac.ac_tty = current->signal->tty ?
		old_encode_dev(tty_devnum(current->signal->tty)) : 0;

this code is wrong anyway, but ->tty was read twice. I specially
asked for .s file because I wasn't able to believe the bug manifests
itself this way.

> Thinking of it I wish we had someplace we could store a pointer
> that would not be cleared so we could remove that whole confusing
> conditional.  I just looked through task_struct and there doesn't
> appear to be anything promising.
>
> Perhaps we could rename vfork_done mm_done and not clear it in
> mm_release.

Yes, in that case we don't need the barrier().

I was thinking about changing mm_release() too, but we should clear
->vfork_done (or whatever) in exec_mmap() anyway.

Oleg.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-02 19:41         ` Oleg Nesterov
@ 2009-02-03  3:25           ` Eric W. Biederman
  2009-02-03 13:41             ` Paul E. McKenney
  0 siblings, 1 reply; 15+ messages in thread
From: Eric W. Biederman @ 2009-02-03  3:25 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Rusty Russell, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

Oleg Nesterov <oleg@redhat.com> writes:

> On 02/02, Eric W. Biederman wrote:
>>
>> Oleg on that note we should not need a barrier at all. We should be
>> able to simply say:
>>
>> cmplp = k->vfork_done;
>> if (cmplp){
>> 	/* if vfork_done is NULL we have passed mm_release */
>> 	kthread = container_of(cmplp, struct kthread, exited);
>> 	kthread->should_stop = 1;
>> 	wake_up_process(k);
>> 	wait_for_completion(&kthread->exited);
>> }
>
> Yes, but the compiler can read ->vfork_done twice, and turn this code
> into
>
> 	cmplp = k->vfork_done;
> 	if (cmplp){
> 		kthread = container_of(k->vfork_done, struct kthread, exited);
> 		...
>
> and when we read k->vfork_done again it can be already NULL.
> Probably we could use ACCESS_ONCE() instead.
>
> Perhaps this barrier() is not needed in practice, but just to be safe.

Certainly.   I definitely see where you are coming from.
And of course all of this only works because a pointer is a word size
so it is read and updated atomically by the compiler.

I wish we had a good idiom we could use to make it clear what we
are doing.  The rcu pointer read code perhaps?

> And in fact I saw the bug report with this code:
>
> 	ac.ac_tty = current->signal->tty ?
> 		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
>
> this code is wrong anyway, but ->tty was read twice. I specially
> asked for .s file because I wasn't able to believe the bug manifests
> itself this way.

Interesting.

>> Thinking of it I wish we had someplace we could store a pointer
>> that would not be cleared so we could remove that whole confusing
>> conditional.  I just looked through task_struct and there doesn't
>> appear to be anything promising.
>>
>> Perhaps we could rename vfork_done mm_done and not clear it in
>> mm_release.
>
> Yes, in that case we don't need the barrier().
>
> I was thinking about changing mm_release() too, but we should clear
> ->vfork_done (or whatever) in exec_mmap() anyway.

Yes.  I realized that just after I wrote that.  So clearing
vfork_done in all cases is a good idea so we don't make get sloppy.

Eric


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-03  3:25           ` Eric W. Biederman
@ 2009-02-03 13:41             ` Paul E. McKenney
  2009-02-04  5:10               ` Eric W. Biederman
  0 siblings, 1 reply; 15+ messages in thread
From: Paul E. McKenney @ 2009-02-03 13:41 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Oleg Nesterov, Rusty Russell, Andrew Morton, Christoph Hellwig,
	Ingo Molnar, Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On Mon, Feb 02, 2009 at 07:25:44PM -0800, Eric W. Biederman wrote:
> Oleg Nesterov <oleg@redhat.com> writes:
> 
> > On 02/02, Eric W. Biederman wrote:
> >>
> >> Oleg on that note we should not need a barrier at all. We should be
> >> able to simply say:
> >>
> >> cmplp = k->vfork_done;
> >> if (cmplp){
> >> 	/* if vfork_done is NULL we have passed mm_release */
> >> 	kthread = container_of(cmplp, struct kthread, exited);
> >> 	kthread->should_stop = 1;
> >> 	wake_up_process(k);
> >> 	wait_for_completion(&kthread->exited);
> >> }
> >
> > Yes, but the compiler can read ->vfork_done twice, and turn this code
> > into
> >
> > 	cmplp = k->vfork_done;
> > 	if (cmplp){
> > 		kthread = container_of(k->vfork_done, struct kthread, exited);
> > 		...
> >
> > and when we read k->vfork_done again it can be already NULL.
> > Probably we could use ACCESS_ONCE() instead.
> >
> > Perhaps this barrier() is not needed in practice, but just to be safe.
> 
> Certainly.   I definitely see where you are coming from.
> And of course all of this only works because a pointer is a word size
> so it is read and updated atomically by the compiler.
> 
> I wish we had a good idiom we could use to make it clear what we
> are doing.  The rcu pointer read code perhaps?

ACCESS_ONCE() suffices in many cases, but if the pointer being accessed
points to a structure that might recently have been initialized, then
rcu_dereference() will be required on Alpha.  Though perhaps the
discussion below removes the need entirely, but cannot say that I fully
understand this part of the kernel.

							Thanx, Paul

> > And in fact I saw the bug report with this code:
> >
> > 	ac.ac_tty = current->signal->tty ?
> > 		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
> >
> > this code is wrong anyway, but ->tty was read twice. I specially
> > asked for .s file because I wasn't able to believe the bug manifests
> > itself this way.
> 
> Interesting.
> 
> >> Thinking of it I wish we had someplace we could store a pointer
> >> that would not be cleared so we could remove that whole confusing
> >> conditional.  I just looked through task_struct and there doesn't
> >> appear to be anything promising.
> >>
> >> Perhaps we could rename vfork_done mm_done and not clear it in
> >> mm_release.
> >
> > Yes, in that case we don't need the barrier().
> >
> > I was thinking about changing mm_release() too, but we should clear
> > ->vfork_done (or whatever) in exec_mmap() anyway.
> 
> Yes.  I realized that just after I wrote that.  So clearing
> vfork_done in all cases is a good idea so we don't make get sloppy.
> 
> Eric
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-03 13:41             ` Paul E. McKenney
@ 2009-02-04  5:10               ` Eric W. Biederman
  2009-02-04 11:04                 ` Rusty Russell
  0 siblings, 1 reply; 15+ messages in thread
From: Eric W. Biederman @ 2009-02-04  5:10 UTC (permalink / raw)
  To: paulmck
  Cc: Rusty Russell, paulmck, Andrew Morton, Christoph Hellwig,
	Ingo Molnar, Pavel Emelyanov, Vitaliy Gusev, linux-kernel

"Paul E. McKenney" <paulmck@linux.vnet.ibm.com> writes:

> ACCESS_ONCE() suffices in many cases, but if the pointer being accessed
> points to a structure that might recently have been initialized, then
> rcu_dereference() will be required on Alpha.  Though perhaps the
> discussion below removes the need entirely, but cannot say that I fully
> understand this part of the kernel.

Thanks.  I had overlooked the addition of ACCESS_ONCE() into our set of
tricks.  I thought Oleg was referring to a hypothetical construct.

Currently Oleg's implementation is fine because of an explicit
memory barrier and two dereferences of the pointer.  It just isn't
especially clear.

ACCESS_ONCE would help.

Hmm.

I wonder if it would be better/clearer to define to_kthread as:

static struct kthread *to_kthread(struct task_struct *tsk)
{
	void *stack = task_stack_page(tsk);
	return (struct kthread *)(stack + kthread_offset);

}

And to measure kthread_offset the first time kthread() was
called.  I would love to have a fixed compile time offset but
I don't know how to measure it at compile time reliably.

Oleg what do you think.  It would remove the test and be simple and
obviously correct.

Eric

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-04  5:10               ` Eric W. Biederman
@ 2009-02-04 11:04                 ` Rusty Russell
  2009-02-04 15:59                   ` Eric W. Biederman
  2009-02-04 20:46                   ` Jon Masters
  0 siblings, 2 replies; 15+ messages in thread
From: Rusty Russell @ 2009-02-04 11:04 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: paulmck, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On Wednesday 04 February 2009 15:40:06 Eric W. Biederman wrote:
> static struct kthread *to_kthread(struct task_struct *tsk)
> {
> 	void *stack = task_stack_page(tsk);
> 	return (struct kthread *)(stack + kthread_offset);
>         
> }
...
> It would remove the test and be simple and obviously correct.

Clever? Sure.  Neat? Yes.

But you are using a definition of obvious with which I was not previously
familiar :)

Rusty.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-04 11:04                 ` Rusty Russell
@ 2009-02-04 15:59                   ` Eric W. Biederman
  2009-02-05  1:03                     ` Rusty Russell
  2009-02-04 20:46                   ` Jon Masters
  1 sibling, 1 reply; 15+ messages in thread
From: Eric W. Biederman @ 2009-02-04 15:59 UTC (permalink / raw)
  To: Rusty Russell
  Cc: paulmck, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

Rusty Russell <rusty@rustcorp.com.au> writes:

> On Wednesday 04 February 2009 15:40:06 Eric W. Biederman wrote:
>> static struct kthread *to_kthread(struct task_struct *tsk)
>> {
>> 	void *stack = task_stack_page(tsk);
>> 	return (struct kthread *)(stack + kthread_offset);
>>         
>> }
> ...
>> It would remove the test and be simple and obviously correct.
>
> Clever? Sure.  Neat? Yes.
>
> But you are using a definition of obvious with which I was not previously
> familiar :)

Well the way you compute kthread_offset is:

	struct kthread kthread;
	void *stack = task_stack_page(current);
        kthread_offset = (void *)&kthread - stack;

Now Rusty I don't know about you but after I learned to do
addition and subtraction it has always been obvious to me that
one is the opposite of the other.

Further I think the rest of that code becomes a lot clearer if
we can remove that stupid, unnecessary conditional.  As worrying
if the process has exited implies we care about a lot of things
that we really don't and seem to make the code generally less
comprehensible.

I am slightly concerned that using task_stack_page(tsk) may be
overly clever, but compared to ACCESS_ONCE(), memory barriers,
or not letting kthread_stop be called on a thread that may exit
I think I am ahead of the game.

Eric

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-04 15:59                   ` Eric W. Biederman
@ 2009-02-05  1:03                     ` Rusty Russell
  0 siblings, 0 replies; 15+ messages in thread
From: Rusty Russell @ 2009-02-05  1:03 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: paulmck, Andrew Morton, Christoph Hellwig, Ingo Molnar,
	Pavel Emelyanov, Vitaliy Gusev, linux-kernel

On Thursday 05 February 2009 02:29:35 Eric W. Biederman wrote:
> Rusty Russell <rusty@rustcorp.com.au> writes:
> > Clever? Sure.  Neat? Yes.
> >
> > But you are using a definition of obvious with which I was not previously
> > familiar :)
...
> Now Rusty I don't know about you but after I learned to do
> addition and subtraction it has always been obvious to me that
> one is the opposite of the other.

It is *not* obvious that the offset must be constant across all kthreads.  On
all architectures, and always will be.  That noone will *ever* put a
variable-size object on the stack in this code path.

I *think* it's true, but I've been surprised before.

> I am slightly concerned that using task_stack_page(tsk) may be
> overly clever, but compared to ACCESS_ONCE(), memory barriers,
> or not letting kthread_stop be called on a thread that may exit
> I think I am ahead of the game.

Absolutely agreed.  Just humor me please and put a BUG_ON in there :)

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-02-04 11:04                 ` Rusty Russell
  2009-02-04 15:59                   ` Eric W. Biederman
@ 2009-02-04 20:46                   ` Jon Masters
  1 sibling, 0 replies; 15+ messages in thread
From: Jon Masters @ 2009-02-04 20:46 UTC (permalink / raw)
  To: LKML

On Wed, 2009-02-04 at 21:34 +1030, Rusty Russell wrote:
> On Wednesday 04 February 2009 15:40:06 Eric W. Biederman wrote:
> > static struct kthread *to_kthread(struct task_struct *tsk)
> > {
> > 	void *stack = task_stack_page(tsk);
> > 	return (struct kthread *)(stack + kthread_offset);
> >         
> > }
> ...
> > It would remove the test and be simple and obviously correct.
> 
> Clever? Sure.  Neat? Yes.
> 
> But you are using a definition of obvious with which I was not previously
> familiar :)

I wonder if he knows precisely when Jon Corbet is reading his email and
gets these fantastic one-liners ready ahead of time, or if they flow
naturally by happenstance. I guess it's the latter.

Jon.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-01-30 12:33 [PATCH 3/4] kthreads: rework kthread_stop() Oleg Nesterov
  2009-01-30 12:50 ` Oleg Nesterov
@ 2009-01-30 21:47 ` Andrew Morton
  2009-02-01 10:49   ` Oleg Nesterov
  1 sibling, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2009-01-30 21:47 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: hch, ebiederm, mingo, xemul, rusty, vgusev, linux-kernel

On Fri, 30 Jan 2009 13:33:58 +0100
Oleg Nesterov <oleg@redhat.com> wrote:

>  int kthread_stop(struct task_struct *k)
>  {
> +	struct kthread *kthread;
>  	int ret;
>  
> -	mutex_lock(&kthread_stop_lock);
> -
> -	/* It could exit after stop_info.k set, but before wake_up_process. */
> -	get_task_struct(k);
> -
>  	trace_sched_kthread_stop(k);
> +	get_task_struct(k);
>  
> -	/* Must init completion *before* thread sees kthread_stop_info.k */
> -	init_completion(&kthread_stop_info.done);
> -	smp_wmb();
> +	kthread = to_kthread(k);
> +	barrier(); /* it might have exited */

Why the change from smp_wmb() to plain old barrier()?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/4] kthreads: rework kthread_stop()
  2009-01-30 21:47 ` Andrew Morton
@ 2009-02-01 10:49   ` Oleg Nesterov
  0 siblings, 0 replies; 15+ messages in thread
From: Oleg Nesterov @ 2009-02-01 10:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: hch, ebiederm, mingo, xemul, rusty, vgusev, linux-kernel

On 01/30, Andrew Morton wrote:
>
> On Fri, 30 Jan 2009 13:33:58 +0100
> Oleg Nesterov <oleg@redhat.com> wrote:
> 
> >  int kthread_stop(struct task_struct *k)
> >  {
> > +	struct kthread *kthread;
> >  	int ret;
> >  
> > -	mutex_lock(&kthread_stop_lock);
> > -
> > -	/* It could exit after stop_info.k set, but before wake_up_process. */
> > -	get_task_struct(k);
> > -
> >  	trace_sched_kthread_stop(k);
> > +	get_task_struct(k);
> >  
> > -	/* Must init completion *before* thread sees kthread_stop_info.k */
> > -	init_completion(&kthread_stop_info.done);
> > -	smp_wmb();
> > +	kthread = to_kthread(k);
> > +	barrier(); /* it might have exited */
> 
> Why the change from smp_wmb() to plain old barrier()?

These 2 barriers have nothing to do with each other.

Before this patch, smp_wmb() was needed to make sure kthread sees the
initialized &kthread_stop_info.done if it sees kthread_should_stop().

After the patch, this barrier() tells the compiler it must not move
"kthread = to_kthread(k)" down, under the "if (k->vfork_done)" check.
Because k->vfork_done is "volatile", it can be changed under us.

But, once we got the k->vfork_done != NULL we can use it safely, even
if the task exits in parallel. Because we have a reference to task_struct,
and thus to tsk->stack.

Oleg.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2009-02-05  1:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-30 12:33 [PATCH 3/4] kthreads: rework kthread_stop() Oleg Nesterov
2009-01-30 12:50 ` Oleg Nesterov
2009-01-31 12:16   ` Rusty Russell
2009-02-01 10:21     ` Oleg Nesterov
2009-02-02 17:57       ` Eric W. Biederman
2009-02-02 19:41         ` Oleg Nesterov
2009-02-03  3:25           ` Eric W. Biederman
2009-02-03 13:41             ` Paul E. McKenney
2009-02-04  5:10               ` Eric W. Biederman
2009-02-04 11:04                 ` Rusty Russell
2009-02-04 15:59                   ` Eric W. Biederman
2009-02-05  1:03                     ` Rusty Russell
2009-02-04 20:46                   ` Jon Masters
2009-01-30 21:47 ` Andrew Morton
2009-02-01 10:49   ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox