From: Ingo Molnar <mingo@elte.hu>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Andrew Morton <akpm@osdl.org>, linux-kernel@vger.kernel.org
Subject: Re: [patch] fix runqueue corruption, 2.6.1-rc3-A0
Date: Thu, 8 Jan 2004 20:31:05 +0100 [thread overview]
Message-ID: <20040108193105.GA32647@elte.hu> (raw)
In-Reply-To: <Pine.LNX.4.58.0401081045570.2158@home.osdl.org>
[-- Attachment #1: Type: text/plain, Size: 974 bytes --]
* Linus Torvalds <torvalds@osdl.org> wrote:
> The bug is more than that - we don't do the proper accountign setup
> for the CLONE_STOPPED case. Look at all the interactive bias stuff
> that doesn't get run at all..
yep, agreed, this is another bug. It is fixed by an existing scheduler
patch (queued for 2.6.2, sched-cleanup-2.6.0-A2) but we might want to do
it in 2.6.1. I've attached the patch, merged relative to 2.6.1-rc3 +
your version of the threading fix. The changelog:
- separate out the scheduler-state initializations from the fork path
and push them into sched.c.
- this also fixes a bug: CLONE_STOPPED tasks were not fully set up.
> or, preferably to get rid of the TASK_UNINTERRUPTIBLE special case
> altogether, and just say "this function has to be called for something
> that is already marked as being running".
yep, agreed, your patch is cleaner. (i've test-booted it on SMP, it
works fine. I've also test-booted the sched-cleanup patch.)
Ingo
[-- Attachment #2: sched-cleanup-2.6.1-rc3-A1 --]
[-- Type: text/plain, Size: 5917 bytes --]
- separate out the scheduler-state initializations from the fork path
and push them into sched.c.
- this also fixes a bug: CLONE_STOPPED tasks were not full set up.
--- linux/include/linux/sched.h.orig
+++ linux/include/linux/sched.h
@@ -579,6 +579,7 @@ extern int FASTCALL(wake_up_process(stru
static inline void kick_process(struct task_struct *tsk) { }
#endif
extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk));
+extern void FASTCALL(sched_fork(task_t * p));
extern void FASTCALL(sched_exit(task_t * p));
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
--- linux/kernel/fork.c.orig
+++ linux/kernel/fork.c
@@ -910,15 +910,7 @@ struct task_struct *copy_process(unsigne
if (p->binfmt && !try_module_get(p->binfmt->module))
goto bad_fork_cleanup_put_domain;
-#ifdef CONFIG_PREEMPT
- /*
- * schedule_tail drops this_rq()->lock so we compensate with a count
- * of 1. Also, we want to start with kernel preemption disabled.
- */
- p->thread_info->preempt_count = 1;
-#endif
p->did_exec = 0;
- p->state = TASK_RUNNING;
copy_flags(clone_flags, p);
if (clone_flags & CLONE_IDLETASK)
@@ -935,15 +927,12 @@ struct task_struct *copy_process(unsigne
p->proc_dentry = NULL;
- INIT_LIST_HEAD(&p->run_list);
-
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
INIT_LIST_HEAD(&p->posix_timers);
init_waitqueue_head(&p->wait_chldexit);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
- spin_lock_init(&p->switch_lock);
spin_lock_init(&p->proc_lock);
clear_tsk_thread_flag(p, TIF_SIGPENDING);
@@ -958,7 +947,6 @@ struct task_struct *copy_process(unsigne
p->tty_old_pgrp = 0;
p->utime = p->stime = 0;
p->cutime = p->cstime = 0;
- p->array = NULL;
p->lock_depth = -1; /* -1 = no lock */
p->start_time = get_jiffies_64();
p->security = NULL;
@@ -1007,38 +995,12 @@ struct task_struct *copy_process(unsigne
p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
p->pdeath_signal = 0;
+ /* Perform scheduler related setup */
+ sched_fork(p);
+
/*
- * Share the timeslice between parent and child, thus the
- * total amount of pending timeslices in the system doesn't change,
- * resulting in more scheduling fairness.
- */
- local_irq_disable();
- p->time_slice = (current->time_slice + 1) >> 1;
- /*
- * The remainder of the first timeslice might be recovered by
- * the parent if the child exits early enough.
- */
- p->first_time_slice = 1;
- current->time_slice >>= 1;
- p->timestamp = sched_clock();
- if (!current->time_slice) {
- /*
- * This case is rare, it happens when the parent has only
- * a single jiffy left from its timeslice. Taking the
- * runqueue lock is not a problem.
- */
- current->time_slice = 1;
- preempt_disable();
- scheduler_tick(0, 0);
- local_irq_enable();
- preempt_enable();
- } else
- local_irq_enable();
- /*
- * Ok, add it to the run-queues and make it
- * visible to the rest of the system.
- *
- * Let it rip!
+ * Ok, make it visible to the rest of the system.
+ * We dont wake it up yet.
*/
p->tgid = p->pid;
p->group_leader = p;
--- linux/kernel/sched.c.orig
+++ linux/kernel/sched.c
@@ -674,17 +674,32 @@ int wake_up_state(task_t *p, unsigned in
}
/*
- * wake_up_forked_process - wake up a freshly forked process.
- *
- * This function will do some initial scheduler statistics housekeeping
- * that must be done for every newly created process.
+ * Perform scheduler related setup for a newly forked process p.
+ * p is forked by current.
*/
-void wake_up_forked_process(task_t * p)
+void sched_fork(task_t *p)
{
- unsigned long flags;
- runqueue_t *rq = task_rq_lock(current, &flags);
+ p->state = TASK_RUNNING;
+ INIT_LIST_HEAD(&p->run_list);
+ p->array = NULL;
+ spin_lock_init(&p->switch_lock);
+#ifdef CONFIG_PREEMPT
+ /*
+ * During context-switch we hold precisely one spinlock, which
+ * schedule_tail drops. (in the common case it's this_rq()->lock,
+ * but it also can be p->switch_lock.) So we compensate with a count
+ * of 1. Also, we want to start with kernel preemption disabled.
+ */
+ p->thread_info->preempt_count = 1;
+#endif
+ /*
+ * Share the timeslice between parent and child, thus the
+ * total amount of pending timeslices in the system doesn't change,
+ * resulting in more scheduling fairness.
+ */
+ local_irq_disable();
- BUG_ON(p->state != TASK_RUNNING);
+ p->time_slice = (current->time_slice + 1) >> 1;
/*
* We decrease the sleep average of forking parents
@@ -699,12 +714,47 @@ void wake_up_forked_process(task_t * p)
p->interactive_credit = 0;
- p->prio = effective_prio(p);
+ /*
+ * The remainder of the first timeslice might be recovered by
+ * the parent if the child exits early enough.
+ */
+ p->first_time_slice = 1;
+ current->time_slice >>= 1;
+ p->timestamp = sched_clock();
+ if (!current->time_slice) {
+ /*
+ * This case is rare, it happens when the parent has only
+ * a single jiffy left from its timeslice. Taking the
+ * runqueue lock is not a problem.
+ */
+ current->time_slice = 1;
+ preempt_disable();
+ scheduler_tick(0, 0);
+ local_irq_enable();
+ preempt_enable();
+ } else
+ local_irq_enable();
+}
+
+/*
+ * wake_up_forked_process - wake up a freshly forked process.
+ *
+ * (This function implements child-runs-first logic, that's why
+ * do_fork() cannot just use wake_up_process().)
+ */
+void wake_up_forked_process(task_t * p)
+{
+ unsigned long flags;
+ runqueue_t *rq = task_rq_lock(current, &flags);
+
+ BUG_ON(p->state != TASK_RUNNING);
+
set_task_cpu(p, smp_processor_id());
- if (unlikely(!current->array))
+ if (unlikely(!current->array)) {
+ p->prio = effective_prio(p);
__activate_task(p, rq);
- else {
+ } else {
p->prio = current->prio;
list_add_tail(&p->run_list, ¤t->run_list);
p->array = current->array;
prev parent reply other threads:[~2004-01-08 19:32 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-01-08 17:31 [patch] fix runqueue corruption, 2.6.1-rc3-A0 Ingo Molnar
2004-01-08 18:53 ` Linus Torvalds
2004-01-08 19:31 ` Ingo Molnar [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040108193105.GA32647@elte.hu \
--to=mingo@elte.hu \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.