From: Gautham R Shenoy <ego@in.ibm.com>
To: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>,
Peter Zijlstra <peterz@infradead.org>,
Johannes Berg <johannes@sipsolutions.net>,
Martin Schwidefsky <schwidefsky@de.ibm.com>,
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
linux-kernel@vger.kernel.org
Subject: Re: get_online_cpus() && workqueues
Date: Mon, 28 Apr 2008 17:27:00 +0530 [thread overview]
Message-ID: <20080428115700.GC23162@in.ibm.com> (raw)
In-Reply-To: <20080426144330.GA6150@tv-sign.ru>
On Sat, Apr 26, 2008 at 06:43:30PM +0400, Oleg Nesterov wrote:
> Gautham, Srivatsa, seriously, can't we uglify cpu.c a little bit to solve
> the problem. Please see the illustration patch below. It looks complicated,
> but in fact it is quite trivial.
>
> In short: work_struct can't use get_online_cpus() due to deadlock with the
> CPU_DEAD phase.
>
> Can't we add another nested lock which is dropped right after __cpu_die()?
> (in fact I think it could be dropped after __stop_machine_run).
>
> The new read-lock is get_online_map() (just a random name for now). The only
> difference wrt get_online_cpus() is that it doesn't protect against CPU_DEAD,
> but most users of get_online_cpus() doesn't need this, they only need a
> stable cpu_online_map and sometimes they need to be sure that some per-cpu
> object (say, cpu_workqueue_struct->thread) can't be destroyed under this
> lock.
>
> get_online_map() seem to fit for this, and can be used from work->func().
> (actually, I think most users of use get_online_cpus() could use the new
> helper instead, but this doen't matter).
>
> Heiko, what do you think? Is it suitable for arch_reinit_sched_domains()?
>
> Oleg.
>
> --- 25/kernel/cpu.c~HP_LOCK 2008-02-16 18:36:37.000000000 +0300
> +++ 25/kernel/cpu.c 2008-04-26 18:14:25.000000000 +0400
> @@ -25,7 +25,7 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(c
> */
> static int cpu_hotplug_disabled;
>
> -static struct {
> +static struct cpu_lock {
> struct task_struct *active_writer;
> struct mutex lock; /* Synchronizes accesses to refcount, */
> /*
> @@ -33,41 +33,65 @@ static struct {
> * an ongoing cpu hotplug operation.
> */
> int refcount;
> -} cpu_hotplug;
> +} cpu_hotplug, online_map;
> +
> +static inline void __cpu_hotplug_init(struct cpu_lock *cpu_lock)
> +{
> + cpu_lock->active_writer = NULL;
> + mutex_init(&cpu_lock->lock);
> + cpu_lock->refcount = 0;
> +}
>
> void __init cpu_hotplug_init(void)
> {
> - cpu_hotplug.active_writer = NULL;
> - mutex_init(&cpu_hotplug.lock);
> - cpu_hotplug.refcount = 0;
> + __cpu_hotplug_init(&cpu_hotplug);
> + __cpu_hotplug_init(&online_map);
> }
>
> #ifdef CONFIG_HOTPLUG_CPU
>
> -void get_online_cpus(void)
> +void cpu_read_lock(struct cpu_lock *cpu_lock)
> {
> might_sleep();
> - if (cpu_hotplug.active_writer == current)
> + if (cpu_lock->active_writer == current)
> return;
> - mutex_lock(&cpu_hotplug.lock);
> - cpu_hotplug.refcount++;
> - mutex_unlock(&cpu_hotplug.lock);
> + mutex_lock(&cpu_lock->lock);
> + cpu_lock->refcount++;
> + mutex_unlock(&cpu_lock->lock);
> +}
>
> +void get_online_cpus(void)
> +{
> + cpu_read_lock(&cpu_hotplug);
> }
> EXPORT_SYMBOL_GPL(get_online_cpus);
>
> -void put_online_cpus(void)
> +void get_online_map(void)
> {
> - if (cpu_hotplug.active_writer == current)
> + cpu_read_lock(&online_map);
> +}
> +
> +void cpu_read_unlock(struct cpu_lock *cpu_lock)
> +{
> + if (cpu_lock->active_writer == current)
> return;
> - mutex_lock(&cpu_hotplug.lock);
> - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
> - wake_up_process(cpu_hotplug.active_writer);
> - mutex_unlock(&cpu_hotplug.lock);
> + mutex_lock(&cpu_lock->lock);
> + if (!--cpu_lock->refcount && unlikely(cpu_lock->active_writer))
> + wake_up_process(cpu_lock->active_writer);
> + mutex_unlock(&cpu_lock->lock);
> +}
>
> +void put_online_cpus(void)
> +{
> + cpu_read_unlock(&cpu_hotplug);
> }
> EXPORT_SYMBOL_GPL(put_online_cpus);
>
> +void put_online_map(void)
> +{
> + cpu_read_unlock(&online_map);
> +}
> +
> #endif /* CONFIG_HOTPLUG_CPU */
>
> /*
> @@ -91,7 +115,7 @@ void cpu_maps_update_done(void)
> * Note that during a cpu-hotplug operation, the new readers, if any,
> * will be blocked by the cpu_hotplug.lock
> *
> - * Since cpu_hotplug_begin() is always called after invoking
> + * Since cpu_write_lock() is always called after invoking
> * cpu_maps_update_begin(), we can be sure that only one writer is active.
> *
> * Note that theoretically, there is a possibility of a livelock:
> @@ -106,25 +130,26 @@ void cpu_maps_update_done(void)
> * get_online_cpus() not an api which is called all that often.
> *
> */
> -static void cpu_hotplug_begin(void)
> +static void cpu_write_lock(struct cpu_lock *cpu_lock)
> {
> - cpu_hotplug.active_writer = current;
> + cpu_lock->active_writer = current;
>
> for (;;) {
> - mutex_lock(&cpu_hotplug.lock);
> - if (likely(!cpu_hotplug.refcount))
> + mutex_lock(&cpu_lock->lock);
> + if (likely(!cpu_lock->refcount))
> break;
> __set_current_state(TASK_UNINTERRUPTIBLE);
> - mutex_unlock(&cpu_hotplug.lock);
> + mutex_unlock(&cpu_lock->lock);
> schedule();
> }
> }
>
> -static void cpu_hotplug_done(void)
> +static void cpu_write_unlock(struct cpu_lock *cpu_lock)
> {
> - cpu_hotplug.active_writer = NULL;
> - mutex_unlock(&cpu_hotplug.lock);
> + cpu_lock->active_writer = NULL;
> + mutex_unlock(&cpu_lock->lock);
> }
> +
> /* Need to know about CPUs going up/down? */
> int __cpuinit register_cpu_notifier(struct notifier_block *nb)
> {
> @@ -207,7 +232,8 @@ static int _cpu_down(unsigned int cpu, i
> if (!cpu_online(cpu))
> return -EINVAL;
>
> - cpu_hotplug_begin();
> + cpu_write_lock(&cpu_hotplug);
> + cpu_write_lock(&online_map);
IMO, we should acquire the cpu_write_lock(&online_map)
just before __stop_machine_run() and release it once
stop_machine_run() returns.
IIUC, this lock protects only the cpu_online_map.
Ditto in case of cpu_up() where we should acquire the lock
just before calling __cpu_up() and release it immediately thereafter.
> err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
> hcpu, -1, &nr_calls);
> if (err == NOTIFY_BAD) {
> @@ -238,6 +264,7 @@ static int _cpu_down(unsigned int cpu, i
> err = PTR_ERR(p);
> goto out_allowed;
> }
> + err = -EAGAIN;
> goto out_thread;
> }
>
> @@ -247,6 +274,7 @@ static int _cpu_down(unsigned int cpu, i
>
> /* This actually kills the CPU. */
> __cpu_die(cpu);
> + cpu_write_unlock(&online_map);
>
> /* CPU is completely dead: tell everyone. Too late to complain. */
> if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
> @@ -260,7 +288,9 @@ out_thread:
> out_allowed:
> set_cpus_allowed(current, old_allowed);
> out_release:
> - cpu_hotplug_done();
> + if (err)
> + cpu_write_unlock(&online_map);
> + cpu_write_unlock(&cpu_hotplug);
> return err;
> }
>
> @@ -289,7 +319,8 @@ static int __cpuinit _cpu_up(unsigned in
> if (cpu_online(cpu) || !cpu_present(cpu))
> return -EINVAL;
>
> - cpu_hotplug_begin();
> + cpu_write_lock(&cpu_hotplug);
> + cpu_write_lock(&online_map);
> ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
> -1, &nr_calls);
> if (ret == NOTIFY_BAD) {
> @@ -313,7 +344,8 @@ out_notify:
> if (ret != 0)
> __raw_notifier_call_chain(&cpu_chain,
> CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
> - cpu_hotplug_done();
> + cpu_write_unlock(&online_map);
> + cpu_write_unlock(&cpu_hotplug);
>
> return ret;
> }
--
Thanks and Regards
gautham
prev parent reply other threads:[~2008-04-28 12:01 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-14 3:04 2.6.25-rc9 -- INFO: possible circular locking dependency detected Miles Lane
2008-04-14 3:29 ` Miles Lane
2008-04-14 6:54 ` Peter Zijlstra
2008-04-14 7:02 ` Heiko Carstens
2008-04-14 7:18 ` Ingo Molnar
2008-04-14 12:06 ` Peter Zijlstra
2008-04-14 12:27 ` Gautham R Shenoy
2008-04-14 12:42 ` Peter Zijlstra
2008-04-14 13:28 ` Gautham R Shenoy
2008-04-14 14:48 ` Gautham R Shenoy
2008-04-14 15:19 ` Heiko Carstens
2008-04-14 15:46 ` Gautham R Shenoy
2008-04-14 19:35 ` Heiko Carstens
2008-04-15 13:52 ` Gautham R Shenoy
2008-04-15 14:37 ` Heiko Carstens
[not found] ` <20080422123304.GA777@osiris.boeblingen.de.ibm.com>
[not found] ` <1208868236.7115.249.camel@twins>
[not found] ` <20080423035802.GA8895@in.ibm.com>
[not found] ` <20080424150714.GA8273@osiris.boeblingen.de.ibm.com>
[not found] ` <1209052984.7115.369.camel@twins>
[not found] ` <20080424155946.GA11160@tv-sign.ru>
[not found] ` <20080424194810.GA4821@osiris.boeblingen.de.ibm.com>
[not found] ` <20080424192706.GA165@tv-sign.ru>
[not found] ` <20080425064044.GA10817@osiris.boeblingen.de.ibm.com>
2008-04-26 14:43 ` get_online_cpus() && workqueues Oleg Nesterov
2008-04-27 12:22 ` Heiko Carstens
2008-04-27 14:25 ` Oleg Nesterov
2008-04-28 7:02 ` Gautham R Shenoy
2008-04-28 10:56 ` Oleg Nesterov
2008-04-28 12:03 ` Gautham R Shenoy
2008-04-28 12:40 ` Oleg Nesterov
2008-04-28 11:57 ` Gautham R Shenoy [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080428115700.GC23162@in.ibm.com \
--to=ego@in.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=johannes@sipsolutions.net \
--cc=linux-kernel@vger.kernel.org \
--cc=oleg@tv-sign.ru \
--cc=peterz@infradead.org \
--cc=schwidefsky@de.ibm.com \
--cc=vatsa@in.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox