From: Robert Jennings <rcj@linux.vnet.ibm.com>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>,
linuxppc-dev@lists.ozlabs.org, stable@kernel.org
Subject: Re: [PATCH] powerpc: Bring all threads online prior to migration/hibernation
Date: Wed, 1 May 2013 19:39:00 -0500 [thread overview]
Message-ID: <20130502003900.GA5009@linux.vnet.ibm.com> (raw)
In-Reply-To: <1367442296.22115.48.camel@pasglop>
* Benjamin Herrenschmidt (benh@kernel.crashing.org) wrote:
> On Wed, 2013-05-01 at 10:25 -0500, Nathan Fontenot wrote:
> > On 04/26/2013 04:32 PM, Robert Jennings wrote:
> > > With this patch before a migration/hibernation all threads present but
> > > not online will be brought online. After migration/hibernation those
> > > threads are taken back offline.
> > >
> > > During migration/hibernation all online CPUs must call H_JOIN, this is
> > > required by the hypervisor. Without this patch, threads that are offline
> > > (H_CEDE'd) will not be woken to make the H_JOIN call and the OS will be
> > > deadlocked (all threads either JOIN'd or CEDE'd).
> > >
> >
> > This fixes a long standing bug in partition migration.
> >
> > Acked-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
>
> I was about to apply this one but had to take it out of my branch as it
> breaks the UP build. Can you send a new revision of the patch ? It's a
> bug fix, I'll put it in at -rc1.
Yes, sorry. I'll get you a fix for the -rc1 timeframe for sure.
> Cheers,
> Ben.
>
> > > Cc: <stable@kernel.org>
> > > Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
> > > ---
> > > arch/powerpc/include/asm/rtas.h | 2 +
> > > arch/powerpc/kernel/rtas.c | 95 ++++++++++++++++++++++++++++++
> > > arch/powerpc/platforms/pseries/suspend.c | 22 +++++++
> > > 3 files changed, 119 insertions(+)
> > >
> > > diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> > > index aef00c6..ee38f29 100644
> > > --- a/arch/powerpc/include/asm/rtas.h
> > > +++ b/arch/powerpc/include/asm/rtas.h
> > > @@ -262,6 +262,8 @@ extern void rtas_progress(char *s, unsigned short hex);
> > > extern void rtas_initialize(void);
> > > extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
> > > extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
> > > +extern int rtas_online_cpus_mask(cpumask_var_t cpus);
> > > +extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
> > > extern int rtas_ibm_suspend_me(struct rtas_args *);
> > >
> > > struct rtc_time;
> > > diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> > > index 1fd6e7b..855ee98 100644
> > > --- a/arch/powerpc/kernel/rtas.c
> > > +++ b/arch/powerpc/kernel/rtas.c
> > > @@ -19,6 +19,7 @@
> > > #include <linux/init.h>
> > > #include <linux/capability.h>
> > > #include <linux/delay.h>
> > > +#include <linux/cpu.h>
> > > #include <linux/smp.h>
> > > #include <linux/completion.h>
> > > #include <linux/cpumask.h>
> > > @@ -807,6 +808,77 @@ static void rtas_percpu_suspend_me(void *info)
> > > __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
> > > }
> > >
> > > +enum rtas_cpu_state {
> > > + DOWN,
> > > + UP,
> > > +};
> > > +
> > > +/* On return cpumask will be altered to indicate CPUs changed */
> > > +static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
> > > + cpumask_var_t cpus)
> > > +{
> > > + int cpu;
> > > + int cpuret = 0;
> > > + int ret = 0;
> > > +
> > > + if (cpumask_empty(cpus))
> > > + return 0;
> > > +
> > > + for_each_cpu(cpu, cpus) {
> > > + switch (state) {
> > > + case DOWN:
> > > + cpuret = cpu_down(cpu);
> > > + break;
> > > + case UP:
> > > + cpuret = cpu_up(cpu);
> > > + break;
> > > + }
> > > + if (cpuret) {
> > > + pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
> > > + __func__,
> > > + ((state == UP) ? "up" : "down"),
> > > + cpu, cpuret);
> > > + if (!ret)
> > > + ret = cpuret;
> > > + if (state == UP) {
> > > + cpumask_shift_right(cpus, cpus, cpu);
> > > + cpumask_shift_left(cpus, cpus, cpu);
> > > + break;
> > > + } else
> > > + cpumask_clear_cpu(cpu, cpus);
> > > + }
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +int rtas_online_cpus_mask(cpumask_var_t cpus)
> > > +{
> > > + int ret;
> > > +
> > > + ret = rtas_cpu_state_change_mask(UP, cpus);
> > > +
> > > + if (ret) {
> > > + cpumask_var_t tmp_mask;
> > > +
> > > + if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY))
> > > + return ret;
> > > +
> > > + cpumask_copy(tmp_mask, cpus);
> > > + rtas_offline_cpus_mask(tmp_mask);
> > > + free_cpumask_var(tmp_mask);
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +EXPORT_SYMBOL(rtas_online_cpus_mask);
> > > +
> > > +int rtas_offline_cpus_mask(cpumask_var_t cpus)
> > > +{
> > > + return rtas_cpu_state_change_mask(DOWN, cpus);
> > > +}
> > > +EXPORT_SYMBOL(rtas_offline_cpus_mask);
> > > +
> > > int rtas_ibm_suspend_me(struct rtas_args *args)
> > > {
> > > long state;
> > > @@ -814,6 +886,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
> > > unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
> > > struct rtas_suspend_me_data data;
> > > DECLARE_COMPLETION_ONSTACK(done);
> > > + cpumask_var_t offline_mask;
> > > + int cpuret;
> > >
> > > if (!rtas_service_present("ibm,suspend-me"))
> > > return -ENOSYS;
> > > @@ -837,11 +911,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
> > > return 0;
> > > }
> > >
> > > + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
> > > + return -ENOMEM;
> > > +
> > > atomic_set(&data.working, 0);
> > > atomic_set(&data.done, 0);
> > > atomic_set(&data.error, 0);
> > > data.token = rtas_token("ibm,suspend-me");
> > > data.complete = &done;
> > > +
> > > + /* All present CPUs must be online */
> > > + cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
> > > + cpuret = rtas_online_cpus_mask(offline_mask);
> > > + if (cpuret) {
> > > + pr_err("%s: Could not bring present CPUs online.\n", __func__);
> > > + atomic_set(&data.error, cpuret);
> > > + goto out;
> > > + }
> > > +
> > > stop_topology_update();
> > >
> > > /* Call function on all CPUs. One of us will make the
> > > @@ -857,6 +944,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
> > >
> > > start_topology_update();
> > >
> > > + /* Take down CPUs not online prior to suspend */
> > > + cpuret = rtas_offline_cpus_mask(offline_mask);
> > > + if (cpuret)
> > > + pr_warn("%s: Could not restore CPUs to offline state.\n",
> > > + __func__);
> > > +
> > > +out:
> > > + free_cpumask_var(offline_mask);
> > > return atomic_read(&data.error);
> > > }
> > > #else /* CONFIG_PPC_PSERIES */
> > > diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
> > > index 47226e0..5f997e7 100644
> > > --- a/arch/powerpc/platforms/pseries/suspend.c
> > > +++ b/arch/powerpc/platforms/pseries/suspend.c
> > > @@ -16,6 +16,7 @@
> > > * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> > > */
> > >
> > > +#include <linux/cpu.h>
> > > #include <linux/delay.h>
> > > #include <linux/suspend.h>
> > > #include <linux/stat.h>
> > > @@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev,
> > > struct device_attribute *attr,
> > > const char *buf, size_t count)
> > > {
> > > + cpumask_var_t offline_mask;
> > > int rc;
> > >
> > > if (!capable(CAP_SYS_ADMIN))
> > > return -EPERM;
> > >
> > > + if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
> > > + return -ENOMEM;
> > > +
> > > stream_id = simple_strtoul(buf, NULL, 16);
> > >
> > > do {
> > > @@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev,
> > > } while (rc == -EAGAIN);
> > >
> > > if (!rc) {
> > > + /* All present CPUs must be online */
> > > + cpumask_andnot(offline_mask, cpu_present_mask,
> > > + cpu_online_mask);
> > > + rc = rtas_online_cpus_mask(offline_mask);
> > > + if (rc) {
> > > + pr_err("%s: Could not bring present CPUs online.\n",
> > > + __func__);
> > > + goto out;
> > > + }
> > > +
> > > stop_topology_update();
> > > rc = pm_suspend(PM_SUSPEND_MEM);
> > > start_topology_update();
> > > +
> > > + /* Take down CPUs not online prior to suspend */
> > > + if (!rtas_offline_cpus_mask(offline_mask))
> > > + pr_warn("%s: Could not restore CPUs to offline "
> > > + "state.\n", __func__);
> > > }
> > >
> > > stream_id = 0;
> > >
> > > if (!rc)
> > > rc = count;
> > > +out:
> > > + free_cpumask_var(offline_mask);
> > > return rc;
> > > }
> > >
> >
> >
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
next prev parent reply other threads:[~2013-05-02 0:39 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-26 21:32 [PATCH] powerpc: Bring all threads online prior to migration/hibernation Robert Jennings
2013-05-01 15:25 ` Nathan Fontenot
2013-05-01 21:04 ` Benjamin Herrenschmidt
2013-05-02 0:39 ` Robert Jennings [this message]
2013-05-01 19:43 ` Srivatsa S. Bhat
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130502003900.GA5009@linux.vnet.ibm.com \
--to=rcj@linux.vnet.ibm.com \
--cc=benh@kernel.crashing.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=nfont@linux.vnet.ibm.com \
--cc=stable@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.