* [PATCH 2/2][v4] powerpc: Make the CMM memory hotplug aware
From: Robert Jennings @ 2009-10-12 19:49 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: linux-mm, Mel Gorman, Gerald Schaefer, linux-kernel, linuxppc-dev,
Martin Schwidefsky, Badari Pulavarty, Brian King, Paul Mackerras,
Andrew Morton, Ingo Molnar, KAMEZAWA Hiroyuki
In-Reply-To: <1255324007.2192.106.camel@pasglop>
The Collaborative Memory Manager (CMM) module allocates individual pages
over time that are not migratable. On a long running system this can
severely impact the ability to find enough pages to support a hotplug
memory remove operation.
This patch adds a memory isolation notifier and a memory hotplug notifier.
The memory isolation notifier will return the number of pages found
in the range specified. This is used to determine if all of the used
pages in a pageblock are owned by the balloon (or other entities in
the notifier chain). The hotplug notifier will free pages in the range
which is to be removed. The priority of this hotplug notifier is low
so that it will be called near last, this helps avoids removing loaned
pages in operations that fail due to other handlers.
CMM activity will be halted when hotplug remove operations are active
and resume activity after a delay period to allow the hypervisor time
to adjust.
Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
---
Changes since v3:
* Changed from atomic to mutex for hotplug state tracking.
* Clarified documentation for the new module parameter description.
Changes since v2:
* None, resent with parent patch to keep them together.
arch/powerpc/platforms/pseries/cmm.c | 221 ++++++++++++++++++++++++++++++++++-
1 file changed, 215 insertions(+), 6 deletions(-)
Index: b/arch/powerpc/platforms/pseries/cmm.c
===================================================================
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -38,19 +38,28 @@
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
+#include <linux/memory.h>
#include "plpar_wrappers.h"
#define CMM_DRIVER_VERSION "1.0.0"
#define CMM_DEFAULT_DELAY 1
+#define CMM_HOTPLUG_DELAY 5
#define CMM_DEBUG 0
#define CMM_DISABLE 0
#define CMM_OOM_KB 1024
#define CMM_MIN_MEM_MB 256
#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+/*
+ * The priority level tries to ensure that this notifier is called as
+ * late as possible to reduce thrashing in the shared memory pool.
+ */
+#define CMM_MEM_HOTPLUG_PRI 1
+#define CMM_MEM_ISOLATE_PRI 15
static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
"[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
+ "before loaning resumes. "
+ "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
"[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
static struct task_struct *cmm_thread_ptr;
/**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
cmm_dbg("Begin request for %ld pages\n", nr);
while (nr) {
+ /* Exit if a hotplug operation is in progress or occurred */
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ mutex_unlock(&hotplug_mutex);
+ break;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ break;
+ }
+
addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY | __GFP_NOMEMALLOC);
if (!addr)
@@ -119,8 +146,10 @@ static long cmm_alloc_pages(long nr)
if (!pa || pa->index >= CMM_NR_PAGES) {
/* Need a new page for the page list. */
spin_unlock(&cmm_lock);
- npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
+ npa = (struct cmm_page_array *)__get_free_page(
+ GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC |
+ __GFP_MOVABLE);
if (!npa) {
pr_info("%s: Can not allocate new page list\n", __func__);
free_page(addr);
@@ -273,9 +302,28 @@ static int cmm_thread(void *dummy)
while (1) {
timeleft = msleep_interruptible(delay * 1000);
- if (kthread_should_stop() || timeleft) {
- loaned_pages_target = loaned_pages;
+ if (kthread_should_stop() || timeleft)
break;
+
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ hotplug_occurred = 0;
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Hotplug operation has occurred, "
+ "loaning activity suspended "
+ "for %d seconds.\n",
+ hotplug_delay);
+ timeleft = msleep_interruptible(hotplug_delay *
+ 1000);
+ if (kthread_should_stop() || timeleft)
+ break;
+ continue;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ cmm_dbg("Hotplug operation in progress, activity "
+ "suspended\n");
+ continue;
}
cmm_get_mpp();
@@ -405,6 +453,159 @@ static struct notifier_block cmm_reboot_
};
/**
+ * cmm_count_pages - Count the number of pages loaned in a particular range.
+ *
+ * @arg: memory_isolate_notify structure with address range and count
+ *
+ * Return value:
+ * 0 on success
+ **/
+static unsigned long cmm_count_pages(void *arg)
+{
+ struct memory_isolate_notify *marg = arg;
+ struct cmm_page_array *pa;
+ unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+ unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
+ unsigned long idx;
+
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+ while (pa) {
+ for (idx = 0; idx < pa->index; idx++)
+ if (pa->page[idx] >= start && pa->page[idx] < end)
+ marg->pages_found++;
+ pa = pa->next;
+ }
+ spin_unlock(&cmm_lock);
+ return 0;
+}
+
+/**
+ * cmm_memory_isolate_cb - Handle memory isolation notifier calls
+ * @self: notifier block struct
+ * @action: action to take
+ * @arg: struct memory_isolate_notify data for handler
+ *
+ * Return value:
+ * NOTIFY_OK or notifier error based on subfunction return value
+ **/
+static int cmm_memory_isolate_cb(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ int ret = 0;
+
+ if (action == MEM_ISOLATE_COUNT)
+ ret = cmm_count_pages(arg);
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
+ return ret;
+}
+
+static struct notifier_block cmm_mem_isolate_nb = {
+ .notifier_call = cmm_memory_isolate_cb,
+ .priority = CMM_MEM_ISOLATE_PRI
+};
+
+/**
+ * cmm_mem_going_offline - Unloan pages where memory is to be removed
+ * @arg: memory_notify structure with page range to be offlined
+ *
+ * Return value:
+ * 0 on success
+ **/
+static int cmm_mem_going_offline(void *arg)
+{
+ struct memory_notify *marg = arg;
+ unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+ unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
+ struct cmm_page_array *pa_curr, *pa_last;
+ unsigned long idx;
+ unsigned long freed = 0;
+
+ cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
+ start_page, marg->nr_pages);
+ spin_lock(&cmm_lock);
+
+ pa_last = pa_curr = cmm_page_list;
+ while (pa_curr) {
+ for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
+ if ((pa_curr->page[idx] < start_page) ||
+ (pa_curr->page[idx] >= end_page))
+ continue;
+
+ plpar_page_set_active(__pa(pa_curr->page[idx]));
+ free_page(pa_curr->page[idx]);
+ freed++;
+ loaned_pages--;
+ totalram_pages++;
+ pa_curr->page[idx] = pa_last->page[--pa_last->index];
+ if (pa_last->index == 0) {
+ if (pa_curr == pa_last)
+ pa_curr = pa_last->next;
+ pa_last = pa_last->next;
+ free_page((unsigned long)cmm_page_list);
+ cmm_page_list = pa_last;
+ continue;
+ }
+ }
+ pa_curr = pa_curr->next;
+ }
+ spin_unlock(&cmm_lock);
+ cmm_dbg("Released %ld pages in the search range.\n", freed);
+
+ return 0;
+}
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self: notifier block struct
+ * @action: action to take
+ * @arg: struct memory_notify data for handler
+ *
+ * Return value:
+ * NOTIFY_OK or notifier error based on subfunction return value
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ int ret = 0;
+
+ switch (action) {
+ case MEM_GOING_OFFLINE:
+ mutex_lock(&hotplug_mutex);
+ hotplug_occurred = 1;
+ ret = cmm_mem_going_offline(arg);
+ break;
+ case MEM_OFFLINE:
+ case MEM_CANCEL_OFFLINE:
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Memory offline operation complete.\n");
+ break;
+ case MEM_GOING_ONLINE:
+ case MEM_ONLINE:
+ case MEM_CANCEL_ONLINE:
+ break;
+ }
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
+ return ret;
+}
+
+static struct notifier_block cmm_mem_nb = {
+ .notifier_call = cmm_memory_cb,
+ .priority = CMM_MEM_HOTPLUG_PRI
+};
+
+/**
* cmm_init - Module initialization
*
* Return value:
@@ -426,18 +627,24 @@ static int cmm_init(void)
if ((rc = cmm_sysfs_register(&cmm_sysdev)))
goto out_reboot_notifier;
+ if (register_memory_notifier(&cmm_mem_nb) ||
+ register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+ goto out_unregister_notifier;
+
if (cmm_disabled)
return rc;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr)) {
rc = PTR_ERR(cmm_thread_ptr);
- goto out_unregister_sysfs;
+ goto out_unregister_notifier;
}
return rc;
-out_unregister_sysfs:
+out_unregister_notifier:
+ unregister_memory_notifier(&cmm_mem_nb);
+ unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_unregister_sysfs(&cmm_sysdev);
out_reboot_notifier:
unregister_reboot_notifier(&cmm_reboot_nb);
@@ -458,6 +665,8 @@ static void cmm_exit(void)
kthread_stop(cmm_thread_ptr);
unregister_oom_notifier(&cmm_oom_nb);
unregister_reboot_notifier(&cmm_reboot_nb);
+ unregister_memory_notifier(&cmm_mem_nb);
+ unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_free_pages(loaned_pages);
cmm_unregister_sysfs(&cmm_sysdev);
}
^ permalink raw reply
* Re: [PATCH] of/platform: Implement support for dev_pm_ops
From: Grant Likely @ 2009-10-12 22:09 UTC (permalink / raw)
To: Anton Vorontsov; +Cc: linux-pm, David Miller, linuxppc-dev
In-Reply-To: <20091012155041.GA1071@oksana.dev.rtsoft.ru>
On Mon, Oct 12, 2009 at 8:50 AM, Anton Vorontsov
<avorontsov@ru.mvista.com> wrote:
> Linux power management subsystem supports vast amount of new PM
> callbacks that are crucial for proper suspend and hibernation support
> in drivers.
>
> This patch implements support for dev_pm_ops, preserving support
> for legacy callbacks.
>
> Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Hmmm... I'm not very familiar with the PM callbacks, but change
doesn't look right to me. In particular, a lot of these new hooks
don't do anything remotely of_platform bus specific. For example,
of_platform_pm_prepare() checks if there is drv, drv->pm, and
drv->pm->prepare. If all are true, then it calls drv->pm->prepare().
I see that the platform bus platform_pm_prepare() function is
absolutely identical. I haven't looked, but I wouldn't be surprised
if other busses do the same.
I think these simple pm ops should be made library functions that
platform, of_platform and other simple busses can just populate their
pm ops structure with.
g.
> ---
> =A0drivers/of/platform.c | =A0305 +++++++++++++++++++++++++++++++++++++++=
+++++++---
> =A01 files changed, 290 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> index 298de0f..d58ade1 100644
> --- a/drivers/of/platform.c
> +++ b/drivers/of/platform.c
> @@ -65,47 +65,322 @@ static int of_platform_device_remove(struct device *=
dev)
> =A0 =A0 =A0 =A0return 0;
> =A0}
>
> -static int of_platform_device_suspend(struct device *dev, pm_message_t s=
tate)
> +static void of_platform_device_shutdown(struct device *dev)
> =A0{
> =A0 =A0 =A0 =A0struct of_device *of_dev =3D to_of_device(dev);
> =A0 =A0 =A0 =A0struct of_platform_driver *drv =3D to_of_platform_driver(d=
ev->driver);
> - =A0 =A0 =A0 int error =3D 0;
>
> - =A0 =A0 =A0 if (dev->driver && drv->suspend)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 error =3D drv->suspend(of_dev, state);
> - =A0 =A0 =A0 return error;
> + =A0 =A0 =A0 if (dev->driver && drv->shutdown)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 drv->shutdown(of_dev);
> =A0}
>
> -static int of_platform_device_resume(struct device * dev)
> +#ifdef CONFIG_PM_SLEEP
> +
> +static int of_platform_legacy_suspend(struct device *dev, pm_message_t m=
esg)
> =A0{
> =A0 =A0 =A0 =A0struct of_device *of_dev =3D to_of_device(dev);
> =A0 =A0 =A0 =A0struct of_platform_driver *drv =3D to_of_platform_driver(d=
ev->driver);
> - =A0 =A0 =A0 int error =3D 0;
> + =A0 =A0 =A0 int ret =3D 0;
>
> - =A0 =A0 =A0 if (dev->driver && drv->resume)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 error =3D drv->resume(of_dev);
> - =A0 =A0 =A0 return error;
> + =A0 =A0 =A0 if (dev->driver && drv->suspend)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->suspend(of_dev, mesg);
> + =A0 =A0 =A0 return ret;
> =A0}
>
> -static void of_platform_device_shutdown(struct device *dev)
> +static int of_platform_legacy_resume(struct device *dev)
> =A0{
> =A0 =A0 =A0 =A0struct of_device *of_dev =3D to_of_device(dev);
> =A0 =A0 =A0 =A0struct of_platform_driver *drv =3D to_of_platform_driver(d=
ev->driver);
> + =A0 =A0 =A0 int ret =3D 0;
>
> - =A0 =A0 =A0 if (dev->driver && drv->shutdown)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 drv->shutdown(of_dev);
> + =A0 =A0 =A0 if (dev->driver && drv->resume)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->resume(of_dev);
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_prepare(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (drv && drv->pm && drv->pm->prepare)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->prepare(dev);
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static void of_platform_pm_complete(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> +
> + =A0 =A0 =A0 if (drv && drv->pm && drv->pm->complete)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 drv->pm->complete(dev);
> +}
> +
> +#ifdef CONFIG_SUSPEND
> +
> +static int of_platform_pm_suspend(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->suspend)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->suspend(de=
v);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_suspend(dev, PMS=
G_SUSPEND);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> =A0}
>
> +static int of_platform_pm_suspend_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->suspend_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->suspend_no=
irq(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_resume(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->resume)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->resume(dev=
);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_resume(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_resume_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->resume_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->resume_noi=
rq(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +#else /* !CONFIG_SUSPEND */
> +
> +#define of_platform_pm_suspend =A0 =A0 =A0 =A0 NULL
> +#define of_platform_pm_resume =A0 =A0 =A0 =A0 =A0NULL
> +#define of_platform_pm_suspend_noirq =A0 NULL
> +#define of_platform_pm_resume_noirq =A0 =A0NULL
> +
> +#endif /* !CONFIG_SUSPEND */
> +
> +#ifdef CONFIG_HIBERNATION
> +
> +static int of_platform_pm_freeze(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->freeze)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->freeze(dev=
);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_suspend(dev, PMS=
G_FREEZE);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_freeze_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->freeze_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->freeze_noi=
rq(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_thaw(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->thaw)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->thaw(dev);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_resume(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_thaw_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->thaw_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->thaw_noirq=
(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_poweroff(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->poweroff)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->poweroff(d=
ev);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_suspend(dev, PMS=
G_HIBERNATE);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_poweroff_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->poweroff_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->poweroff_n=
oirq(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_restore(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->restore)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->restore(de=
v);
> + =A0 =A0 =A0 } else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D of_platform_legacy_resume(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +static int of_platform_pm_restore_noirq(struct device *dev)
> +{
> + =A0 =A0 =A0 struct device_driver *drv =3D dev->driver;
> + =A0 =A0 =A0 int ret =3D 0;
> +
> + =A0 =A0 =A0 if (!drv)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return 0;
> +
> + =A0 =A0 =A0 if (drv->pm) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (drv->pm->restore_noirq)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ret =3D drv->pm->restore_no=
irq(dev);
> + =A0 =A0 =A0 }
> +
> + =A0 =A0 =A0 return ret;
> +}
> +
> +#else /* !CONFIG_HIBERNATION */
> +
> +#define of_platform_pm_freeze =A0 =A0 =A0 =A0 =A0NULL
> +#define of_platform_pm_thaw =A0 =A0 =A0 =A0 =A0 =A0NULL
> +#define of_platform_pm_poweroff =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0NULL
> +#define of_platform_pm_restore =A0 =A0 =A0 =A0 NULL
> +#define of_platform_pm_freeze_noirq =A0 =A0NULL
> +#define of_platform_pm_thaw_noirq =A0 =A0 =A0 =A0 =A0 =A0 =A0NULL
> +#define of_platform_pm_poweroff_noirq =A0NULL
> +#define of_platform_pm_restore_noirq =A0 NULL
> +
> +#endif /* !CONFIG_HIBERNATION */
> +
> +static struct dev_pm_ops of_platform_dev_pm_ops =3D {
> + =A0 =A0 =A0 .prepare =3D of_platform_pm_prepare,
> + =A0 =A0 =A0 .complete =3D of_platform_pm_complete,
> + =A0 =A0 =A0 .suspend =3D of_platform_pm_suspend,
> + =A0 =A0 =A0 .resume =3D of_platform_pm_resume,
> + =A0 =A0 =A0 .freeze =3D of_platform_pm_freeze,
> + =A0 =A0 =A0 .thaw =3D of_platform_pm_thaw,
> + =A0 =A0 =A0 .poweroff =3D of_platform_pm_poweroff,
> + =A0 =A0 =A0 .restore =3D of_platform_pm_restore,
> + =A0 =A0 =A0 .suspend_noirq =3D of_platform_pm_suspend_noirq,
> + =A0 =A0 =A0 .resume_noirq =3D of_platform_pm_resume_noirq,
> + =A0 =A0 =A0 .freeze_noirq =3D of_platform_pm_freeze_noirq,
> + =A0 =A0 =A0 .thaw_noirq =3D of_platform_pm_thaw_noirq,
> + =A0 =A0 =A0 .poweroff_noirq =3D of_platform_pm_poweroff_noirq,
> + =A0 =A0 =A0 .restore_noirq =3D of_platform_pm_restore_noirq,
> +};
> +
> +#define OF_PLATFORM_PM_OPS_PTR (&of_platform_dev_pm_ops)
> +
> +#else /* !CONFIG_PM_SLEEP */
> +
> +#define OF_PLATFORM_PM_OPS_PTR NULL
> +
> +#endif /* !CONFIG_PM_SLEEP */
> +
> =A0int of_bus_type_init(struct bus_type *bus, const char *name)
> =A0{
> =A0 =A0 =A0 =A0bus->name =3D name;
> =A0 =A0 =A0 =A0bus->match =3D of_platform_bus_match;
> =A0 =A0 =A0 =A0bus->probe =3D of_platform_device_probe;
> =A0 =A0 =A0 =A0bus->remove =3D of_platform_device_remove;
> - =A0 =A0 =A0 bus->suspend =3D of_platform_device_suspend;
> - =A0 =A0 =A0 bus->resume =3D of_platform_device_resume;
> =A0 =A0 =A0 =A0bus->shutdown =3D of_platform_device_shutdown;
> =A0 =A0 =A0 =A0bus->dev_attrs =3D of_platform_device_attrs;
> + =A0 =A0 =A0 bus->pm =3D OF_PLATFORM_PM_OPS_PTR;
> =A0 =A0 =A0 =A0return bus_register(bus);
> =A0}
>
> --
> 1.6.3.3
>
--=20
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.
^ permalink raw reply
* Re: [PATCH] of/platform: Implement support for dev_pm_ops
From: Anton Vorontsov @ 2009-10-12 22:44 UTC (permalink / raw)
To: Grant Likely; +Cc: linux-pm, David Miller, linuxppc-dev
In-Reply-To: <fa686aa40910121509n1b7dba87nbe9ddf34f0b4d7c@mail.gmail.com>
On Mon, Oct 12, 2009 at 03:09:53PM -0700, Grant Likely wrote:
> On Mon, Oct 12, 2009 at 8:50 AM, Anton Vorontsov
> <avorontsov@ru.mvista.com> wrote:
> > Linux power management subsystem supports vast amount of new PM
> > callbacks that are crucial for proper suspend and hibernation support
> > in drivers.
> >
> > This patch implements support for dev_pm_ops, preserving support
> > for legacy callbacks.
> >
> > Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
>
> Hmmm... I'm not very familiar with the PM callbacks, but change
> doesn't look right to me. In particular, a lot of these new hooks
> don't do anything remotely of_platform bus specific. For example,
> of_platform_pm_prepare() checks if there is drv, drv->pm, and
> drv->pm->prepare. If all are true, then it calls drv->pm->prepare().
> I see that the platform bus platform_pm_prepare() function is
> absolutely identical. I haven't looked, but I wouldn't be surprised
> if other busses do the same.
>
> I think these simple pm ops should be made library functions that
> platform, of_platform and other simple busses can just populate their
> pm ops structure with.
Some hooks can be made as library functions, but some can't (for
example that do of_plaform_driver->suspend(), as opposite to
of_platform_driver->driver.suspend(), i.e. the legacy hooks).
Also, if you look into PCI bus hooks, you'll see that these hooks
aren't pure proxies for drivers, they do real work, so they won't
like to reuse or share anything.
For OF platfrom bus, we can share these functions with platform:
of_platform_pm_suspend_noirq
of_platform_pm_resume_noirq
of_platform_pm_freeze_noirq
of_platform_pm_thaw_noirq
of_platform_pm_poweroff_noirq
of_platform_pm_restore_noirq
These we cannot share:
of_platform_pm_suspend
of_platform_pm_resume
of_platform_pm_freeze
of_platform_pm_thaw
of_platform_pm_poweroff
of_platform_pm_restore
I agree that there is some room for improvements in general (e.g.
merging platform and of_platform devices/drivers), but it's not as
easy as you would like to think. Let's make it in a separate step
that don't stop real features from being implemented (e.g.
hibernate).
For the six functions that we can reuse I can prepare a cleanup
patch that we can merge via -mm, or it can just sit and collect
needed acks and can be merged via any tree. But please, no
cross-tree dependencies for the cruicial features.
Thanks,
--
Anton Vorontsov
email: cbouatmailru@gmail.com
irc://irc.freenode.net/bd2
^ permalink raw reply
* [Patch] powerpc: Fix memory leak in axon_msi.c
From: Michael Ellerman @ 2009-10-13 0:29 UTC (permalink / raw)
To: linuxppc-dev list; +Cc: eric.sesterhenn
cppcheck found a memory leak in axon_msi, if dcr_base or dcr_len are zero,
we have already allocated msic, so we should free it in the error path.
Signed-off-by: Eric Sesterhenn <eric.sesterhenn@lsexperts.de>
Acked-by: Michael Ellerman <michael@ellerman.id.au>
--- linux/arch/powerpc/platforms/cell/axon_msi.c.orig 2009-10-12 14:48:26.000000000 +0200
+++ linux/arch/powerpc/platforms/cell/axon_msi.c 2009-10-12 14:48:52.000000000 +0200
@@ -365,7 +365,7 @@ static int axon_msi_probe(struct of_devi
printk(KERN_ERR
"axon_msi: couldn't parse dcr properties on %s\n",
dn->full_name);
- goto out;
+ goto out_free_msic;
}
msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
^ permalink raw reply
* Re: [Cbe-oss-dev] [PATCH] spufs: Fix test in spufs_switch_log_read()
From: Jeremy Kerr @ 2009-10-13 0:49 UTC (permalink / raw)
To: cbe-oss-dev; +Cc: linuxppc-dev, Andrew Morton, Roel Kluin, cbe-oss-dev
In-Reply-To: <4AD32B3F.9060804@gmail.com>
Roel,
> Or can this test be removed?
I'd prefer just to remove the test.
Cheers,
Jeremy
^ permalink raw reply
* Re: [PATCH 2/8] bitmap: Introduce bitmap_set, bitmap_clear, bitmap_find_next_zero_area
From: Akinobu Mita @ 2009-10-13 2:18 UTC (permalink / raw)
To: Andrew Morton
Cc: Fenghua Yu, Greg Kroah-Hartman, linux-ia64, Tony Luck, x86,
netdev, linux-kernel, linux-altix, Yevgeny Petrilin,
FUJITA Tomonori, linuxppc-dev, Ingo Molnar, Paul Mackerras,
H. Peter Anvin, sparclinux, Thomas Gleixner, linux-usb,
David S. Miller, Lothar Wassmann
In-Reply-To: <20091009164100.85a36188.akpm@linux-foundation.org>
On Fri, Oct 09, 2009 at 04:41:00PM -0700, Andrew Morton wrote:
> On Fri, 9 Oct 2009 17:29:15 +0900
> Akinobu Mita <akinobu.mita@gmail.com> wrote:
>
> > This introduces new bitmap functions:
> >
> > bitmap_set: Set specified bit area
> > bitmap_clear: Clear specified bit area
> > bitmap_find_next_zero_area: Find free bit area
> >
> > These are stolen from iommu helper.
> >
> > I changed the return value of bitmap_find_next_zero_area if there is
> > no zero area.
> >
> > find_next_zero_area in iommu helper: returns -1
> > bitmap_find_next_zero_area: return >= bitmap size
>
> I'll plan to merge this patch into 2.6.32 so we can trickle all the
> other patches into subsystems in an orderly fashion.
Sounds good.
> > +void bitmap_set(unsigned long *map, int i, int len)
> > +{
> > + int end = i + len;
> > +
> > + while (i < end) {
> > + __set_bit(i, map);
> > + i++;
> > + }
> > +}
>
> This is really inefficient, isn't it? It's a pretty trivial matter to
> romp through memory 32 or 64 bits at a time.
OK. I'll do
> > +unsigned long bitmap_find_next_zero_area(unsigned long *map,
> > + unsigned long size,
> > + unsigned long start,
> > + unsigned int nr,
> > + unsigned long align_mask)
> > +{
> > + unsigned long index, end, i;
> > +again:
> > + index = find_next_zero_bit(map, size, start);
> > +
> > + /* Align allocation */
> > + index = (index + align_mask) & ~align_mask;
> > +
> > + end = index + nr;
> > + if (end >= size)
> > + return end;
> > + i = find_next_bit(map, end, index);
> > + if (i < end) {
> > + start = i + 1;
> > + goto again;
> > + }
> > + return index;
> > +}
> > +EXPORT_SYMBOL(bitmap_find_next_zero_area);
>
> This needs documentation, please. It appears that `size' is the size
> of the bitmap and `nr' is the number of zeroed bits we're looking for,
> but an inattentive programmer could get those reversed.
>
> Also the semantics of `align_mask' could benefit from spelling out. Is
> the alignment with respect to memory boundaries or with respect to
> `map' or with respect to map+start or what?
OK. I will document it.
And I plan to change bitmap_find_next_zero_area() to take the alignment
instead of an align_mask as Roland said.
> And why does align_mask exist at all? I was a bit surprised to see it
> there. In which scenarios will it be non-zero?
Because the users of iommu-helper and mlx4 need the alignment requirement
for the zero area.
arch/powerpc/kernel/iommu.c
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/pci-gart_64.c
drivers/net/mlx4/alloc.c
^ permalink raw reply
* Serial console under current qemu?
From: Rob Landley @ 2009-10-13 3:32 UTC (permalink / raw)
To: linuxppc-dev
Has anybody gotten a serial console to work under current qemu (ala the 0.11.0
release)?
I've tried the 2.6.30 and 2.6.31.4 kernels, and in both cases both the
bootloader and the kernel's boot messages write to the serial console just
fine, but as soon as userspace tries to write to /dev/console the kernel panics
with:
ieee1394: raw1394: /dev/raw1394 device initialized
mice: PS/2 mouse device common for all mice
TCP cubic registered
NET: Registered protocol family 17
VFS: Mounted root (squashfs filesystem) readonly on device 3:0.
Freeing unused kernel memory: 168k init
Type exit when done.Unable to handle kernel paging request for data at address
0x00000084
Faulting instruction address: 0xc012dc9c
Oops: Kernel access of bad area, sig: 11 [#1]
PowerMac
NIP: c012dc9c LR: c01467c0 CTR: c01467ac
REGS: cf831be0 TRAP: 0300 Not tainted (2.6.31.4)
MSR: 00009032 <EE,ME,IR,DR> CR: 42224022 XER: 00000000
DAR: 00000084, DSISR: 40000000
TASK = cf82f8f0[1] 'init.sh' THREAD: cf830000
GPR00: c01467c0 cf831c90 cf82f8f0 00000000 cf82f920 cf824e40 91a8bb6b 00000000
GPR08: 00000001 00000001 c01467ac 00000000 90778e6b 100834dc 0127e698 1005b940
GPR16: 100859a0 00000000 1007d074 100429a4 00000000 c02dc614 c0281678 c02dc498
GPR24: 0000000a cf830000 c0321e00 00000005 00000014 c0321e00 c02dc638 00000000
NIP [c012dc9c] tty_wakeup+0x14/0xa0
LR [c01467c0] uart_tasklet_action+0x14/0x24
Call Trace:
[cf831c90] [c012dcbc] tty_wakeup+0x34/0xa0 (unreliable)
[cf831ca0] [c01467c0] uart_tasklet_action+0x14/0x24
[cf831cb0] [c003123c] tasklet_action+0x80/0x104
[cf831cd0] [c0031368] __do_softirq+0xa8/0x120
[cf831d10] [c0006ea4] do_softirq+0x58/0x5c
[cf831d20] [c00311b8] irq_exit+0x98/0x9c
[cf831d30] [c0006f44] do_IRQ+0x9c/0xb4
[cf831d50] [c0012b60] ret_from_except+0x0/0x1c
--- Exception: 501 at uart_start+0x24/0x38
LR = uart_start+0x20/0x38
[cf831e20] [c0148130] uart_write+0xc0/0xe4
[cf831e50] [c0130bc0] n_tty_write+0x1d4/0x430
[cf831eb0] [c012db30] tty_write+0x188/0x268
[cf831ef0] [c0082314] vfs_write+0xb4/0x188
[cf831f10] [c008287c] sys_write+0x4c/0x90
[cf831f40] [c0012494] ret_from_syscall+0x0/0x40
--- Exception: c01 at 0x48039f8c
LR = 0x4804c4bc
Instruction dump:
7c0803a6 4e800020 80010024 bba10014 38210020 7c0803a6 4bfffd24 9421fff0
7c0802a6 bfc10008 7c7f1b78 90010014 <80030084> 70090020 4082002c 387f00e0
Kernel panic - not syncing: Fatal exception in interrupt
Call Trace:
[cf831b10] [c0008d74] show_stack+0x4c/0x16c (unreliable)
[cf831b50] [c002b600] panic+0x90/0x160
[cf831ba0] [c0010064] die+0x148/0x154
[cf831bc0] [c0015b34] bad_page_fault+0x90/0xd8
[cf831bd0] [c001294c] handle_page_fault+0x7c/0x80
--- Exception: 300 at tty_wakeup+0x14/0xa0
LR = uart_tasklet_action+0x14/0x24
[cf831c90] [c012dcbc] tty_wakeup+0x34/0xa0 (unreliable)
[cf831ca0] [c01467c0] uart_tasklet_action+0x14/0x24
[cf831cb0] [c003123c] tasklet_action+0x80/0x104
[cf831cd0] [c0031368] __do_softirq+0xa8/0x120
[cf831d10] [c0006ea4] do_softirq+0x58/0x5c
[cf831d20] [c00311b8] irq_exit+0x98/0x9c
[cf831d30] [c0006f44] do_IRQ+0x9c/0xb4
[cf831d50] [c0012b60] ret_from_except+0x0/0x1c
--- Exception: 501 at uart_start+0x24/0x38
LR = uart_start+0x20/0x38
[cf831e20] [c0148130] uart_write+0xc0/0xe4
[cf831e50] [c0130bc0] n_tty_write+0x1d4/0x430
[cf831eb0] [c012db30] tty_write+0x188/0x268
[cf831ef0] [c0082314] vfs_write+0xb4/0x188
[cf831f10] [c008287c] sys_write+0x4c/0x90
[cf831f40] [c0012494] ret_from_syscall+0x0/0x40
--- Exception: c01 at 0x48039f8c
LR = 0x4804c4bc
Rebooting in 1 seconds..
I've reproduced this with my Firmware Linux project (download
http://impactlinux.com/fwl/downloads/binaries/system-image-powerpc.tar.bz2 ,
extract it, run "sed -i 's@-hda @-hdc @' run-emulator.sh" because qemu's
device tree layout changed betwee 0.10.0 and 0.11.0, and then ./run-
emulator.sh).
I've also reproduced it with debian's kernel .config and root filesystem image,
details on that posted here:
http://lists.gnu.org/archive/html/qemu-devel/2009-10/msg01056.html
(Oddly, with debian's kernel .config -hda sets /dev/hda instead of /dev/hdc, I
need to track down why so I can fix it in my project's .config.)
The debian image boots fine with a graphics console, it's just trying to use
the serial console that panics it. I don't know if this is a qemu device
emulation issue, a kernel issue, or maybe something to do with the device tree
qemu's openbios is feeding in at boot time? I'm stumped.
Er... Help? Pretty please?
Rob
--
Latency is more important than throughput. It's that simple. - Linus Torvalds
^ permalink raw reply
* [PATCH] powerpc/mm: Fix hang accessing top of vmalloc space
From: Benjamin Herrenschmidt @ 2009-10-13 6:43 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Tejun Heo, paulus
On pSeries, we always force the IO space to be mapped using 4K
pages even with a 64K base page size to cope with some limitations
in the HV interface to some devices.
However, the SLB miss handler code to discriminate between vmalloc
and ioremap space uses a CPU feature section such that the code
is nop'ed out when the processor support large pages non-cachable
mappings.
Thus, we end up always using the ioremap page size for vmalloc
segments on such processors, causing a discrepency between the
segment and the hash table, and thus a hang continously hashing
the page.
It works for the first segment of the vmalloc space since that
segment is "bolted" in by C code correctly, and thankfully we
almost never use the vmalloc space beyond the first segment,
but the new percpu code made the bug happen.
This fixes it by removing the feature section from the assembly,
we now always do the comparison between vmalloc and ioremap.
Signed-off-by; Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
Sachin, can you verify that works for you ?
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index bc44dc4..95ce355 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -72,19 +72,17 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
1:
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
- /* vmalloc/ioremap mapping encoding bits, the "li" instructions below
- * will be patched by the kernel at boot
+ /* vmalloc mapping gets the encoding from the PACA as the mapping
+ * can be demoted from 64K -> 4K dynamically on some machines
*/
-BEGIN_FTR_SECTION
- /* check whether this is in vmalloc or ioremap space */
clrldi r11,r10,48
cmpldi r11,(VMALLOC_SIZE >> 28) - 1
bgt 5f
lhz r11,PACAVMALLOCSLLP(r13)
b 6f
5:
-END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
-_GLOBAL(slb_miss_kernel_load_io)
+ /* IO mapping */
+ _GLOBAL(slb_miss_kernel_load_io)
li r11,0
6:
BEGIN_FTR_SECTION
^ permalink raw reply related
* Re: [PATCH 0/8] gianfar: Add support for hibernation
From: David Miller @ 2009-10-13 6:57 UTC (permalink / raw)
To: avorontsov; +Cc: scottwood, linuxppc-dev, netdev, afleming
In-Reply-To: <20091012160000.GA32406@oksana.dev.rtsoft.ru>
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 12 Oct 2009 20:00:00 +0400
> Here are few patches that add support for hibernation for gianfar
> driver.
>
> Technically, we could just do gfar_close() and then gfar_enet_open()
> sequence to restore gianfar functionality after hibernation, but
> close/open does so many unneeded things (e.g. BDs buffers freeing and
> allocation, IRQ freeing and requesting), that I felt it would be much
> better to cleanup and refactor some code to make the hibernation [and
> not only hibernation] code a little bit prettier.
I applied all of this, it's a really nice patch set. If there are any
problems we can deal with it using follow-on fixups.
I noticed something, in patch #3 where you remove the spurious wrap
bit setting in startup_gfar(). It looks like that was not only
spurious but it was doing it wrong too.
It's writing garbage into the status word, because it's not using the
BD_LFLAG() macro to shift the value up 16 bits.
^ permalink raw reply
* Re: [PATCH] powerpc/mm: Fix hang accessing top of vmalloc space
From: Sachin Sant @ 2009-10-13 8:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: Tejun Heo, paulus, linuxppc-dev
In-Reply-To: <1255416227.2192.184.camel@pasglop>
Benjamin Herrenschmidt wrote:
> On pSeries, we always force the IO space to be mapped using 4K
> pages even with a 64K base page size to cope with some limitations
> in the HV interface to some devices.
>
> However, the SLB miss handler code to discriminate between vmalloc
> and ioremap space uses a CPU feature section such that the code
> is nop'ed out when the processor support large pages non-cachable
> mappings.
>
> Thus, we end up always using the ioremap page size for vmalloc
> segments on such processors, causing a discrepency between the
> segment and the hash table, and thus a hang continously hashing
> the page.
>
> It works for the first segment of the vmalloc space since that
> segment is "bolted" in by C code correctly, and thankfully we
> almost never use the vmalloc space beyond the first segment,
> but the new percpu code made the bug happen.
>
> This fixes it by removing the feature section from the assembly,
> we now always do the comparison between vmalloc and ioremap.
>
> Signed-off-by; Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>
> Sachin, can you verify that works for you ?
Works great. Thanks Ben.
Tested by: Sachin Sant <sachinp@in.ibm.com>
Regards
-Sachin
--
---------------------------------
Sachin Sant
IBM Linux Technology Center
India Systems and Technology Labs
Bangalore, India
---------------------------------
^ permalink raw reply
* Re: [PATCH 2/8] bitmap: Introduce bitmap_set, bitmap_clear, bitmap_find_next_zero_area
From: Akinobu Mita @ 2009-10-13 9:10 UTC (permalink / raw)
To: Andrew Morton
Cc: Fenghua Yu, Greg Kroah-Hartman, linux-ia64, Tony Luck, x86,
netdev, linux-kernel, linux-altix, Yevgeny Petrilin,
FUJITA Tomonori, linuxppc-dev, Ingo Molnar, Paul Mackerras,
H. Peter Anvin, sparclinux, Thomas Gleixner, linux-usb,
David S. Miller, Lothar Wassmann
In-Reply-To: <20091013021818.GA3898@localhost.localdomain>
My user space testing exposed off-by-one error find_next_zero_area
in iommu-helper. Some zero area cannot be found by this bug.
Subject: [PATCH] Fix off-by-one error in find_next_zero_area
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
lib/iommu-helper.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 75dbda0..afc58bc 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -19,7 +19,7 @@ again:
index = (index + align_mask) & ~align_mask;
end = index + nr;
- if (end >= size)
+ if (end > size)
return -1;
for (i = index; i < end; i++) {
if (test_bit(i, map)) {
--
1.5.4.3
^ permalink raw reply related
* i2c-powermac fails
From: Jean Delvare @ 2009-10-13 9:23 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras; +Cc: linuxppc-dev, Tim Shepard
Hi Ben, Paul,
I had a report by Tim Shepard (Cc'd) that the therm_adt746x driver
sometimes fails to initialize on his PowerBook G4 running kernel
2.6.31. The following error message can be seen in the logs when the
failure happens:
therm_adt746x 7-002e: Thermostat failed to read config!
After enabling low-level i2c debugging, it turns out that the problem
is caused by low-level errors at the I2C bus level:
PowerMac i2c bus pmu 2 registered
PowerMac i2c bus pmu 1 registered
PowerMac i2c bus mac-io 0 registered
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x0, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 6)
low_i2c:KW: NAK on address
low_i2c:xfer error -6
i2c-adapter i2c-7: I2C transfer at 0x2e failed, size 2, err -6
therm_adt746x 7-002e: Thermostat failed to read config!
PowerMac i2c bus uni-n 0 registered
So apparently the I2C controller doesn't see the ack from the ADT7467.
However the ADT7467 is a SMBus-compliant device, so it must always ack
his address.
It is worth noting that many other I2C errors happen and go unnoticed.
Below is the log of a "successful" therm_adt746x registration:
PowerMac i2c bus pmu 2 registered
PowerMac i2c bus pmu 1 registered
PowerMac i2c bus mac-io 0 registered
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x0, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_read, isr: 5)
adt746x: ADT7467 initializing
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x6b, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_read, isr: 5)
low_i2c:xfer() chan=0, addrdir=0x5c, mode=3, subsize=1, subaddr=0x6b, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 6)
low_i2c:KW: NAK on address
low_i2c:xfer error -6
i2c-adapter i2c-7: I2C transfer at 0x2e failed, size 2, err -6
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x6a, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_read, isr: 1)
low_i2c:kw_handle_interrupt(state_stop, isr: 4)
low_i2c:xfer() chan=0, addrdir=0x5c, mode=3, subsize=1, subaddr=0x6a, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
ieee1394: Host added: ID:BUS[0-00:1023] GUID[001124fffed61a88]
low_i2c:kw_handle_interrupt(state_addr, isr: 6)
low_i2c:KW: NAK on address
low_i2c:xfer error -6
i2c-adapter i2c-7: I2C transfer at 0x2e failed, size 2, err -6
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x6c, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_read, isr: 5)
low_i2c:xfer() chan=0, addrdir=0x5c, mode=3, subsize=1, subaddr=0x6c, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_write, isr: 1)
low_i2c:kw_handle_interrupt(state_stop, isr: 4)
adt746x: Lowering max temperatures from 81, 80, 87 to 70, 50, 70
low_i2c:xfer() chan=0, addrdir=0x5d, mode=4, subsize=1, subaddr=0x5c, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
eth0: Link is up at 1000 Mbps, full-duplex.
low_i2c:kw_handle_interrupt(state_addr, isr: 6)
low_i2c:KW: NAK on address
low_i2c:xfer error -6
i2c-adapter i2c-7: I2C transfer at 0x2e failed, size 2, err -6
low_i2c:xfer() chan=0, addrdir=0x5c, mode=3, subsize=1, subaddr=0x5c, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 6)
low_i2c:KW: NAK on address
low_i2c:xfer error -6
i2c-adapter i2c-7: I2C transfer at 0x2e failed, size 2, err -6
low_i2c:xfer() chan=0, addrdir=0x5c, mode=3, subsize=1, subaddr=0x30, 1 bytes, bus /uni-n@f8000000/i2c@f8001000/i2c-bus@0
low_i2c:kw_handle_interrupt(state_addr, isr: 2)
low_i2c:kw_handle_interrupt(state_write, isr: 1)
low_i2c:kw_handle_interrupt(state_stop, isr: 4)
PowerMac i2c bus uni-n 0 registered
As you can see there are 4 errors, but the config register read doesn't
fail so this is considered a success.
Ever heard of this problem?
One very interesting thing I've noticed is that therm_adt746x register
access _after_ the initialization works reliably. Errors only happen in
probe_thermostat(). This makes me suspect that the problem is either a
low level initialization happening too late, or another initialization
step happening in parallel and interfering with probe_thermostat().
Tim found evidences in older boot logs that the problem isn't new and
was already present back in kernel 2.6.24 at least.
Any idea what the problem can be and/or how to debug it further?
--
Jean Delvare
^ permalink raw reply
* Re: i2c-powermac fails
From: Benjamin Herrenschmidt @ 2009-10-13 9:32 UTC (permalink / raw)
To: Jean Delvare; +Cc: linuxppc-dev, Paul Mackerras, Tim Shepard
In-Reply-To: <20091013112304.74083fd1@hyperion.delvare>
On Tue, 2009-10-13 at 11:23 +0200, Jean Delvare wrote:
> Hi Ben, Paul,
>
> I had a report by Tim Shepard (Cc'd) that the therm_adt746x driver
> sometimes fails to initialize on his PowerBook G4 running kernel
> 2.6.31. The following error message can be seen in the logs when the
> failure happens:
>
> therm_adt746x 7-002e: Thermostat failed to read config!
>
> After enabling low-level i2c debugging, it turns out that the problem
> is caused by low-level errors at the I2C bus level:
Nothing comes to mind immediately, but I'll have another look tomorrow.
Maybe we are configuring the i2c bus too fast ? Another possibility
would be that the device needs some retries ...
Ben.
^ permalink raw reply
* Re: i2c-powermac fails
From: Jean Delvare @ 2009-10-13 9:49 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Paul Mackerras, Tim Shepard
In-Reply-To: <1255426348.2192.187.camel@pasglop>
On Tue, 13 Oct 2009 20:32:28 +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2009-10-13 at 11:23 +0200, Jean Delvare wrote:
> > Hi Ben, Paul,
> >
> > I had a report by Tim Shepard (Cc'd) that the therm_adt746x driver
> > sometimes fails to initialize on his PowerBook G4 running kernel
> > 2.6.31. The following error message can be seen in the logs when the
> > failure happens:
> >
> > therm_adt746x 7-002e: Thermostat failed to read config!
> >
> > After enabling low-level i2c debugging, it turns out that the problem
> > is caused by low-level errors at the I2C bus level:
>
> Nothing comes to mind immediately, but I'll have another look tomorrow.
>
> Maybe we are configuring the i2c bus too fast ? Another possibility
> would be that the device needs some retries ...
I guess that retrying would work around the problem, yes. But I do not
think this is the proper solution. If retries were needed, they would
be needed all the time, not just at initialization time. And as I said,
the SMBus specification says that devices have to always ack their
slave address (they can always delay the transaction later if they need
more time) so I am reasonably certain that the ADT7467 does ack his
address always. If it seems otherwise, this suggests that either the
message was not properly sent on the bus (so the ADT7467 did not have
anything to ack), or the ADT7467's ack went on the bus but the I2C
master didn't see it.
I2C bus being setup too fast sounds more likely. It might be worth
adding an arbitrary delay after initialization, just to see if it
helps. Not sure where though, as I'm not familiar with the Powermac
initialization steps. Maybe right before i2c_add_adapter() in
i2c_powermac_probe?
--
Jean Delvare
^ permalink raw reply
* Re: [U-Boot] Linux seamless booting
From: Kenneth Johansson @ 2009-10-13 12:22 UTC (permalink / raw)
To: Fortini Matteo
Cc: u-boot@lists.denx.de, Roberto Guerra,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <4AD33533.5030900@mta.it>
On Mon, 2009-10-12 at 15:54 +0200, Fortini Matteo wrote:
> Yes, that's what we're currently using, but the problem is a little
> broader: I should answer to CAN messages in at most 100-200ms from
> powerup, and that can be done in u-boot.
if you are in that interval you definitely need to go to a more exotic
start sequence than usual.
one solution would be to do as you suggest and do a special driver that
is living outside of the kernel during startup. you still need to hack
into the interrupt code to let your "external" driver handle the CAN.
then you need to hack up the ordinary driver to take over from yours.
I have not seen this solution on any project I worked on but should be
doable.
optimizing the boot time of linux so it starts up in 200ms is probably
going to be quite hard. I did 2 seconds to /sbin/init started from ide
driver without to much trouble. removing the IDE and going to a root on
NOR would probably get closer to 1.5 but to get down to 200ms would
probably mean removing most of u-boot and only keep the dram setup then
you probably need to remove most of the drivers from the kernel and load
them later as modules. I have never really tried to do a insane fast
boot like this so I'm not sure what problems you will run up against.
but maybe it's possible. but 200ms feels a bit to optimistic.
> However, handing CAN transmission control over to Linux is quite
> complicated nowadays, since it would involve passing structures in
> memory and hacking through device init.
> It'd be nice to have a framework with which u-boot could hand-over
> devices to Linux in a clean and defined way.
not likely to happen as a generic solution. Much better to just remove
the boat loader then and work on optimizing the linux startup code.
^ permalink raw reply
* Re: [Patch] powerpc: Fix memory leak in axon_msi.c
From: Arnd Bergmann @ 2009-10-13 13:30 UTC (permalink / raw)
To: linuxppc-dev, michael; +Cc: linuxppc-dev list, eric.sesterhenn
In-Reply-To: <1255393780.9570.3.camel@concordia>
On Tuesday 13 October 2009, Michael Ellerman wrote:
> cppcheck found a memory leak in axon_msi, if dcr_base or dcr_len are zero,
> we have already allocated msic, so we should free it in the error path.
>
> Signed-off-by: Eric Sesterhenn <eric.sesterhenn@lsexperts.de>
> Acked-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Arnd Bergmann <arnd@arndb.de>
^ permalink raw reply
* Re: [Cbe-oss-dev] [PATCH] spufs: Fix test in spufs_switch_log_read()
From: Arnd Bergmann @ 2009-10-13 13:31 UTC (permalink / raw)
To: linuxppc-dev
Cc: cbe-oss-dev, Roel Kluin, linuxppc-dev, Jeremy Kerr, Andrew Morton,
cbe-oss-dev
In-Reply-To: <200910130849.56671.jk@ozlabs.org>
On Tuesday 13 October 2009, Jeremy Kerr wrote:
> > Or can this test be removed?
>
> I'd prefer just to remove the test.
Yes, sounds good.
Arnd <><
^ permalink raw reply
* Re: [PATCH 1/2][v2] mm: add notifier in pageblock isolation for balloon drivers
From: Robert Jennings @ 2009-10-13 15:48 UTC (permalink / raw)
To: Mel Gorman
Cc: linux-mm, Gerald Schaefer, linux-kernel, linuxppc-dev,
Martin Schwidefsky, Badari Pulavarty, Brian King, Paul Mackerras,
Andrew Morton, Ingo Molnar, KAMEZAWA Hiroyuki
In-Reply-To: <20091009204326.GH24845@csn.ul.ie>
On Fri, Oct 09, 2009 at 21:43:26 +0100, Mal Gorman wrote:
> As you have tested this recently, would you be willing to post the
> results? While it's not a requirement of the patch, it would be nice to have
> an idea of how the effectiveness of memory hot-remove is improved when used
> with the powerpc balloon. This might convince others developers for balloons
> to register with the notifier.
I did ten test runs without my patches and ten test runs with my patches
on a 2.6.32-rc3 kernel.
Without the patch:
6 out of 10 memory-remove operations without the patch removed 1 LMB
(64Mb), the rest of the memory-remove attempts failed to remove any LMBs.
With the patch:
All of the memory-remove operations removed some LMBs. The average
removed was just over 11 LMBs (704Mb) per attempt.
Linux was given 2Gb of memory. During the test runs the average memory in
use was 140Mb, not including cache and buffers, and the average amount
consumed by the balloon was 1217Mb. The system was idle while the
memory remove operation was performed. After each attempt the system
was rebooted and allowed ~10 minutes to settle after boot.
With a 2Gb configuration on POWER the LMB size is 64Mb. The drmgr command
(part of powerpc-utils) was used to remove memory by LBM, just as an
end-user would. Below is a list of the runs and the number of LMBs
removed.
Stock kernel (v2.6.32-rc3)
--------------------------
LMBs Used kb Loaned kb
removed
0 135232 1257280
0 151168 1231744
1 152128 1234176
1 150976 1239232
1 151808 1232064
0 136064 1249152
0 137088 1246976
1 135296 1289984
1 136384 1263104
1 152960 1243904
=======================
0.60 143910 1248762 Average
0.49 7929 16960 StdDev
Patched kernel
--------------------------
LMBs Used kb Loaned kb
removed
12 134336 1294336
10 152192 1250432
9 152832 1235520
15 153152 1237952
12 152320 1232704
13 135360 1252224
11 154176 1237056
10 153920 1243264
10 150720 1236416
13 151040 1230848
=======================
11.50 149005 1245075 Average
1.75 7158 17738 StdDev
Regards,
Robert Jennings
^ permalink raw reply
* From: Tim Abbott <tabbott@ksplice.com>
From: Tim Abbott @ 2009-10-13 15:53 UTC (permalink / raw)
To: linux-kernel; +Cc: linuxppc-dev, Tim Abbott, Paul Mackerras, Sam Ravnborg
There is already an architecture-independent __page_aligned_data macro
for this purpose, so removing the powerpc-specific macro should be
harmless.
Signed-off-by: Tim Abbott <tabbott@ksplice.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
Cc: Sam Ravnborg <sam@ravnborg.org>
---
arch/powerpc/include/asm/page_64.h | 8 --------
1 files changed, 0 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 3f17b83..3c7118f 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -162,14 +162,6 @@ do { \
#endif /* !CONFIG_HUGETLB_PAGE */
-#ifdef MODULE
-#define __page_aligned __attribute__((__aligned__(PAGE_SIZE)))
-#else
-#define __page_aligned \
- __attribute__((__aligned__(PAGE_SIZE), \
- __section__(".data.page_aligned")))
-#endif
-
#define VM_DATA_DEFAULT_FLAGS \
(test_thread_flag(TIF_32BIT) ? \
VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
--
1.6.4.3
^ permalink raw reply related
* Re: From: Tim Abbott <tabbott@ksplice.com>
From: Tim Abbott @ 2009-10-13 15:56 UTC (permalink / raw)
To: linux-kernel; +Cc: Paul Mackerras, Sam Ravnborg, linuxppc-dev
In-Reply-To: <1255449206-16650-1-git-send-email-tabbott@ksplice.com>
Well, I think I just found a bug in git-send-email. I'll resend with the
actual subject line.
-Tim Abbott
On Tue, 13 Oct 2009, Tim Abbott wrote:
> There is already an architecture-independent __page_aligned_data macro
> for this purpose, so removing the powerpc-specific macro should be
> harmless.
>
> Signed-off-by: Tim Abbott <tabbott@ksplice.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@ozlabs.org
> Cc: Sam Ravnborg <sam@ravnborg.org>
> ---
> arch/powerpc/include/asm/page_64.h | 8 --------
> 1 files changed, 0 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
> index 3f17b83..3c7118f 100644
> --- a/arch/powerpc/include/asm/page_64.h
> +++ b/arch/powerpc/include/asm/page_64.h
> @@ -162,14 +162,6 @@ do { \
>
> #endif /* !CONFIG_HUGETLB_PAGE */
>
> -#ifdef MODULE
> -#define __page_aligned __attribute__((__aligned__(PAGE_SIZE)))
> -#else
> -#define __page_aligned \
> - __attribute__((__aligned__(PAGE_SIZE), \
> - __section__(".data.page_aligned")))
> -#endif
> -
> #define VM_DATA_DEFAULT_FLAGS \
> (test_thread_flag(TIF_32BIT) ? \
> VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
> --
> 1.6.4.3
>
>
^ permalink raw reply
* Re: [PATCH 0/8] gianfar: Add support for hibernation
From: Andy Fleming @ 2009-10-13 17:22 UTC (permalink / raw)
To: David Miller; +Cc: scottwood, linuxppc-dev, netdev
In-Reply-To: <20091012.235747.195783342.davem@davemloft.net>
On Oct 13, 2009, at 1:57 AM, David Miller wrote:
> From: Anton Vorontsov <avorontsov@ru.mvista.com>
> Date: Mon, 12 Oct 2009 20:00:00 +0400
>
>> Here are few patches that add support for hibernation for gianfar
>> driver.
>>
>> Technically, we could just do gfar_close() and then gfar_enet_open()
>> sequence to restore gianfar functionality after hibernation, but
>> close/open does so many unneeded things (e.g. BDs buffers freeing and
>> allocation, IRQ freeing and requesting), that I felt it would be much
>> better to cleanup and refactor some code to make the hibernation [and
>> not only hibernation] code a little bit prettier.
>
> I applied all of this, it's a really nice patch set. If there are any
> problems we can deal with it using follow-on fixups.
>
> I noticed something, in patch #3 where you remove the spurious wrap
> bit setting in startup_gfar(). It looks like that was not only
> spurious but it was doing it wrong too.
>
> It's writing garbage into the status word, because it's not using the
> BD_LFLAG() macro to shift the value up 16 bits.
>
No, it was fine (though made unnecessary by other patches). The BD
has a union:
struct {
u16 status; /* Status Fields */
u16 length; /* Buffer length */
};
u32 lstatus;
so when you write "lstatus", you need to use the BD_LFLAG() macro, but
when you write "status", you are just setting the status bits.
Andy
^ permalink raw reply
* Re: [PATCH 1/5 v3] dynamic logical partitioning infrastructure
From: Nathan Fontenot @ 2009-10-13 18:06 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel
In-Reply-To: <4AB3A05A.6010204@austin.ibm.com>
This patch provides the kernel DLPAR infrastructure in a new filed named
dlpar.c. The functionality provided is for acquiring and releasing a
resource from firmware and the parsing of information returned from the
ibm,configure-connector rtas call. Additionally, this exports the pSeries
reconfiguration notifier chain so that it can be invoked when
device tree updates are made.
Updated to remove an extraneous of_node_put() in the removal of a device
tree node path.
Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
---
Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-08 11:08:42.000000000 -0500
@@ -0,0 +1,414 @@
+/*
+ * dlpar.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+#define CFG_CONN_WORK_SIZE 4096
+static char workarea[CFG_CONN_WORK_SIZE];
+static DEFINE_SPINLOCK(workarea_lock);
+
+struct cc_workarea {
+ u32 drc_index;
+ u32 zero;
+ u32 name_offset;
+ u32 prop_length;
+ u32 prop_offset;
+};
+
+static struct property *parse_cc_property(char *workarea)
+{
+ struct property *prop;
+ struct cc_workarea *ccwa;
+ char *name;
+ char *value;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)workarea;
+ name = workarea + ccwa->name_offset;
+ prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!prop->name) {
+ kfree(prop);
+ return NULL;
+ }
+
+ strcpy(prop->name, name);
+
+ prop->length = ccwa->prop_length;
+ value = workarea + ccwa->prop_offset;
+ prop->value = kzalloc(prop->length, GFP_KERNEL);
+ if (!prop->value) {
+ kfree(prop->name);
+ kfree(prop);
+ return NULL;
+ }
+
+ memcpy(prop->value, value, prop->length);
+ return prop;
+}
+
+static void free_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+static struct device_node *parse_cc_node(char *work_area)
+{
+ struct device_node *dn;
+ struct cc_workarea *ccwa;
+ char *name;
+
+ dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+ if (!dn)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)work_area;
+ name = work_area + ccwa->name_offset;
+ dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!dn->full_name) {
+ kfree(dn);
+ return NULL;
+ }
+
+ strcpy(dn->full_name, name);
+ return dn;
+}
+
+static void free_one_cc_node(struct device_node *dn)
+{
+ struct property *prop;
+
+ while (dn->properties) {
+ prop = dn->properties;
+ dn->properties = prop->next;
+ free_property(prop);
+ }
+
+ kfree(dn->full_name);
+ kfree(dn);
+}
+
+static void free_cc_nodes(struct device_node *dn)
+{
+ if (dn->child)
+ free_cc_nodes(dn->child);
+
+ if (dn->sibling)
+ free_cc_nodes(dn->sibling);
+
+ free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING 1
+#define NEXT_CHILD 2
+#define NEXT_PROPERTY 3
+#define PREV_PARENT 4
+#define MORE_MEMORY 5
+#define CALL_AGAIN -2
+#define ERR_CFG_USE -9003
+
+struct device_node *configure_connector(u32 drc_index)
+{
+ struct device_node *dn;
+ struct device_node *first_dn = NULL;
+ struct device_node *last_dn = NULL;
+ struct property *property;
+ struct property *last_property = NULL;
+ struct cc_workarea *ccwa;
+ int cc_token;
+ int rc;
+
+ cc_token = rtas_token("ibm,configure-connector");
+ if (cc_token == RTAS_UNKNOWN_SERVICE)
+ return NULL;
+
+ spin_lock(&workarea_lock);
+
+ ccwa = (struct cc_workarea *)&workarea[0];
+ ccwa->drc_index = drc_index;
+ ccwa->zero = 0;
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ while (rc) {
+ switch (rc) {
+ case NEXT_SIBLING:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ dn->parent = last_dn->parent;
+ last_dn->sibling = dn;
+ last_dn = dn;
+ break;
+
+ case NEXT_CHILD:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ if (!first_dn)
+ first_dn = dn;
+ else {
+ dn->parent = last_dn;
+ if (last_dn)
+ last_dn->child = dn;
+ }
+
+ last_dn = dn;
+ break;
+
+ case NEXT_PROPERTY:
+ property = parse_cc_property(workarea);
+ if (!property)
+ goto cc_error;
+
+ if (!last_dn->properties)
+ last_dn->properties = property;
+ else
+ last_property->next = property;
+
+ last_property = property;
+ break;
+
+ case PREV_PARENT:
+ last_dn = last_dn->parent;
+ break;
+
+ case CALL_AGAIN:
+ break;
+
+ case MORE_MEMORY:
+ case ERR_CFG_USE:
+ default:
+ printk(KERN_ERR "Unexpected Error (%d) "
+ "returned from configure-connector\n", rc);
+ goto cc_error;
+ }
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ }
+
+ spin_unlock(&workarea_lock);
+ return first_dn;
+
+cc_error:
+ spin_unlock(&workarea_lock);
+
+ if (first_dn)
+ free_cc_nodes(first_dn);
+
+ return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+ struct device_node *parent;
+ char parent_path[128];
+ int parent_path_len;
+
+ parent_path_len = strrchr(path, '/') - path + 1;
+ strlcpy(parent_path, path, parent_path_len);
+
+ parent = of_find_node_by_path(parent_path);
+
+ return parent;
+}
+
+static int add_one_node(struct device_node *dn)
+{
+ struct proc_dir_entry *ent;
+ int rc;
+
+ of_node_set_flag(dn, OF_DYNAMIC);
+ kref_init(&dn->kref);
+ dn->parent = derive_parent(dn->full_name);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_ADD, dn);
+ if (rc == NOTIFY_BAD) {
+ printk(KERN_ERR "Failed to add device node %s\n",
+ dn->full_name);
+ return -ENOMEM; /* For now, safe to assume kmalloc failure */
+ }
+
+ of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+ if (ent)
+ proc_device_tree_add_node(dn, ent);
+#endif
+
+ of_node_put(dn->parent);
+ return 0;
+}
+
+int add_device_tree_nodes(struct device_node *dn)
+{
+ struct device_node *child = dn->child;
+ struct device_node *sibling = dn->sibling;
+ int rc;
+
+ dn->child = NULL;
+ dn->sibling = NULL;
+ dn->parent = NULL;
+
+ rc = add_one_node(dn);
+ if (rc)
+ return rc;
+
+ if (child) {
+ rc = add_device_tree_nodes(child);
+ if (rc)
+ return rc;
+ }
+
+ if (sibling)
+ rc = add_device_tree_nodes(sibling);
+
+ return rc;
+}
+
+static int remove_one_node(struct device_node *dn)
+{
+ struct device_node *parent = dn->parent;
+ struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+ while (prop) {
+ remove_proc_entry(prop->name, dn->pde);
+ prop = prop->next;
+ }
+
+ if (dn->pde)
+ remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+ blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_REMOVE, dn);
+ of_detach_node(dn);
+ of_node_put(dn); /* Must decrement the refcount */
+
+ return 0;
+}
+
+static int _remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ if (dn->sibling) {
+ rc = _remove_device_tree_nodes(dn->sibling);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+int remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+#define DR_ENTITY_SENSE 9003
+#define DR_ENTITY_PRESENT 1
+#define DR_ENTITY_UNUSABLE 2
+#define ALLOCATION_STATE 9003
+#define ALLOC_UNUSABLE 0
+#define ALLOC_USABLE 1
+#define ISOLATION_STATE 9001
+#define ISOLATE 0
+#define UNISOLATE 1
+
+int acquire_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_UNUSABLE)
+ return -1;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ if (rc) {
+ rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ return rc;
+ }
+
+ return 0;
+}
+
+int release_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ if (rc) {
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int pseries_dlpar_init(void)
+{
+ if (!machine_is(pseries))
+ return 0;
+
+ return 0;
+}
+device_initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/Makefile 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/Makefile 2009-09-11 12:51:52.000000000 -0500
@@ -8,7 +8,7 @@
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o ras.o rtasd.o \
- firmware.o power.o
+ firmware.o power.o dlpar.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
Index: powerpc/arch/powerpc/include/asm/pSeries_reconfig.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/pSeries_reconfig.h 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/pSeries_reconfig.h 2009-10-08 09:37:40.000000000 -0500
@@ -17,6 +17,7 @@
#ifdef CONFIG_PPC_PSERIES
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
#else /* !CONFIG_PPC_PSERIES */
static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/reconfig.c 2009-10-08 09:37:49.000000000 -0500
@@ -95,7 +95,7 @@
return parent;
}
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
^ permalink raw reply
* Re: [PATCH 4/5 v3] kernel handling of memory DLPAR
From: Nathan Fontenot @ 2009-10-13 18:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel
In-Reply-To: <4AB3A13D.1060405@austin.ibm.com>
This adds the capability to DLPAR add and remove memory from the kernel. The
patch extends the powerpc handling of memory_add_physaddr_to_nid(), which is
called from the sysfs memory 'probe' file to first ensure that the memory
has been added to the system. This is done by creating a platform specific
callout from the routine. The pseries implementation of this handles the
DLPAR work to add the memory to the system and update the device tree.
The patch also creates a pseries only 'release' sys file,
/sys/devices/system/memory/release. This file handles the DLPAR release of
memory back to firmware and updating of the device-tree.
Updated to add #ifdef CONFIG_MEMORY_HOTPLUG around the memory hotplug specific
updates. This allows the file to be built without CONFIG_MEMORY_HOTPLUG
defined.
Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
---
Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-10-08 11:08:42.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-13 13:08:22.000000000 -0500
@@ -16,6 +16,10 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
+#include <linux/memory_hotplug.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+
#include <asm/prom.h>
#include <asm/machdep.h>
@@ -404,11 +408,165 @@
return 0;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static struct property *clone_property(struct property *old_prop)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kzalloc(strlen(old_prop->name) + 1, GFP_KERNEL);
+ new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ free_property(new_prop);
+ return NULL;
+ }
+
+ strcpy(new_prop->name, old_prop->name);
+ memcpy(new_prop->value, old_prop->value, old_prop->length);
+ new_prop->length = old_prop->length;
+
+ return new_prop;
+}
+
+int platform_probe_memory(u64 phys_addr)
+{
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct property *lmb_sz_prop;
+ struct of_drconf_cell *drmem;
+ u64 lmb_size;
+ int num_entries, i, rc;
+
+ if (!phys_addr)
+ return -EINVAL;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return -EINVAL;
+
+ lmb_sz_prop = of_find_property(dn, "ibm,lmb-size", NULL);
+ lmb_size = *(u64 *)lmb_sz_prop->value;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ u64 lmb_end_addr = drmem[i].base_addr + lmb_size;
+ if (phys_addr >= drmem[i].base_addr
+ && phys_addr < lmb_end_addr)
+ break;
+ }
+
+ if (i >= num_entries) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ if (drmem[i].flags & DRCONF_MEM_ASSIGNED) {
+ of_node_put(dn);
+ return 0;
+ }
+
+ rc = acquire_drc(drmem[i].drc_index);
+ if (rc) {
+ of_node_put(dn);
+ return -1;
+ }
+
+ new_prop = clone_property(old_prop);
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags |= DRCONF_MEM_ASSIGNED;
+ prom_update_property(dn, new_prop, old_prop);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_ADD,
+ &drmem[i].base_addr);
+ if (rc == NOTIFY_BAD) {
+ prom_update_property(dn, old_prop, new_prop);
+ release_drc(drmem[i].drc_index);
+ }
+
+ of_node_put(dn);
+ return rc == NOTIFY_BAD ? -1 : 0;
+}
+
+static ssize_t memory_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ unsigned long drc_index;
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct of_drconf_cell *drmem;
+ int num_entries;
+ int i, rc;
+
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ return -EINVAL;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return 0;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ new_prop = clone_property(old_prop);
+
+ num_entries = *(u32 *)new_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ if (drmem[i].drc_index == drc_index)
+ break;
+ }
+
+ if (i >= num_entries) {
+ free_property(new_prop);
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ drmem[i].flags &= ~DRCONF_MEM_ASSIGNED;
+ prom_update_property(dn, new_prop, old_prop);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_REMOVE,
+ &drmem[i].base_addr);
+ if (rc != NOTIFY_BAD)
+ rc = release_drc(drc_index);
+
+ if (rc)
+ prom_update_property(dn, old_prop, new_prop);
+
+ of_node_put(dn);
+ return rc ? -1 : count;
+}
+
+static struct class_attribute class_attr_mem_release =
+ __ATTR(release, S_IWUSR, NULL, memory_release_store);
+#endif
+
static int pseries_dlpar_init(void)
{
if (!machine_is(pseries))
return 0;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ if (sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_mem_release.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs memory "
+ "release file\n");
+#endif
+
return 0;
}
device_initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/mm/mem.c
===================================================================
--- powerpc.orig/arch/powerpc/mm/mem.c 2009-10-08 11:07:45.000000000 -0500
+++ powerpc/arch/powerpc/mm/mem.c 2009-10-08 11:08:54.000000000 -0500
@@ -111,8 +111,19 @@
#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_NUMA
+int __attribute ((weak)) platform_probe_memory(u64 start)
+{
+ return 0;
+}
+
int memory_add_physaddr_to_nid(u64 start)
{
+ int rc;
+
+ rc = platform_probe_memory(start);
+ if (rc)
+ return rc;
+
return hot_add_scn_to_nid(start);
}
#endif
^ permalink raw reply
* Re: [PATCH 5/5 v2] kernel handling of CPU DLPAR
From: Nathan Fontenot @ 2009-10-13 18:14 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel
In-Reply-To: <4AB3A172.4090601@austin.ibm.com>
This adds the capability to DLPAR add and remove CPUs from the kernel. The
creates two new files /sys/devices/system/cpu/probe and
/sys/devices/system/cpu/release to handle the DLPAR addition and removal of
CPUs respectively.
CPU DLPAR add is accomplished by writing the drc-index of the CPU to the
probe file, and removal is done by writing the device-tree path of the cpu
to the release file.
Updated to include #ifdef CONFIG_HOTPLUG_CPU around the cpu hotplug specific
bits so that it will build without CONFIG_HOTPLUG_CPU defined.
Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
---
Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-10-13 13:08:22.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-13 13:09:00.000000000 -0500
@@ -1,11 +1,11 @@
/*
- * dlpar.c - support for dynamic reconfiguration (including PCI
- * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ * dlpar.c - support for dynamic reconfiguration (including PCI,
+ * Memory, and CPU Hotplug and Dynamic Logical Partitioning on
+ * PAPR platforms).
*
* Copyright (C) 2009 Nathan Fontenot
* Copyright (C) 2009 IBM Corporation
*
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
@@ -19,6 +19,7 @@
#include <linux/memory_hotplug.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
+#include <linux/cpu.h>
#include <asm/prom.h>
@@ -408,6 +409,82 @@
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ unsigned long drc_index;
+ char *cpu_name;
+ int rc;
+
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ return -EINVAL;
+
+ rc = acquire_drc(drc_index);
+ if (rc)
+ return rc;
+
+ dn = configure_connector(drc_index);
+ if (!dn) {
+ release_drc(drc_index);
+ return rc;
+ }
+
+ /* fixup dn name */
+ cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
+ GFP_KERNEL);
+ if (!cpu_name) {
+ free_cc_nodes(dn);
+ release_drc(drc_index);
+ return -ENOMEM;
+ }
+
+ sprintf(cpu_name, "/cpus/%s", dn->full_name);
+ kfree(dn->full_name);
+ dn->full_name = cpu_name;
+
+ rc = add_device_tree_nodes(dn);
+ if (rc)
+ release_drc(drc_index);
+
+ return rc ? rc : count;
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ u32 *drc_index;
+ int rc;
+
+ dn = of_find_node_by_path(buf);
+ if (!dn)
+ return -EINVAL;
+
+ drc_index = (u32 *)of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!drc_index) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ rc = release_drc(*drc_index);
+ if (rc) {
+ of_node_put(dn);
+ return rc;
+ }
+
+ rc = remove_device_tree_nodes(dn);
+ if (rc)
+ acquire_drc(*drc_index);
+
+ of_node_put(dn);
+ return rc ? rc : count;
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
#ifdef CONFIG_MEMORY_HOTPLUG
static struct property *clone_property(struct property *old_prop)
@@ -553,6 +630,13 @@
static struct class_attribute class_attr_mem_release =
__ATTR(release, S_IWUSR, NULL, memory_release_store);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct class_attribute class_attr_cpu_probe =
+ __ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static struct class_attribute class_attr_cpu_release =
+ __ATTR(release, S_IWUSR, NULL, cpu_release_store);
#endif
static int pseries_dlpar_init(void)
@@ -567,6 +651,18 @@
"release file\n");
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+ if (sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_probe.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "probe file\n");
+
+ if (sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_release.attr))
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "release file\n");
+#endif
+
return 0;
}
device_initcall(pseries_dlpar_init);
^ permalink raw reply
* Re: [PATCH 0/8] gianfar: Add support for hibernation
From: David Miller @ 2009-10-13 19:09 UTC (permalink / raw)
To: afleming; +Cc: scottwood, linuxppc-dev, netdev
In-Reply-To: <64B2BB18-32DC-4B98-95D6-F203F74040D5@freescale.com>
From: Andy Fleming <afleming@freescale.com>
Date: Tue, 13 Oct 2009 12:22:38 -0500
> No, it was fine (though made unnecessary by other patches). The BD
> has a union:
>
> struct {
> u16 status; /* Status Fields */
> u16 length; /* Buffer length */
> };
> u32 lstatus;
>
> so when you write "lstatus", you need to use the BD_LFLAG() macro, but
> when you write "status", you are just setting the status bits.
Indeed I missed that, thanks.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox