* [PATCH 00/13] second batch of b.L switcher patches
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
The first batch of patches was posted here:
http://news.gmane.org/group/gmane.linux.ports.arm.kernel/thread=253942
They're now merged in RMK's devel-stable branch and also available via
the linux-next tree.
So here's the second set of patches towards a fully functional,
production-quality switcher solution. It establishes a benchmark against
which any scheduler-based solution meant to eventually surpass the
switcher in the mainline kernel can be compared.
Those patches are also available here:
git://git.linaro.org/people/nico/linux iks
This set is divided into 3 groups:
Patches 1 to 3 are needed for proper interaction and synchronization with
the cpufreq companion driver that will be posted separately.
Patches 4 to 9 provide significant switch latency and efficiency
improvements; they are purely optimizations.
Patches 10 to 13 are for switch event tracing.
Here's the diffstat:
arch/arm/common/bL_switcher.c | 264 ++++++++++++++++++++++---
arch/arm/common/mcpm_entry.c | 12 ++
arch/arm/common/mcpm_head.S | 16 +-
arch/arm/include/asm/bL_switcher.h | 62 +++++-
arch/arm/include/asm/hardirq.h | 2 +-
arch/arm/include/asm/mcpm.h | 8 +
arch/arm/include/asm/smp.h | 2 +
arch/arm/kernel/smp.c | 21 ++
drivers/irqchip/irq-gic.c | 43 ++++
include/linux/irqchip/arm-gic.h | 2 +
include/trace/events/power_cpu_migrate.h | 67 +++++++
11 files changed, 471 insertions(+), 28 deletions(-)
* [PATCH 01/13] ARM: bL_switcher: Add synchronous enable/disable interface
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
Some subsystems will need to know for sure whether the switcher is
enabled or disabled during certain critical regions.
This patch provides a simple mutex-based mechanism to discover
whether the switcher is enabled and temporarily lock out further
enable/disable:
* bL_switcher_get_enabled() returns true iff the switcher is
enabled and temporarily inhibits enable/disable.
* bL_switcher_put_enabled() permits enable/disable of the switcher
again after a previous call to bL_switcher_get_enabled().
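For illustration only (not part of this patch; my_critical_op() is a
made-up client), a subsystem would bracket its critical region like:
#include <linux/types.h>
#include <linux/printk.h>
#include <asm/bL_switcher.h>
static void my_critical_op(void)
{
	bool enabled = bL_switcher_get_enabled();
	/*
	 * Until the matching bL_switcher_put_enabled(), the switcher
	 * can be neither enabled nor disabled by anyone else.
	 */
	pr_info("switcher is %s\n", enabled ? "enabled" : "disabled");
	bL_switcher_put_enabled();
}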
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 27 +++++++++++++++++++++++++--
arch/arm/include/asm/bL_switcher.h | 3 +++
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 335ff76d4c..7d98629aa4 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -23,6 +23,7 @@
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/irqchip/arm-gic.h>
@@ -302,6 +303,7 @@ EXPORT_SYMBOL_GPL(bL_switch_request);
* Activation and configuration code.
*/
+static DEFINE_MUTEX(bL_switcher_activation_lock);
static unsigned int bL_switcher_active;
static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
static cpumask_t bL_switcher_removed_logical_cpus;
@@ -413,9 +415,11 @@ static int bL_switcher_enable(void)
{
int cpu, ret;
+ mutex_lock(&bL_switcher_activation_lock);
cpu_hotplug_driver_lock();
if (bL_switcher_active) {
cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
return 0;
}
@@ -424,6 +428,7 @@ static int bL_switcher_enable(void)
ret = bL_switcher_halve_cpus();
if (ret) {
cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
return ret;
}
@@ -436,9 +441,10 @@ static int bL_switcher_enable(void)
}
bL_switcher_active = 1;
- cpu_hotplug_driver_unlock();
-
pr_info("big.LITTLE switcher initialized\n");
+
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
return 0;
}
@@ -450,9 +456,11 @@ static void bL_switcher_disable(void)
struct bL_thread *t;
struct task_struct *task;
+ mutex_lock(&bL_switcher_activation_lock);
cpu_hotplug_driver_lock();
if (!bL_switcher_active) {
cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
return;
}
bL_switcher_active = 0;
@@ -497,6 +505,7 @@ static void bL_switcher_disable(void)
bL_switcher_restore_cpus();
cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
}
static ssize_t bL_switcher_active_show(struct kobject *kobj,
@@ -554,6 +563,20 @@ static int __init bL_switcher_sysfs_init(void)
#endif /* CONFIG_SYSFS */
+bool bL_switcher_get_enabled(void)
+{
+ mutex_lock(&bL_switcher_activation_lock);
+
+ return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
/*
* Veto any CPU hotplug operation on those CPUs we've removed
* while the switcher is active.
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index e0c0bba70b..05d7c4cb94 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -14,4 +14,7 @@
int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
#endif
--
1.8.4.98.gb022869
* [PATCH 02/13] ARM: bL_switcher: Add runtime control notifier
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
Some subsystems will need to respond synchronously to runtime
enabling and disabling of the switcher.
This patch adds a dedicated notifier interface to support such
subsystems. Pre- and post- enable/disable notifications are sent
to registered callbacks, allowing safe transition of non-b.L-
transparent subsystems across these control transitions.
Notifier callbacks may veto switcher (de)activation on pre notifications
only. Post notifications won't revert the action.
If enabling or disabling of the switcher fails after the pre-change
notification has been sent, subsystems which have registered
notifiers can be left in an inappropriate state.
This patch sends a suitable post-change notification on failure,
indicating that the old state has been reestablished.
For example, a failed initialisation will result in the following
sequence:
BL_NOTIFY_PRE_ENABLE
/* switcher initialisation fails */
BL_NOTIFY_POST_DISABLE
It is the responsibility of notified subsystems to respond in an
appropriate way.
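A minimal sketch of a subscriber (my_bL_notify and my_bL_nb are
illustrative names, not part of this patch):
#include <linux/notifier.h>
#include <asm/bL_switcher.h>
static int my_bL_notify(struct notifier_block *nb,
			unsigned long action, void *data)
{
	switch (action) {
	case BL_NOTIFY_PRE_ENABLE:
	case BL_NOTIFY_PRE_DISABLE:
		/* return notifier_from_errno(-EBUSY) here to veto */
		return NOTIFY_OK;
	case BL_NOTIFY_POST_ENABLE:
	case BL_NOTIFY_POST_DISABLE:
		/* reconfigure for the new switcher state */
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}
static struct notifier_block my_bL_nb = {
	.notifier_call = my_bL_notify,
};
/* in some init path: bL_switcher_register_notifier(&my_bL_nb); */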
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 60 +++++++++++++++++++++++++++++++-------
arch/arm/include/asm/bL_switcher.h | 44 ++++++++++++++++++++++++++++
2 files changed, 94 insertions(+), 10 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 7d98629aa4..016488730c 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -22,6 +22,7 @@
#include <linux/clockchips.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
+#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/string.h>
@@ -304,10 +305,34 @@ EXPORT_SYMBOL_GPL(bL_switch_request);
*/
static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
static unsigned int bL_switcher_active;
static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
static cpumask_t bL_switcher_removed_logical_cpus;
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+ int ret;
+
+ ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+ if (ret & NOTIFY_STOP_MASK)
+ pr_err("%s: notifier chain failed with status 0x%x\n",
+ __func__, ret);
+ return notifier_to_errno(ret);
+}
+
static void bL_switcher_restore_cpus(void)
{
int i;
@@ -425,12 +450,13 @@ static int bL_switcher_enable(void)
pr_info("big.LITTLE switcher initializing\n");
+ ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+ if (ret)
+ goto error;
+
ret = bL_switcher_halve_cpus();
- if (ret) {
- cpu_hotplug_driver_unlock();
- mutex_unlock(&bL_switcher_activation_lock);
- return ret;
- }
+ if (ret)
+ goto error;
for_each_online_cpu(cpu) {
struct bL_thread *t = &bL_threads[cpu];
@@ -441,11 +467,18 @@ static int bL_switcher_enable(void)
}
bL_switcher_active = 1;
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
pr_info("big.LITTLE switcher initialized\n");
+ goto out;
+
+error:
+ pr_warn("big.LITTLE switcher initialization failed\n");
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+out:
cpu_hotplug_driver_unlock();
mutex_unlock(&bL_switcher_activation_lock);
- return 0;
+ return ret;
}
#ifdef CONFIG_SYSFS
@@ -458,11 +491,15 @@ static void bL_switcher_disable(void)
mutex_lock(&bL_switcher_activation_lock);
cpu_hotplug_driver_lock();
- if (!bL_switcher_active) {
- cpu_hotplug_driver_unlock();
- mutex_unlock(&bL_switcher_activation_lock);
- return;
+
+ if (!bL_switcher_active)
+ goto out;
+
+ if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ goto out;
}
+
bL_switcher_active = 0;
/*
@@ -504,6 +541,9 @@ static void bL_switcher_disable(void)
}
bL_switcher_restore_cpus();
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
cpu_hotplug_driver_unlock();
mutex_unlock(&bL_switcher_activation_lock);
}
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index 05d7c4cb94..b243ca93e8 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -12,9 +12,53 @@
#ifndef ASM_BL_SWITCHER_H
#define ASM_BL_SWITCHER_H
+#include <linux/compiler.h>
+#include <linux/types.h>
+
int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+#ifdef CONFIG_BL_SWITCHER
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled. Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled(). These functions are not
+ * recursive.
+ */
bool bL_switcher_get_enabled(void);
void bL_switcher_put_enabled(void);
+#else
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+#endif /* CONFIG_BL_SWITCHER */
+
#endif
--
1.8.4.98.gb022869
* [PATCH 03/13] ARM: bL_switcher: Add switch completion callback for bL_switch_request()
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
There is no explicit way to know when a switch started via
bL_switch_request() is complete. This can lead to unpredictable
behaviour when the switcher is controlled by a subsystem which
makes dynamic decisions (such as cpufreq).
The CPU PM notifier is not really suitable for signalling
completion, because the CPU could get suspended and resumed for
other, independent reasons while a switch request is in flight.
Adding a whole new notifier for this seems excessive, and may tempt
people to put heavyweight code on this path.
This patch implements a new bL_switch_request_cb() function that
allows for a per-request lightweight callback, private between the
switcher and the caller of bL_switch_request_cb().
Overlapping switches on a single CPU are considered incorrect if
they are requested via bL_switch_request_cb() with a callback (they
will lead to an unpredictable final state without explicit external
synchronisation to force the requests into a particular order).
Queuing requests robustly would be overkill because only one
subsystem should be attempting to control the switcher at any time.
Overlapping requests of this kind will be failed with -EBUSY to
indicate that the second request won't take effect and the
completer will never be called for it.
bL_switch_request() is retained as a wrapper around the new function,
with the old, fire-and-forget semantics. In this case the last request
will always win. The request may still be denied if a previous request
with a completer is still pending.
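For illustration (my_switch_done/my_switch_and_wait are made-up names,
not part of this patch), a caller wanting synchronous semantics can
pair the callback with a completion:
#include <linux/completion.h>
#include <asm/bL_switcher.h>
static void my_switch_done(void *cookie)
{
	complete(cookie);
}
/* Request a switch and wait for it (one-shot, illustrative only). */
static int my_switch_and_wait(unsigned int cpu, unsigned int cluster)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	ret = bL_switch_request_cb(cpu, cluster, my_switch_done, &done);
	if (ret)
		return ret;	/* -EBUSY if a completer is already pending */
	wait_for_completion(&done);
	return 0;
}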
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
arch/arm/common/bL_switcher.c | 53 ++++++++++++++++++++++++++++++++++----
arch/arm/include/asm/bL_switcher.h | 10 ++++++-
2 files changed, 57 insertions(+), 6 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 016488730c..34316be404 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -9,6 +9,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -25,6 +26,7 @@
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/irqchip/arm-gic.h>
@@ -224,10 +226,13 @@ static int bL_switch_to(unsigned int new_cluster_id)
}
struct bL_thread {
+ spinlock_t lock;
struct task_struct *task;
wait_queue_head_t wq;
int wanted_cluster;
struct completion started;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
};
static struct bL_thread bL_threads[NR_CPUS];
@@ -237,6 +242,8 @@ static int bL_switcher_thread(void *arg)
struct bL_thread *t = arg;
struct sched_param param = { .sched_priority = 1 };
int cluster;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
complete(&t->started);
@@ -247,9 +254,21 @@ static int bL_switcher_thread(void *arg)
wait_event_interruptible(t->wq,
t->wanted_cluster != -1 ||
kthread_should_stop());
- cluster = xchg(&t->wanted_cluster, -1);
- if (cluster != -1)
+
+ spin_lock(&t->lock);
+ cluster = t->wanted_cluster;
+ completer = t->completer;
+ completer_cookie = t->completer_cookie;
+ t->wanted_cluster = -1;
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+
+ if (cluster != -1) {
bL_switch_to(cluster);
+
+ if (completer)
+ completer(completer_cookie);
+ }
} while (!kthread_should_stop());
return 0;
@@ -270,16 +289,30 @@ static struct task_struct *bL_switcher_thread_create(int cpu, void *arg)
}
/*
- * bL_switch_request - Switch to a specific cluster for the given CPU
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ * with completion notification via a callback
*
* @cpu: the CPU to switch
* @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback. If non-NULL,
+ * @completer(@completer_cookie) will be called on completion of
+ * the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
*
* This function causes a cluster switch on the given CPU by waking up
* the appropriate switcher thread. This function may or may not return
* before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete. This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns. When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
*/
-int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie)
{
struct bL_thread *t;
@@ -289,16 +322,25 @@ int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
}
t = &bL_threads[cpu];
+
if (IS_ERR(t->task))
return PTR_ERR(t->task);
if (!t->task)
return -ESRCH;
+ spin_lock(&t->lock);
+ if (t->completer) {
+ spin_unlock(&t->lock);
+ return -EBUSY;
+ }
+ t->completer = completer;
+ t->completer_cookie = completer_cookie;
t->wanted_cluster = new_cluster_id;
+ spin_unlock(&t->lock);
wake_up(&t->wq);
return 0;
}
-EXPORT_SYMBOL_GPL(bL_switch_request);
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
/*
* Activation and configuration code.
@@ -460,6 +502,7 @@ static int bL_switcher_enable(void)
for_each_online_cpu(cpu) {
struct bL_thread *t = &bL_threads[cpu];
+ spin_lock_init(&t->lock);
init_waitqueue_head(&t->wq);
init_completion(&t->started);
t->wanted_cluster = -1;
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index b243ca93e8..7d1cce8b8a 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -15,7 +15,15 @@
#include <linux/compiler.h>
#include <linux/types.h>
-int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id);
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie);
+static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+ return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL);
+}
/*
* Register here to be notified about runtime enabling/disabling of
--
1.8.4.98.gb022869
* [PATCH 04/13] ARM: bL_switcher: synchronize the outbound with the inbound
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
Let's wait for the inbound CPU to come up and snoop some of the outbound
CPU cache before bringing the outbound CPU down. That should be more
efficient than going down right away.
Possible improvements might involve some monitoring of the CCI event
counters.
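In essence, the handshake pattern is (variable names as in the diff
below; this is the pattern restated with comments, not additional code):
/* Outbound side, after state save: wait in low power until woken. */
while (!handshake) {
	wfe();		/* sleep until an event (SEV) is signalled */
	smp_mb();	/* then re-read the flag the inbound writes */
}
mcpm_cpu_power_down();
/* Inbound side, once alive: set the flag and wake the outbound. */
*handshake_ptr = 1;
dsb_sev();		/* drain the store, then broadcast an event */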
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 34316be404..aab7c12748 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -55,9 +55,10 @@ static int read_mpidr(void)
* bL switcher core code.
*/
-static void bL_do_switch(void *_unused)
+static void bL_do_switch(void *_arg)
{
unsigned ib_mpidr, ib_cpu, ib_cluster;
+ long volatile handshake, **handshake_ptr = _arg;
pr_debug("%s\n", __func__);
@@ -65,6 +66,13 @@ static void bL_do_switch(void *_unused)
ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+ /* Advertise our handshake location */
+ if (handshake_ptr) {
+ handshake = 0;
+ *handshake_ptr = &handshake;
+ } else
+ handshake = -1;
+
/*
* Our state has been saved at this point. Let's release our
* inbound CPU.
@@ -83,6 +91,14 @@ static void bL_do_switch(void *_unused)
* we have none.
*/
+ /*
+ * Let's wait until our inbound is alive.
+ */
+ while (!handshake) {
+ wfe();
+ smp_mb();
+ }
+
/* Let's put ourself down. */
mcpm_cpu_power_down();
@@ -130,6 +146,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
struct tick_device *tdev;
enum clock_event_mode tdev_mode;
+ long volatile *handshake_ptr;
int ret;
this_cpu = smp_processor_id();
@@ -198,7 +215,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
cpu_logical_map(that_cpu) = ob_mpidr;
/* Let's do the actual CPU switch. */
- ret = cpu_suspend(0, bL_switchpoint);
+ ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
if (ret > 0)
panic("%s: cpu_suspend() returned %d\n", __func__, ret);
@@ -220,6 +237,9 @@ static int bL_switch_to(unsigned int new_cluster_id)
local_fiq_enable();
local_irq_enable();
+ *handshake_ptr = 1;
+ dsb_sev();
+
if (ret)
pr_err("%s exiting with error %d\n", __func__, ret);
return ret;
--
1.8.4.98.gb022869
* [PATCH 05/13] ARM: SMP: basic IPI triggered completion support
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
We need a mechanism to let an inbound CPU signal that it is alive before
even getting into the kernel environment, i.e. from early assembly code.
Using an IPI is the simplest way to achieve that.
This adds some basic infrastructure to register a struct completion
pointer to be "completed" when the dedicated IPI for this task is
received.
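A minimal usage sketch (my_wait_for_ipi() is a made-up caller; how the
SGI gets sent is left out here, see the rest of this series):
#include <linux/completion.h>
#include <linux/printk.h>
#include <asm/smp.h>
static void my_wait_for_ipi(int cpu)
{
	struct completion alive;
	int ipi_nr;
	init_completion(&alive);
	ipi_nr = register_ipi_completion(&alive, cpu);
	pr_debug("expecting SGI %d on CPU %d\n", ipi_nr, cpu);
	/* ... arrange for SGI ipi_nr to be sent to 'cpu' ... */
	wait_for_completion(&alive);	/* completed by handle_IPI() */
}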
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/include/asm/hardirq.h | 2 +-
arch/arm/include/asm/smp.h | 2 ++
arch/arm/kernel/smp.c | 21 +++++++++++++++++++++
3 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 2740c2a2df..3d7351c844 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 6
+#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index a8cae71cac..22a3b9b5d4 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -84,6 +84,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
struct smp_operations {
#ifdef CONFIG_SMP
/*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 72024ea8a3..7d80a549ca 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -66,6 +66,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC,
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
+ IPI_COMPLETION,
};
static DECLARE_COMPLETION(cpu_running);
@@ -456,6 +457,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_COMPLETION, "completion interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -515,6 +517,19 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+ per_cpu(cpu_completion, cpu) = completion;
+ return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+ complete(per_cpu(cpu_completion, cpu));
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -565,6 +580,12 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_COMPLETION:
+ irq_enter();
+ ipi_complete(cpu);
+ irq_exit();
+ break;
+
default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
cpu, ipinr);
--
1.8.4.98.gb022869
* [PATCH 06/13] ARM: mcpm: add a simple poke mechanism to the early entry code
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
This makes it possible to poke a predetermined value into a specific
address upon entering the early boot code in mcpm_head.S.
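For illustration (my_arm_poke/my_disarm_poke are made-up wrappers, not
part of this patch):
#include <asm/mcpm.h>
/* CPU 0 of cluster 1 will store 1 to 'flag_phys' on early entry. */
static void my_arm_poke(unsigned long flag_phys)
{
	mcpm_set_early_poke(0, 1, flag_phys, 1);
}
/* A physical address of 0 disarms the poke again. */
static void my_disarm_poke(void)
{
	mcpm_set_early_poke(0, 1, 0, 0);
}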
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/mcpm_entry.c | 12 ++++++++++++
arch/arm/common/mcpm_head.S | 16 ++++++++++++++--
arch/arm/include/asm/mcpm.h | 8 ++++++++
3 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236dd1a..4a2b32fd53 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val)
+{
+ unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+ poke[0] = poke_phys_addr;
+ poke[1] = poke_val;
+ __cpuc_flush_dcache_area((void *)poke, 8);
+ outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
static const struct mcpm_platform_ops *platform_ops;
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 39c96df347..49dd5352fe 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point)
* position independent way.
*/
adr r5, 3f
- ldmia r5, {r6, r7, r8, r11}
+ ldmia r5, {r0, r6, r7, r8, r11}
+ add r0, r5, r0 @ r0 = mcpm_entry_early_pokes
add r6, r5, r6 @ r6 = mcpm_entry_vectors
ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys
add r8, r5, r8 @ r8 = mcpm_sync
add r11, r5, r11 @ r11 = first_man_locks
+ @ Perform an early poke, if any
+ add r0, r0, r4, lsl #3
+ ldmia r0, {r0, r1}
+ teq r0, #0
+ strne r1, [r0]
+
mov r0, #MCPM_SYNC_CLUSTER_SIZE
mla r8, r0, r10, r8 @ r8 = sync cluster base
@@ -195,7 +202,8 @@ mcpm_entry_gated:
.align 2
-3: .word mcpm_entry_vectors - .
+3: .word mcpm_entry_early_pokes - .
+ .word mcpm_entry_vectors - 3b
.word mcpm_power_up_setup_phys - 3b
.word mcpm_sync - 3b
.word first_man_locks - 3b
@@ -214,6 +222,10 @@ first_man_locks:
ENTRY(mcpm_entry_vectors)
.space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+ .type mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+ .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
.type mcpm_power_up_setup_phys, #object
ENTRY(mcpm_power_up_setup_phys)
.space 4 @ set by mcpm_sync_init()
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b7620e9..7626a7fd49 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
/*
+ * This sets an early poke i.e. a value to be poked into some address
+ * from very early assembly code before the CPU is ungated. The
+ * address must be physical, and if 0 then nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
* CPU/cluster power operations API for higher subsystems to use.
*/
--
1.8.4.98.gb022869
* [PATCH 07/13] ARM: GIC: function to retrieve the physical address of the SGIR
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
In order to have early assembly code signal other CPUs in the system,
we need to get the physical address for the SGIR register used to
send IPIs. Because the register will be used with a precomputed CPU
interface ID number, there is no need for any locking in the assembly
code where this register is written to.
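A sketch of the intended combination with the early poke mechanism
(my_arm_sgir_poke is a made-up name; the SGIR value layout matches the
one used later in this series):
#include <asm/mcpm.h>
#include <linux/irqchip/arm-gic.h>
/*
 * Program the early poke so that a waking CPU writes a precomputed
 * SGIR value, raising SGI 'ipi_nr' at GIC CPU interface 'cpu_if_id'
 * before any kernel code runs.
 */
static void my_arm_sgir_poke(unsigned int cpu, unsigned int cluster,
			     unsigned int cpu_if_id, unsigned int ipi_nr)
{
	unsigned long sgir_val = ipi_nr | ((1 << 16) << cpu_if_id);
	mcpm_set_early_poke(cpu, cluster,
			    gic_get_sgir_physaddr(), sgir_val);
}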
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
drivers/irqchip/irq-gic.c | 29 +++++++++++++++++++++++++++++
include/linux/irqchip/arm-gic.h | 1 +
2 files changed, 30 insertions(+)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6365b59181..09fdf3d574 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -769,6 +769,33 @@ void gic_migrate_target(unsigned int new_cpu_id)
}
}
}
+
+/*
+ * gic_get_sgir_physaddr - get the physical address for the SGI register
+ *
+ * Return the physical address of the SGI register to be used
+ * by some early assembly code when the kernel is not yet available.
+ */
+static unsigned long gic_dist_physaddr;
+
+unsigned long gic_get_sgir_physaddr(void)
+{
+ if (!gic_dist_physaddr)
+ return 0;
+ return gic_dist_physaddr + GIC_DIST_SOFTINT;
+}
+
+void __init gic_init_physaddr(struct device_node *node)
+{
+ struct resource res;
+ if (of_address_to_resource(node, 0, &res) == 0) {
+ gic_dist_physaddr = res.start;
+ pr_info("GIC physical location is %#lx\n", gic_dist_physaddr);
+ }
+}
+
+#else
+#define gic_init_physaddr(node) do { } while (0)
#endif
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
@@ -952,6 +979,8 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent)
percpu_offset = 0;
gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, node);
+ if (!gic_cnt)
+ gic_init_physaddr(node);
if (parent) {
irq = irq_of_parse_and_map(node, 0);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 46544e381b..dc30835099 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -78,6 +78,7 @@ static inline void gic_init(unsigned int nr, int start,
int gic_get_cpu_id(unsigned int cpu);
void gic_migrate_target(unsigned int new_cpu_id);
+unsigned long gic_get_sgir_physaddr(void);
#endif /* __ASSEMBLY */
--
1.8.4.98.gb022869
* [PATCH 08/13] ARM: GIC: interface to send a SGI directly
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
The regular gic_raise_softirq() takes as input a CPU mask, which is not
adequate when we need to send an IPI to a CPU that is not represented
in the kernel-to-GIC mapping. That is the case with the b.L switcher
when GIC migration to the inbound CPU has not yet occurred.
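For illustration (my_kick_cpu_interface is a made-up wrapper; SGI 0 as
wakeup matches its use later in this series):
#include <linux/irqchip/arm-gic.h>
/* Wake a CPU known only by its GIC CPU interface ID. */
static void my_kick_cpu_interface(unsigned int cpu_if_id)
{
	gic_send_sgi(cpu_if_id, 0);	/* SGI 0 gets it out of WFI */
}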
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
drivers/irqchip/irq-gic.c | 14 ++++++++++++++
include/linux/irqchip/arm-gic.h | 1 +
2 files changed, 15 insertions(+)
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 09fdf3d574..9031171c14 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -674,6 +674,20 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
#ifdef CONFIG_BL_SWITCHER
/*
+ * gic_send_sgi - send a SGI directly to given CPU interface number
+ *
+ * cpu_id: the ID for the destination CPU interface
+ * irq: the IPI number to send a SGI for
+ */
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq)
+{
+ BUG_ON(cpu_id >= NR_GIC_CPU_IF);
+ cpu_id = 1 << cpu_id;
+ /* this always happens on GIC0 */
+ writel_relaxed((cpu_id << 16) | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+}
+
+/*
* gic_get_cpu_id - get the CPU interface ID for the specified CPU
*
* @cpu: the logical CPU number to get the GIC ID for.
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index dc30835099..cac496b1e2 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -76,6 +76,7 @@ static inline void gic_init(unsigned int nr, int start,
gic_init_bases(nr, start, dist, cpu, 0, NULL);
}
+void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
int gic_get_cpu_id(unsigned int cpu);
void gic_migrate_target(unsigned int new_cpu_id);
unsigned long gic_get_sgir_physaddr(void);
--
1.8.4.98.gb022869
* [PATCH 09/13] ARM: bL_switcher: wait until inbound is alive before performing a switch
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
In some cases, a significant delay may be observed between the moment
a request for a CPU to come up is made and the moment it is ready to
start executing kernel code. This is especially true when a whole
cluster has to be powered up, which may take on the order of milliseconds.
It is therefore a good idea to let the outbound CPU continue to execute
code in the meantime, and be notified when the inbound is ready before
performing the actual switch.
This is achieved by registering a completion block with the appropriate
IPI callback, and programming the sending of an IPI by the early assembly
code prior to entering the main kernel code. Once the IPI is delivered
to the outbound CPU, the completion block is "completed" and the switcher
thread is resumed.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index aab7c12748..dc53eb8dcc 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -144,10 +144,11 @@ static int bL_switch_to(unsigned int new_cluster_id)
{
unsigned int mpidr, this_cpu, that_cpu;
unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+ struct completion inbound_alive;
struct tick_device *tdev;
enum clock_event_mode tdev_mode;
long volatile *handshake_ptr;
- int ret;
+ int ipi_nr, ret;
this_cpu = smp_processor_id();
ob_mpidr = read_mpidr();
@@ -166,10 +167,18 @@ static int bL_switch_to(unsigned int new_cluster_id)
pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
this_cpu, ob_mpidr, ib_mpidr);
+ this_cpu = smp_processor_id();
+
/* Close the gate for our entry vectors */
mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+ /* Install our "inbound alive" notifier. */
+ init_completion(&inbound_alive);
+ ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+ ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
/*
* Let's wake up the inbound CPU now in case it requires some delay
* to come online, but leave it gated in our entry vector code.
@@ -181,6 +190,19 @@ static int bL_switch_to(unsigned int new_cluster_id)
}
/*
+ * Raise a SGI on the inbound CPU to make sure it doesn't stall
+ * in a possible WFI, such as in bL_power_down().
+ */
+ gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+ /*
+ * Wait for the inbound to come up. This allows for other
+ * tasks to be scheduled in the meantime.
+ */
+ wait_for_completion(&inbound_alive);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+ /*
* From this point we are entering the switch critical zone
* and can't take any interrupts anymore.
*/
@@ -190,12 +212,6 @@ static int bL_switch_to(unsigned int new_cluster_id)
/* redirect GIC's SGIs to our counterpart */
gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
- /*
- * Raise a SGI on the inbound CPU to make sure it doesn't stall
- * in a possible WFI, such as in mcpm_power_down().
- */
- arch_send_wakeup_ipi_mask(cpumask_of(this_cpu));
-
tdev = tick_get_device(this_cpu);
if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
tdev = NULL;
--
1.8.4.98.gb022869
* [PATCH 10/13] ARM: bL_switcher: Basic trace events support
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
This patch adds simple trace events to the b.L switcher code
to allow tracing of CPU migration events.
To make use of the trace events, you will need:
CONFIG_FTRACE=y
CONFIG_ENABLE_DEFAULT_TRACERS=y
The following events are added:
* power:cpu_migrate_begin
* power:cpu_migrate_finish
each with the following data:
u64 timestamp;
u32 cpu_hwid;
power:cpu_migrate_begin occurs immediately before the
switcher-specific migration operations start.
power:cpu_migrate_finish occurs immediately when migration is
completed.
The cpu_hwid field contains the ID fields of the MPIDR.
* For power:cpu_migrate_begin, cpu_hwid is the ID of the outbound
physical CPU (equivalent to (from_phys_cpu,from_phys_cluster)).
* For power:cpu_migrate_finish, cpu_hwid is the ID of the inbound
physical CPU (equivalent to (to_phys_cpu,to_phys_cluster)).
By design, the cpu_hwid field is masked in the same way as the
device tree cpu node reg property, allowing direct correlation to
the DT description of the hardware.
The timestamp is added in order to minimise timing noise. An
accurate system-wide clock should be used for generating this
(hopefully getnstimeofday is appropriate, but it could be changed).
It could be any monotonic shared clock, since the aim is to allow
accurate deltas to be computed. We don't necessarily care about
accurate synchronisation with wall clock time.
In practice, each switch takes place on a single logical CPU,
and the trace infrastructure should guarantee that events are
well-ordered with respect to a single logical CPU.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 17 ++++++++
include/trace/events/power_cpu_migrate.h | 66 ++++++++++++++++++++++++++++++++
2 files changed, 83 insertions(+)
create mode 100644 include/trace/events/power_cpu_migrate.h
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index dc53eb8dcc..7002de360d 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -20,6 +20,7 @@
#include <linux/cpumask.h>
#include <linux/kthread.h>
#include <linux/wait.h>
+#include <linux/time.h>
#include <linux/clockchips.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
@@ -33,10 +34,14 @@
#include <linux/moduleparam.h>
#include <asm/smp_plat.h>
+#include <asm/cputype.h>
#include <asm/suspend.h>
#include <asm/mcpm.h>
#include <asm/bL_switcher.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
/*
* Use our own MPIDR accessors as the generic ones in asm/cputype.h have
@@ -52,6 +57,16 @@ static int read_mpidr(void)
}
/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+ struct timespec ts;
+ getnstimeofday(&ts);
+ return timespec_to_ns(&ts);
+}
+
+/*
* bL switcher core code.
*/
@@ -208,6 +223,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
*/
local_irq_disable();
local_fiq_disable();
+ trace_cpu_migrate_begin(get_ns(), ob_mpidr);
/* redirect GIC's SGIs to our counterpart */
gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
@@ -250,6 +266,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
tdev->evtdev->next_event, 1);
}
+ trace_cpu_migrate_finish(get_ns(), ib_mpidr);
local_fiq_enable();
local_irq_enable();
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h
new file mode 100644
index 0000000000..3694af0f5b
--- /dev/null
+++ b/include/trace/events/power_cpu_migrate.h
@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+
+#if !defined(_TRACE_POWER_CPU_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_CPU_MIGRATE_H
+
+#include <linux/tracepoint.h>
+
+#define __cpu_migrate_proto \
+ TP_PROTO(u64 timestamp, \
+ u32 cpu_hwid)
+#define __cpu_migrate_args \
+ TP_ARGS(timestamp, \
+ cpu_hwid)
+
+DECLARE_EVENT_CLASS(cpu_migrate,
+
+ __cpu_migrate_proto,
+ __cpu_migrate_args,
+
+ TP_STRUCT__entry(
+ __field(u64, timestamp )
+ __field(u32, cpu_hwid )
+ ),
+
+ TP_fast_assign(
+ __entry->timestamp = timestamp;
+ __entry->cpu_hwid = cpu_hwid;
+ ),
+
+ TP_printk("timestamp=%llu cpu_hwid=0x%08lX",
+ (unsigned long long)__entry->timestamp,
+ (unsigned long)__entry->cpu_hwid
+ )
+);
+
+#define __define_cpu_migrate_event(name) \
+ DEFINE_EVENT(cpu_migrate, cpu_migrate_##name, \
+ __cpu_migrate_proto, \
+ __cpu_migrate_args \
+ )
+
+__define_cpu_migrate_event(begin);
+__define_cpu_migrate_event(finish);
+
+#undef __define_cpu_migrate
+#undef __cpu_migrate_proto
+#undef __cpu_migrate_args
+
+/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
+#ifndef _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_CPU_MIGRATE_EVENT_AVOID_DOUBLE_DEFINING
+
+/*
+ * Set from_phys_cpu and to_phys_cpu to CPU_MIGRATE_ALL_CPUS to indicate
+ * a whole-cluster migration:
+ */
+#define CPU_MIGRATE_ALL_CPUS 0x80000000U
+#endif
+
+#endif /* _TRACE_POWER_CPU_MIGRATE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE power_cpu_migrate
+#include <trace/define_trace.h>
--
1.8.4.98.gb022869
* [PATCH 11/13] ARM: bL_switcher/trace: Add trace trigger for trace bootstrapping
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
When tracing switching, an external tracer needs a way to bootstrap
its knowledge of the logical<->physical CPU mapping.
This patch adds a sysfs attribute trace_trigger. A write to this
attribute will generate a power:cpu_migrate_current event for each
online CPU, indicating the current physical CPU for each logical
CPU.
Activating or deactivating the switcher also generates these
events, so that the tracer knows about the resulting remapping of
affected CPUs.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
---
arch/arm/common/bL_switcher.c | 36 ++++++++++++++++++++++++++++++++
include/trace/events/power_cpu_migrate.h | 1 +
2 files changed, 37 insertions(+)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 7002de360d..f0dc025077 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -27,6 +27,7 @@
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/sysfs.h>
@@ -531,6 +532,25 @@ static int bL_switcher_halve_cpus(void)
return 0;
}
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+ trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+static int bL_switcher_trace_trigger(void)
+{
+ int ret;
+
+ preempt_disable();
+
+ bL_switcher_trace_trigger_cpu(NULL);
+ ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+ preempt_enable();
+
+ return ret;
+}
+
static int bL_switcher_enable(void)
{
int cpu, ret;
@@ -553,6 +573,8 @@ static int bL_switcher_enable(void)
if (ret)
goto error;
+ bL_switcher_trace_trigger();
+
for_each_online_cpu(cpu) {
struct bL_thread *t = &bL_threads[cpu];
spin_lock_init(&t->lock);
@@ -637,6 +659,8 @@ static void bL_switcher_disable(void)
}
bL_switcher_restore_cpus();
+ bL_switcher_trace_trigger();
+
bL_activation_notify(BL_NOTIFY_POST_DISABLE);
out:
@@ -670,11 +694,23 @@ static ssize_t bL_switcher_active_store(struct kobject *kobj,
return (ret >= 0) ? count : ret;
}
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret = bL_switcher_trace_trigger();
+
+ return ret ? ret : count;
+}
+
static struct kobj_attribute bL_switcher_active_attr =
__ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+ __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
static struct attribute *bL_switcher_attrs[] = {
&bL_switcher_active_attr.attr,
+ &bL_switcher_trace_trigger_attr.attr,
NULL,
};
diff --git a/include/trace/events/power_cpu_migrate.h b/include/trace/events/power_cpu_migrate.h
index 3694af0f5b..f76dd4de62 100644
--- a/include/trace/events/power_cpu_migrate.h
+++ b/include/trace/events/power_cpu_migrate.h
@@ -42,6 +42,7 @@ DECLARE_EVENT_CLASS(cpu_migrate,
__define_cpu_migrate_event(begin);
__define_cpu_migrate_event(finish);
+__define_cpu_migrate_event(current);
#undef __define_cpu_migrate
#undef __cpu_migrate_proto
--
1.8.4.98.gb022869
* [PATCH 12/13] ARM: bL_switcher/trace: Add kernel trace trigger interface
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
This patch exports a bL_switcher_trace_trigger() function to
provide a means for drivers using the trace events to get the
current status when starting a trace session.
Calling this function is equivalent to writing to the trace_trigger
file in sysfs.
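For illustration (my_trace_start is a made-up tracer hook, not part of
this patch):
#include <asm/bL_switcher.h>
/* Seed the trace with the current logical<->physical mapping. */
static int my_trace_start(void)
{
	return bL_switcher_trace_trigger();
}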
Signed-off-by: Dave Martin <dave.martin@linaro.org>
---
arch/arm/common/bL_switcher.c | 3 ++-
arch/arm/include/asm/bL_switcher.h | 3 +++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index f0dc025077..f4878a3604 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -537,7 +537,7 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
trace_cpu_migrate_current(get_ns(), read_mpidr());
}
-static int bL_switcher_trace_trigger(void)
+int bL_switcher_trace_trigger(void)
{
int ret;
@@ -550,6 +550,7 @@ static int bL_switcher_trace_trigger(void)
return ret;
}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
static int bL_switcher_enable(void)
{
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index 7d1cce8b8a..8ada5a885c 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -54,6 +54,8 @@ int bL_switcher_unregister_notifier(struct notifier_block *nb);
bool bL_switcher_get_enabled(void);
void bL_switcher_put_enabled(void);
+int bL_switcher_trace_trigger(void);
+
#else
static inline int bL_switcher_register_notifier(struct notifier_block *nb)
{
@@ -67,6 +69,7 @@ static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
static inline bool bL_switcher_get_enabled(void) { return false; }
static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
#endif /* CONFIG_BL_SWITCHER */
#endif
--
1.8.4.98.gb022869
* [PATCH 13/13] ARM: bL_switcher: Add query interface to discover CPU affinities
From: Nicolas Pitre @ 2013-09-23 23:17 UTC
To: linux-arm-kernel
From: Dave Martin <dave.martin@linaro.org>
When the switcher is active, there is no straightforward way to
figure out which logical CPU a given physical CPU maps to.
This patch provides a function
bL_switcher_get_logical_index(mpidr), which is analogous to
get_logical_index().
This function returns the logical CPU on which the specified
physical CPU is grouped (or -EINVAL if unknown).
If the switcher is inactive or not present, -EUNATCH is returned instead.
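A minimal usage sketch (my_show_mapping is a made-up caller, not part
of this patch):
#include <linux/errno.h>
#include <linux/printk.h>
#include <asm/bL_switcher.h>
/* Report where a physical CPU currently lives. */
static void my_show_mapping(u32 mpidr)
{
	int cpu = bL_switcher_get_logical_index(mpidr);
	if (cpu == -EUNATCH)
		pr_info("switcher inactive: use get_logical_index()\n");
	else if (cpu == -EINVAL)
		pr_info("MPIDR %#x not paired on any online CPU\n", mpidr);
	else
		pr_info("MPIDR %#x -> logical CPU %d\n", mpidr, cpu);
}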
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_switcher.c | 20 ++++++++++++++++++++
arch/arm/include/asm/bL_switcher.h | 2 ++
2 files changed, 22 insertions(+)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index f4878a3604..63bbc4f705 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -532,6 +532,26 @@ static int bL_switcher_halve_cpus(void)
return 0;
}
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+ int cpu;
+
+ if (!bL_switcher_active)
+ return -EUNATCH;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ for_each_online_cpu(cpu) {
+ int pairing = bL_switcher_cpu_pairing[cpu];
+ if (pairing == -1)
+ continue;
+ if ((mpidr == cpu_logical_map(cpu)) ||
+ (mpidr == cpu_logical_map(pairing)))
+ return cpu;
+ }
+ return -EINVAL;
+}
+
static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
{
trace_cpu_migrate_current(get_ns(), read_mpidr());
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
index 8ada5a885c..1714800fa1 100644
--- a/arch/arm/include/asm/bL_switcher.h
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -55,6 +55,7 @@ bool bL_switcher_get_enabled(void);
void bL_switcher_put_enabled(void);
int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
#else
static inline int bL_switcher_register_notifier(struct notifier_block *nb)
@@ -70,6 +71,7 @@ static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
static inline bool bL_switcher_get_enabled(void) { return false; }
static inline void bL_switcher_put_enabled(void) { }
static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
#endif /* CONFIG_BL_SWITCHER */
#endif
--
1.8.4.98.gb022869