LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RESEND PATCH V5 3/8] cpuidle/ppc: Split timer_interrupt() into timer handling and interrupt handling routines
From: Preeti U Murthy @ 2014-01-22  7:08 UTC (permalink / raw)
  To: peterz, fweisbec, paul.gortmaker, paulus, mingo, mikey, shangw,
	rafael.j.wysocki, galak, =daniel.lezcano, benh, paulmck,
	--to=agraf, arnd, linux-pm, rostedt, michael, john.stultz, anton,
	tglx, chenhui.zhao, deepthi, r58472, geoff, linux-kernel,
	srivatsa.bhat, schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20140122065918.30650.22437.stgit@preeti.in.ibm.com>

Split timer_interrupt(), which is the local timer interrupt handler on ppc,
into routines called during regular interrupt handling and __timer_interrupt(),
which takes care of running local timers and collecting time related stats.

This will enable callers interested only in running expired local timers to
directly call into __timer_interrupt(). One of the use cases of this is the
tick broadcast IPI handling in which the sleeping CPUs need to handle the local
timers that have expired.

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/kernel/time.c |   81 +++++++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3ff97db..df2989b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -478,6 +478,47 @@ void arch_irq_work_raise(void)
 
 #endif /* CONFIG_IRQ_WORK */
 
+void __timer_interrupt(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+	struct clock_event_device *evt = &__get_cpu_var(decrementers);
+	u64 now;
+
+	trace_timer_interrupt_entry(regs);
+
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
+	}
+
+	now = get_tb_or_rtc();
+	if (now >= *next_tb) {
+		*next_tb = ~(u64)0;
+		if (evt->event_handler)
+			evt->event_handler(evt);
+		__get_cpu_var(irq_stat).timer_irqs_event++;
+	} else {
+		now = *next_tb - now;
+		if (now <= DECREMENTER_MAX)
+			set_dec((int)now);
+		/* We may have raced with new irq work */
+		if (test_irq_work_pending())
+			set_dec(1);
+		__get_cpu_var(irq_stat).timer_irqs_others++;
+	}
+
+#ifdef CONFIG_PPC64
+	/* collect purr register values often, for accurate calculations */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+		cu->current_tb = mfspr(SPRN_PURR);
+	}
+#endif
+
+	trace_timer_interrupt_exit(regs);
+}
+
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
@@ -486,8 +527,6 @@ void timer_interrupt(struct pt_regs * regs)
 {
 	struct pt_regs *old_regs;
 	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
-	struct clock_event_device *evt = &__get_cpu_var(decrementers);
-	u64 now;
 
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
@@ -519,39 +558,7 @@ void timer_interrupt(struct pt_regs * regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
-	trace_timer_interrupt_entry(regs);
-
-	if (test_irq_work_pending()) {
-		clear_irq_work_pending();
-		irq_work_run();
-	}
-
-	now = get_tb_or_rtc();
-	if (now >= *next_tb) {
-		*next_tb = ~(u64)0;
-		if (evt->event_handler)
-			evt->event_handler(evt);
-		__get_cpu_var(irq_stat).timer_irqs_event++;
-	} else {
-		now = *next_tb - now;
-		if (now <= DECREMENTER_MAX)
-			set_dec((int)now);
-		/* We may have raced with new irq work */
-		if (test_irq_work_pending())
-			set_dec(1);
-		__get_cpu_var(irq_stat).timer_irqs_others++;
-	}
-
-#ifdef CONFIG_PPC64
-	/* collect purr register values often, for accurate calculations */
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
-		cu->current_tb = mfspr(SPRN_PURR);
-	}
-#endif
-
-	trace_timer_interrupt_exit(regs);
-
+	__timer_interrupt();
 	irq_exit();
 	set_irq_regs(old_regs);
 }
@@ -828,6 +835,10 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 /* Interrupt handler for the timer broadcast IPI */
 void tick_broadcast_ipi_handler(void)
 {
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+	*next_tb = get_tb_or_rtc();
+	__timer_interrupt();
 }
 
 static void register_decrementer_clockevent(int cpu)

^ permalink raw reply related

* [RESEND PATCH V5 2/8] powerpc: Implement tick broadcast IPI as a fixed IPI message
From: Preeti U Murthy @ 2014-01-22  7:08 UTC (permalink / raw)
  To: peterz, fweisbec, paul.gortmaker, paulus, mingo, mikey, shangw,
	rafael.j.wysocki, galak, =daniel.lezcano, benh, paulmck,
	--to=agraf, arnd, linux-pm, rostedt, michael, john.stultz, anton,
	tglx, chenhui.zhao, deepthi, r58472, geoff, linux-kernel,
	srivatsa.bhat, schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20140122065918.30650.22437.stgit@preeti.in.ibm.com>

From: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

For scalability and performance reasons, we want the tick broadcast IPIs
to be handled as efficiently as possible. Fixed IPI messages
are one of the most efficient mechanisms available - they are faster than
the smp_call_function mechanism because the IPI handlers are fixed and hence
they don't involve costly operations such as adding IPI handlers to the target
CPU's function queue, acquiring locks for synchronization etc.

Luckily we have an unused IPI message slot, so use that to implement
tick broadcast IPIs efficiently.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
[Functions renamed to tick_broadcast* and Changelog modified by
 Preeti U. Murthy<preeti@linux.vnet.ibm.com>]
Signed-off-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Geoff Levand <geoff@infradead.org> [For the PS3 part]
---

 arch/powerpc/include/asm/smp.h          |    2 +-
 arch/powerpc/include/asm/time.h         |    1 +
 arch/powerpc/kernel/smp.c               |   19 +++++++++++++++----
 arch/powerpc/kernel/time.c              |    5 +++++
 arch/powerpc/platforms/cell/interrupt.c |    2 +-
 arch/powerpc/platforms/ps3/smp.c        |    2 +-
 6 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 9f7356b..ff51046 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -120,7 +120,7 @@ extern int cpu_to_core_id(int cpu);
  * in /proc/interrupts will be wrong!!! --Troy */
 #define PPC_MSG_CALL_FUNCTION   0
 #define PPC_MSG_RESCHEDULE      1
-#define PPC_MSG_UNUSED		2
+#define PPC_MSG_TICK_BROADCAST	2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
 /* for irq controllers that have dedicated ipis per message (4) */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c1f2676..1d428e6 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -28,6 +28,7 @@ extern struct clock_event_device decrementer_clockevent;
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
 extern void GregorianDay(struct rtc_time *tm);
+extern void tick_broadcast_ipi_handler(void);
 
 extern void generic_calibrate_decr(void);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ee7d76b..6f06f05 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,7 @@
 #include <asm/ptrace.h>
 #include <linux/atomic.h>
 #include <asm/irq.h>
+#include <asm/hw_irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/prom.h>
@@ -145,9 +146,9 @@ static irqreturn_t reschedule_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t unused_action(int irq, void *data)
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
 {
-	/* This slot is unused and hence available for use, if needed */
+	tick_broadcast_ipi_handler();
 	return IRQ_HANDLED;
 }
 
@@ -168,14 +169,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
 static irq_handler_t smp_ipi_action[] = {
 	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
-	[PPC_MSG_UNUSED] = unused_action,
+	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
 	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
 };
 
 const char *smp_ipi_name[] = {
 	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
-	[PPC_MSG_UNUSED] = "ipi unused",
+	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
 	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
 };
 
@@ -251,6 +252,8 @@ irqreturn_t smp_ipi_demux(void)
 			generic_smp_call_function_interrupt();
 		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
 			scheduler_ipi();
+		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+			tick_broadcast_ipi_handler();
 		if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
 			debug_ipi_action(0, NULL);
 	} while (info->messages);
@@ -289,6 +292,14 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
+void tick_broadcast(const struct cpumask *mask)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
+}
+
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
 void smp_send_debugger_break(void)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3dab20..3ff97db 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -825,6 +825,11 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 		decrementer_set_next_event(DECREMENTER_MAX, dev);
 }
 
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+}
+
 static void register_decrementer_clockevent(int cpu)
 {
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index adf3726..8a106b4 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -215,7 +215,7 @@ void iic_request_IPIs(void)
 {
 	iic_request_ipi(PPC_MSG_CALL_FUNCTION);
 	iic_request_ipi(PPC_MSG_RESCHEDULE);
-	iic_request_ipi(PPC_MSG_UNUSED);
+	iic_request_ipi(PPC_MSG_TICK_BROADCAST);
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
 }
 
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 00d1a7c..b358bec 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -76,7 +76,7 @@ static int __init ps3_smp_probe(void)
 
 		BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION    != 0);
 		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
-		BUILD_BUG_ON(PPC_MSG_UNUSED	      != 2);
+		BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST   != 2);
 		BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK   != 3);
 
 		for (i = 0; i < MSG_COUNT; i++) {

^ permalink raw reply related

* [RESEND PATCH V5 1/8] powerpc: Free up the slot of PPC_MSG_CALL_FUNC_SINGLE IPI message
From: Preeti U Murthy @ 2014-01-22  7:08 UTC (permalink / raw)
  To: peterz, fweisbec, paul.gortmaker, paulus, mingo, mikey, shangw,
	rafael.j.wysocki, galak, =daniel.lezcano, benh, paulmck,
	--to=agraf, arnd, linux-pm, rostedt, michael, john.stultz, anton,
	tglx, chenhui.zhao, deepthi, r58472, geoff, linux-kernel,
	srivatsa.bhat, schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20140122065918.30650.22437.stgit@preeti.in.ibm.com>

From: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

The IPI handlers for both PPC_MSG_CALL_FUNC and PPC_MSG_CALL_FUNC_SINGLE map
to a common implementation - generic_smp_call_function_single_interrupt(). So,
we can consolidate them and save one of the IPI message slots, (which are
precious on powerpc, since only 4 of those slots are available).

So, implement the functionality of PPC_MSG_CALL_FUNC_SINGLE using
PPC_MSG_CALL_FUNC itself and release its IPI message slot, so that it can be
used for something else in the future, if desired.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Acked-by: Geoff Levand <geoff@infradead.org> [For the PS3 part]
---

 arch/powerpc/include/asm/smp.h          |    2 +-
 arch/powerpc/kernel/smp.c               |   12 +++++-------
 arch/powerpc/platforms/cell/interrupt.c |    2 +-
 arch/powerpc/platforms/ps3/smp.c        |    2 +-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 084e080..9f7356b 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -120,7 +120,7 @@ extern int cpu_to_core_id(int cpu);
  * in /proc/interrupts will be wrong!!! --Troy */
 #define PPC_MSG_CALL_FUNCTION   0
 #define PPC_MSG_RESCHEDULE      1
-#define PPC_MSG_CALL_FUNC_SINGLE	2
+#define PPC_MSG_UNUSED		2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
 /* for irq controllers that have dedicated ipis per message (4) */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ac2621a..ee7d76b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -145,9 +145,9 @@ static irqreturn_t reschedule_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t call_function_single_action(int irq, void *data)
+static irqreturn_t unused_action(int irq, void *data)
 {
-	generic_smp_call_function_single_interrupt();
+	/* This slot is unused and hence available for use, if needed */
 	return IRQ_HANDLED;
 }
 
@@ -168,14 +168,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
 static irq_handler_t smp_ipi_action[] = {
 	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
-	[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+	[PPC_MSG_UNUSED] = unused_action,
 	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
 };
 
 const char *smp_ipi_name[] = {
 	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
-	[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+	[PPC_MSG_UNUSED] = "ipi unused",
 	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
 };
 
@@ -251,8 +251,6 @@ irqreturn_t smp_ipi_demux(void)
 			generic_smp_call_function_interrupt();
 		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
 			scheduler_ipi();
-		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE))
-			generic_smp_call_function_single_interrupt();
 		if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
 			debug_ipi_action(0, NULL);
 	} while (info->messages);
@@ -280,7 +278,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule);
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d42f3b..adf3726 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -215,7 +215,7 @@ void iic_request_IPIs(void)
 {
 	iic_request_ipi(PPC_MSG_CALL_FUNCTION);
 	iic_request_ipi(PPC_MSG_RESCHEDULE);
-	iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
+	iic_request_ipi(PPC_MSG_UNUSED);
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
 }
 
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 4b35166..00d1a7c 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -76,7 +76,7 @@ static int __init ps3_smp_probe(void)
 
 		BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION    != 0);
 		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
-		BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
+		BUILD_BUG_ON(PPC_MSG_UNUSED	      != 2);
 		BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK   != 3);
 
 		for (i = 0; i < MSG_COUNT; i++) {

^ permalink raw reply related

* [RESEND PATCH V5 0/8] cpuidle/ppc: Enable deep idle states on PowerNV
From: Preeti U Murthy @ 2014-01-22  7:07 UTC (permalink / raw)
  To: peterz, fweisbec, paul.gortmaker, paulus, mingo, mikey, shangw,
	rafael.j.wysocki, galak, =daniel.lezcano, benh, paulmck,
	--to=agraf, arnd, linux-pm, rostedt, michael, john.stultz, anton,
	tglx, chenhui.zhao, deepthi, r58472, geoff, linux-kernel,
	srivatsa.bhat, schwidefsky, svaidy, linuxppc-dev

On PowerPC, when CPUs enter certain deep idle states, the local timers stop
and the time base could go out of sync with the rest of the cores in the system.

This patchset adds support to wake up CPUs in such idle states by
broadcasting IPIs to them at their next timer events using the tick broadcast
framework in the Linux kernel. We refer to these IPIs as the tick
broadcast IPIs in this patchset.

However the tick broadcast framework as it exists today makes use of an external
clock device to wakeup CPUs in such idle states. But not all implementations of
PowerPC provide such an external clock device.

Hence Patch[6/8]:
[time/cpuidle: Support in tick broadcast framework for archs without external
clock device] adds support in the tick broadcast framework for such
use cases by queuing a hrtimer on one of the CPUs which is meant to handle the wakeup
of CPUs in deep idle states.
This patch was posted separately at: https://lkml.org/lkml/2013/12/12/687.

Patches 1-3 add support in powerpc to hook onto the tick broadcast framework.

The patchset also includes support for resyncing of time base with the rest of the
cores in the system and context management for fast sleep. PATCH[4/8] and
PATCH[5/8] address these issues.

With the required support for deep idle states thus in place, the
patchset adds "Fast-Sleep" idle state into cpuidle (Patches 7 and 8). "Fast-Sleep"
is a deep idle state on Power8 in which the above mentioned challenges
exist. Fast-Sleep can yield us significantly more power
savings than the idle states that we have in cpuidle so far.

This patchset is based on Ben's ppc next branch at commit fac515db45207718
[Merge remote-tracking branch 'scott/next' into next],  and the
cpuidle driver for powernv posted by Deepthi Dharwar:
https://lkml.org/lkml/2014/1/14/172. The same patchset minus the resolving of
merge conflicts with Ben's ppc next branch had been posted earlier
at http://lkml.org/lkml/2014/1/15/70. This Repost resolves these merge
conflicts with Ben's ppc next branch. Hence the Repost. Besides the earlier
post was based and tested on the mainline commit that was quite old.

However the patchset posted earlier at http://lkml.org/lkml/2014/1/15/70
along with Deepthi's patches on cpuidle driver for
powernv applies cleanly on the mainline kernel at commit: 85ce70fdf48aa290b484531
dated Jan 16 2014 and has been tested on the same at the time of this Repost.


Changes in V5: The primary change in this version is in Patch[6/8].
As per the discussions in V4 posting of this patchset, it was decided to
refine handling the wakeup of CPUs in fast-sleep by doing the following:

1. In V4, a polling mechanism was used by the CPU handling broadcast to
find out the time of next wakeup of the CPUs in deep idle states. V5 avoids
polling by a way described under PATCH[6/8] in this patchset.

2. The mechanism of broadcast handling of CPUs in deep idle in the absence of an
external wakeup device should be generic and not arch specific code. Hence in this
version this functionality has been integrated into the tick broadcast framework in
the kernel unlike before where it was handled in powerpc specific code.

3. It was suggested that the "broadcast cpu" can be the time keeping cpu
itself. However this has challenges of its own:

 a. The time keeping cpu need not exist when all cpus are idle. Hence there
are phases in time when time keeping cpu is absent. But for the use case that
this patchset is trying to address we rely on the presence of a broadcast cpu
all the time.

 b. The nomination and un-assignment of the time keeping cpu is not protected
by a lock today and need not be as well since such is its use case in the
kernel. However we would need locks if we double up the time keeping cpu as the
broadcast cpu.

Hence the broadcast cpu is independent of the time-keeping cpu. However PATCH[6/8]
proposes a simpler solution to pick a broadcast cpu in this version.



Changes in V4: https://lkml.org/lkml/2013/11/29/97

1. Add Fast Sleep CPU idle state on PowerNV.

2. Add the required context management for Fast Sleep and the call to OPAL
to synchronize time base after wakeup from fast sleep.

4. Add parsing of CPU idle states from the device tree to populate the
cpuidle state table.

5. Rename ambiguous functions in the code around waking up of CPUs from fast
sleep.

6. Fixed a bug in re-programming of the hrtimer that is queued to wakeup the
CPUs in fast sleep and modified Changelogs.

7. Added the ARCH_HAS_TICK_BROADCAST option. This signifies that we have an
arch-specific function to perform broadcast.


Changes in V3:
http://thread.gmane.org/gmane.linux.power-management.general/38113

1. Fix the way in which a broadcast ipi is handled on the idling cpus. Timer
handling on a broadcast ipi is being done now without missing out any timer
stats generation.

2. Fix a bug in the programming of the hrtimer meant to do broadcast. Program
it to trigger at the earlier of a "broadcast period", and the next wakeup
event. By introducing the "broadcast period" as the maximum period after
which the broadcast hrtimer can fire, we ensure that we do not miss
wakeups in corner cases.

3. On hotplug of a broadcast cpu, trigger the hrtimer meant to do broadcast
to fire immediately on the new broadcast cpu. This will ensure we do not miss
doing a broadcast pending in the nearest future.

4. Change the type of allocation from GFP_KERNEL to GFP_NOWAIT while
initializing bc_hrtimer since we are in an atomic context and cannot sleep.

5. Use the broadcast ipi to wakeup the newly nominated broadcast cpu on
hotplug of the old instead of smp_call_function_single(). This is because we
are interrupt disabled at this point and should not be using
smp_call_function_single or its children in this context to send an ipi.

6. Move GENERIC_CLOCKEVENTS_BROADCAST to arch/powerpc/Kconfig.

7. Fix coding style issues.


Changes in V2: https://lkml.org/lkml/2013/8/14/239

1. Dynamically pick a broadcast CPU, instead of having a dedicated one.
2. Remove the constraint of having to disable tickless idle on the broadcast
CPU by queueing a hrtimer dedicated to do broadcast.



V1 posting: https://lkml.org/lkml/2013/7/25/740.

1. Added the infrastructure to wakeup CPUs in deep idle states in which the
local timers stop.

---

Preeti U Murthy (5):
      cpuidle/ppc: Split timer_interrupt() into timer handling and interrupt handling routines
      powermgt: Add OPAL call to resync timebase on wakeup
      time/cpuidle: Support in tick broadcast framework in the absence of external clock device
      cpuidle/powernv: Add "Fast-Sleep" CPU idle state
      cpuidle/powernv: Parse device tree to setup idle states

Srivatsa S. Bhat (2):
      powerpc: Free up the slot of PPC_MSG_CALL_FUNC_SINGLE IPI message
      powerpc: Implement tick broadcast IPI as a fixed IPI message

Vaidyanathan Srinivasan (1):
      powernv/cpuidle: Add context management for Fast Sleep


 arch/powerpc/Kconfig                           |    2 
 arch/powerpc/include/asm/opal.h                |    2 
 arch/powerpc/include/asm/processor.h           |    1 
 arch/powerpc/include/asm/smp.h                 |    2 
 arch/powerpc/include/asm/time.h                |    1 
 arch/powerpc/kernel/exceptions-64s.S           |   10 +
 arch/powerpc/kernel/idle_power7.S              |   90 +++++++++--
 arch/powerpc/kernel/smp.c                      |   23 ++-
 arch/powerpc/kernel/time.c                     |   88 +++++++----
 arch/powerpc/platforms/cell/interrupt.c        |    2 
 arch/powerpc/platforms/powernv/opal-wrappers.S |    1 
 arch/powerpc/platforms/ps3/smp.c               |    2 
 drivers/cpuidle/cpuidle-powernv.c              |  109 ++++++++++++--
 include/linux/clockchips.h                     |    4 -
 kernel/time/clockevents.c                      |    9 +
 kernel/time/tick-broadcast.c                   |  192 ++++++++++++++++++++++--
 kernel/time/tick-internal.h                    |    8 +
 17 files changed, 442 insertions(+), 104 deletions(-)

^ permalink raw reply

* Re: [PATCH RFC 00/73] tree-wide: clean up some no longer required #include <linux/init.h>
From: Stephen Rothwell @ 2014-01-22  7:00 UTC (permalink / raw)
  To: Paul Gortmaker
  Cc: linux-arch, linux-mips, linux-m68k, rusty, linux-ia64, kvm,
	linux-s390, netdev, x86, linux-kernel, torvalds, gregkh,
	linux-alpha, sparclinux, akpm, linuxppc-dev, linux-arm-kernel
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

[-- Attachment #1: Type: text/plain, Size: 2351 bytes --]

Hi Paul,

On Tue, 21 Jan 2014 16:22:03 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> Where: This work exists as a queue of patches that I apply to
> linux-next; since the changes are fixing some things that currently
> can only be found there.  The patch series can be found at:
> 
>    http://git.kernel.org/cgit/linux/kernel/git/paulg/init.git
>    git://git.kernel.org/pub/scm/linux/kernel/git/paulg/init.git
> 
> I've avoided annoying Stephen with another queue of patches for
> linux-next while the development content was in flux, but now that
> the merge window has opened, and new additions are fewer, perhaps he
> wouldn't mind tacking it on the end...  Stephen?

OK, I have added this to the end of linux-next today - we will see how we
go.  It is called "init".

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgment of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
     * submitted under GPL v2 (or later) and include the Contributor's
	Signed-off-by,
     * posted to the relevant mailing list,
     * reviewed by you (or another maintainer of your subsystem tree),
     * successfully unit tested, and 
     * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
sfr@canb.auug.org.au

Legal Stuff:
By participating in linux-next, your subsystem tree contributions are
public and will be included in the linux-next trees.  You may be sent
e-mail messages indicating errors or other issues when the
patches/commits from your subsystem tree are merged and tested in
linux-next.  These messages may also be cross-posted to the linux-next
mailing list, the linux-kernel mailing list, etc.  The linux-next tree
project and IBM (my employer) make no warranties regarding the linux-next
project, the testing procedures, the results, the e-mails, etc.  If you
don't agree to these ground rules, let me know and I'll remove your tree
from participation in linux-next.

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH 0/4] powernv: kvm: numa fault improvement
From: Aneesh Kumar K.V @ 2014-01-22  5:18 UTC (permalink / raw)
  To: Paul Mackerras, Alexander Graf; +Cc: linuxppc-dev, kvm-ppc, Liu ping fan
In-Reply-To: <20140121112204.GE8265@iris.ozlabs.ibm.com>

Paul Mackerras <paulus@samba.org> writes:

> On Mon, Jan 20, 2014 at 03:48:36PM +0100, Alexander Graf wrote:
>> 
>> On 15.01.2014, at 07:36, Liu ping fan <kernelfans@gmail.com> wrote:
>> 
>> > On Thu, Jan 9, 2014 at 8:08 PM, Alexander Graf <agraf@suse.de> wrote:
>> >> 
>> >> On 11.12.2013, at 09:47, Liu Ping Fan <kernelfans@gmail.com> wrote:
>> >> 
>> >>> This series is based on Aneesh's series  "[PATCH -V2 0/5] powerpc: mm: Numa faults support for ppc64"
>> >>> 
>> >>> For this series, I apply the same idea from the previous thread "[PATCH 0/3] optimize for powerpc _PAGE_NUMA"
>> >>> (for which, I still try to get a machine to show nums)
>> >>> 
>> >>> But for this series, I think that I have a good justification -- the fact of heavy cost when switching context between guest and host,
>> >>> which is  well known.
>> >> 
>> >> This cover letter isn't really telling me anything. Please put a proper description of what you're trying to achieve, why you're trying to achieve what you're trying and convince your readers that it's a good idea to do it the way you do it.
>> >> 
>> > Sorry for the unclear message. After introducing the _PAGE_NUMA,
>> > kvmppc_do_h_enter() can not fill up the hpte for guest. Instead, it
>> > should rely on host's kvmppc_book3s_hv_page_fault() to call
>> > do_numa_page() to do the numa fault check. This incurs the overhead
>> > when exiting from rmode to vmode.  My idea is that in
>> > kvmppc_do_h_enter(), we do a quick check, if the page is right placed,
>> > there is no need to exit to vmode (i.e saving htab, slab switching)
>> > 
>> >>> If my suppose is correct, will CCing kvm@vger.kernel.org from next version.
>> >> 
>> >> This translates to me as "This is an RFC"?
>> >> 
>> > Yes, I am not quite sure about it. I have no bare-metal to verify it.
>> > So I hope at least, from the theory, it is correct.
>> 
>> Paul, could you please give this some thought and maybe benchmark it?
>
> OK, once I get Aneesh to tell me how I get to have ptes with
> _PAGE_NUMA set in the first place. :)
>

I guess we want patch 2, Which Liu has sent separately and I have
reviewed. http://article.gmane.org/gmane.comp.emulators.kvm.powerpc.devel/8619
I am not sure about the rest of the patches in the series.
We definitely don't want to numa migrate on henter. We may want to do
that on fault. But even there, IMHO, we should let the host take the
fault and do the numa migration instead of doing this in guest context.

-aneesh

^ permalink raw reply

* Re: [PATCH] powerpc: fix hw breakpoints on !HAVE_HW_BREAKPOINT configurations
From: Michael Neuling @ 2014-01-22  4:46 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: linuxppc-dev, imunsie
In-Reply-To: <8761pdht31.fsf_-_@igel.home>

[-- Attachment #1: Type: text/plain, Size: 1232 bytes --]

I'm not near my machine to test but looks good.

Thanks,
Mikey
On 22 Jan 2014 08:56, "Andreas Schwab" <schwab@linux-m68k.org> wrote:

> This fixes a logic error that caused a failure to update the hw breakpoint
> registers when not using the hw-breakpoint interface.
>
> Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
> ---
>  arch/powerpc/kernel/process.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 4a96556..7714950 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -690,7 +690,7 @@ struct task_struct *__switch_to(struct task_struct
> *prev,
>   * schedule DABR
>   */
>  #ifndef CONFIG_HAVE_HW_BREAKPOINT
> -       if (unlikely(hw_brk_match(&__get_cpu_var(current_brk),
> &new->thread.hw_brk)))
> +       if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk),
> &new->thread.hw_brk)))
>                 set_breakpoint(&new->thread.hw_brk);
>  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
>  #endif
> --
> 1.8.5.3
>
>
> --
> Andreas Schwab, schwab@linux-m68k.org
> GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
> "And now for something completely different."
>
>

[-- Attachment #2: Type: text/html, Size: 1724 bytes --]

^ permalink raw reply

* Re: [PATCH 10/73] powerpc: use device_initcall for registering rtc devices
From: Paul Gortmaker @ 2014-01-22  2:26 UTC (permalink / raw)
  To: Geoff Levand; +Cc: linux-arch, Paul Mackerras, linuxppc-dev, LKML
In-Reply-To: <1390348085.5027.18.camel@smoke>

On Tue, Jan 21, 2014 at 6:48 PM, Geoff Levand <geoff@infradead.org> wrote:
> Hi Paul,
>
> On Tue, 2014-01-21 at 16:22 -0500, Paul Gortmaker wrote:
>> Currently these two RTC devices are in core platform code
>> where it is not possible for them to be modular.  It will
>> never be modular, so using module_init as an alias for
>> __initcall can be somewhat misleading.
>>
>>  arch/powerpc/kernel/time.c        | 2 +-
>>  arch/powerpc/platforms/ps3/time.c | 3 +--
>>  2 files changed, 2 insertions(+), 3 deletions(-)
>
> I tested the PS3 part of this patch and it seems to work OK.
>
> Acked-by: Geoff Levand <geoff@infradead.org>

Thanks Geoff for the review and testing; I'll add the ack.

Paul.
--

>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH 0/8] Add support for PowerPC Hypervisor supplied performance counters
From: Michael Ellerman @ 2014-01-22  1:32 UTC (permalink / raw)
  To: Cody P Schafer
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Linux PPC
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

On Thu, 2014-01-16 at 15:53 -0800, Cody P Schafer wrote:
> These patches add basic pmus for 2 powerpc hypervisor interfaces to obtain
> performance counters: gpci ("get performance counter info") and 24x7.
> 
> The counters supplied by these interfaces are continually counting and never
> need to be (and cannot be) disabled or enabled. They additionally do not
> generate any interrupts. This makes them in some regards similar to software
> counters, and as a result their implementation shares some common code (which
> an initial patch exposes) with the sw counters.

Hi Cody,

Can you please add some more explanation of this series.

In particular why do we need two new PMUs, and how do they relate to each
other?

And can you add an example of how I'd actually use them using perf.

cheers

^ permalink raw reply

* Re: [PATCH 10/73] powerpc: use device_initcall for registering rtc devices
From: Geoff Levand @ 2014-01-21 23:48 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: linux-arch, linuxppc-dev, Paul Mackerras, linux-kernel
In-Reply-To: <1390339396-3479-11-git-send-email-paul.gortmaker@windriver.com>

Hi Paul,

On Tue, 2014-01-21 at 16:22 -0500, Paul Gortmaker wrote:
> Currently these two RTC devices are in core platform code
> where it is not possible for them to be modular.  It will
> never be modular, so using module_init as an alias for
> __initcall can be somewhat misleading.
> 
>  arch/powerpc/kernel/time.c        | 2 +-
>  arch/powerpc/platforms/ps3/time.c | 3 +--
>  2 files changed, 2 insertions(+), 3 deletions(-)

I tested the PS3 part of this patch and it seems to work OK.

Acked-by: Geoff Levand <geoff@infradead.org>

^ permalink raw reply

* [PATCH 3/3] powerpc/pseries: Report in kernel device tree update to drmgr
From: Tyrel Datwyler @ 2014-01-21 22:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont
In-Reply-To: <1390344949-3983-1-git-send-email-tyreld@linux.vnet.ibm.com>

Traditionally it has been drmgr's responsibility to update the device tree
through the /proc/ppc64/ofdt interface after a suspend/resume operation.
This patchset however has modified suspend/resume ops to perform that update
entirely in the kernel during the resume. Therefore, a mechanism is required
for drmgr to determine who is responsible for the update. This patch adds a
show function to the "hibernate" attribute that returns 1 if the kernel
updates the device tree after the resume and 0 if drmgr is responsible.

Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/suspend.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 16a2552..723115d 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -174,7 +174,30 @@ out:
 	return rc;
 }
 
-static DEVICE_ATTR(hibernate, S_IWUSR, NULL, store_hibernate);
+#define USER_DT_UPDATE	0
+#define KERN_DT_UPDATE	1
+
+/**
+ * show_hibernate - Report device tree update responsibilty
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		buffer
+ *
+ * Report whether a device tree update is performed by the kernel after a
+ * resume, or if drmgr must coordinate the update from user space.
+ *
+ * Return value:
+ *	0 if drmgr is to initiate update, and 1 otherwise
+ **/
+static ssize_t show_hibernate(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "%d\n", KERN_DT_UPDATE);
+}
+
+static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
+		   show_hibernate, store_hibernate);
 
 static struct bus_type suspend_subsys = {
 	.name = "power",
-- 
1.7.12.4

^ permalink raw reply related

* [PATCH 2/3] powerpc/pseries: Update dynamic cache nodes for suspend/resume operation
From: Tyrel Datwyler @ 2014-01-21 22:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont
In-Reply-To: <1390344949-3983-1-git-send-email-tyreld@linux.vnet.ibm.com>

pHyp can change cache nodes for suspend/resume operation. The current code
updates the device tree after all non boot CPUs are enabled. Hence, we do not
modify the cache list based on the latest cache nodes. Also we do not remove
cache entries for the primary CPU.

This patch removes the cache list for the boot CPU, updates the device tree
before enabling nonboot CPUs and adds cache list for the boot cpu.

Signed-off-by: Haren Myneni <hbabu@us.ibm.com>
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/rtas.h |  4 ++++
 arch/powerpc/kernel/rtas.c      | 17 +++++++++++++++++
 arch/powerpc/kernel/time.c      |  6 ++++++
 3 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9bd52c6..da9d733 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -283,6 +283,10 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 
 #ifdef CONFIG_PPC_PSERIES
 extern int pseries_devicetree_update(s32 scope);
+extern void post_mobility_fixup(void);
+extern void update_dynamic_configuration(void);
+#else /* !CONFIG_PPC_PSERIES */
+void update_dynamic_configuration(void) { }
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4cf674d..8249eb2 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -43,6 +43,7 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
+#include "cacheinfo.h"
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -972,6 +973,22 @@ out:
 	free_cpumask_var(offline_mask);
 	return atomic_read(&data.error);
 }
+
+/*
+ * The device tree cache nodes can be modified during suspend/ resume.
+ * So delete all cache entries and recreate them again after the device tree
+ * update.
+ * We already deleted cache entries for notboot CPUs before suspend. So delete
+ * entries for the primary CPU, recreate entries after the device tree update.
+ * We can create entries for nonboot CPU when enable them later.
+ */
+
+void update_dynamic_configuration(void)
+{
+	cacheinfo_cpu_offline(smp_processor_id());
+	post_mobility_fixup();
+	cacheinfo_cpu_online(smp_processor_id());
+}
 #else /* CONFIG_PPC_PSERIES */
 int rtas_ibm_suspend_me(struct rtas_args *args)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3b1441..5f1ca28 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -69,6 +69,7 @@
 #include <asm/vdso_datapage.h>
 #include <asm/firmware.h>
 #include <asm/cputime.h>
+#include <asm/rtas.h>
 
 /* powerpc clocksource/clockevent code */
 
@@ -592,6 +593,11 @@ void arch_suspend_enable_irqs(void)
 	generic_suspend_enable_irqs();
 	if (ppc_md.suspend_enable_irqs)
 		ppc_md.suspend_enable_irqs();
+	/*
+	 * Update configuration which can be modified based on devicetree
+	 * changes during resume.
+	 */
+	update_dynamic_configuration();
 }
 #endif
 
-- 
1.7.12.4

^ permalink raw reply related

* [PATCH 1/3] powerpc/pseries: Device tree should only be updated once after suspend/migrate
From: Tyrel Datwyler @ 2014-01-21 22:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont
In-Reply-To: <1390344949-3983-1-git-send-email-tyreld@linux.vnet.ibm.com>

The current code makes rtas calls for update-nodes, activate-firmware and then
update-nodes again. The FW provides the same data for both update-nodes calls.
As a result a proc entry exists error is reported for the second update while
adding device nodes.

This patch makes a single rtas call for update-nodes after activating the FW.
It also adds an rtas_busy_delay loop for the activate-firmware rtas call.

Signed-off-by: Haren Myneni <hbabu@us.ibm.com>
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/mobility.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index cde4e0a..bde7eba 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -290,13 +290,6 @@ void post_mobility_fixup(void)
 	int rc;
 	int activate_fw_token;
 
-	rc = pseries_devicetree_update(MIGRATION_SCOPE);
-	if (rc) {
-		printk(KERN_ERR "Initial post-mobility device tree update "
-		       "failed: %d\n", rc);
-		return;
-	}
-
 	activate_fw_token = rtas_token("ibm,activate-firmware");
 	if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_ERR "Could not make post-mobility "
@@ -304,16 +297,17 @@ void post_mobility_fixup(void)
 		return;
 	}
 
-	rc = rtas_call(activate_fw_token, 0, 1, NULL);
-	if (!rc) {
-		rc = pseries_devicetree_update(MIGRATION_SCOPE);
-		if (rc)
-			printk(KERN_ERR "Secondary post-mobility device tree "
-			       "update failed: %d\n", rc);
-	} else {
+	do {
+		rc = rtas_call(activate_fw_token, 0, 1, NULL);
+	} while (rtas_busy_delay(rc));
+
+	if (rc)
 		printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
-		return;
-	}
+
+	rc = pseries_devicetree_update(MIGRATION_SCOPE);
+	if (rc)
+		printk(KERN_ERR "Post-mobility device tree update "
+			"failed: %d\n", rc);
 
 	return;
 }
-- 
1.7.12.4

^ permalink raw reply related

* [PATCH 0/3] powerpc/pseries: fix issues in suspend/resume code
From: Tyrel Datwyler @ 2014-01-21 22:55 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont

This patchset fixes a couple of issues encountered in the suspend/resume code
base. First when using the kernel device tree update code update-nodes is
unnecessarily called more than once. Second the cpu cache lists are not
updated after a suspend/resume which under certain conditions may cause a
panic. Finally, since the cache list fix utilizes in-kernel device tree update
code a means for telling drmgr not to perform a device tree update from 
userspace is required.

Tyrel Datwyler (3):
  powerpc/pseries: Device tree should only be updated once after
    suspend/migrate
  powerpc/pseries: Update dynamic cache nodes for suspend/resume
    operation
  powerpc/pseries: Report in kernel device tree update to drmgr

 arch/powerpc/include/asm/rtas.h           |  4 ++++
 arch/powerpc/kernel/rtas.c                | 17 +++++++++++++++++
 arch/powerpc/kernel/time.c                |  6 ++++++
 arch/powerpc/platforms/pseries/mobility.c | 26 ++++++++++----------------
 arch/powerpc/platforms/pseries/suspend.c  | 25 ++++++++++++++++++++++++-
 5 files changed, 61 insertions(+), 17 deletions(-)

-- 
1.7.12.4

^ permalink raw reply

* [PATCH] powerpc: fix hw breakpoints on !HAVE_HW_BREAKPOINT configurations
From: Andreas Schwab @ 2014-01-21 22:24 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, Ian Munsie
In-Reply-To: <12813.1357794092__45363.9676016339$1357794149$gmane$org@ale.ozlabs.ibm.com>

This fixes a logic error that caused a failure to update the hw breakpoint
registers when not using the hw-breakpoint interface.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
---
 arch/powerpc/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4a96556..7714950 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -690,7 +690,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
  * schedule DABR
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-	if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
+	if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
 		set_breakpoint(&new->thread.hw_brk);
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
-- 
1.8.5.3


-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply related

* Re: [PATCH 12/73] powerpc: kvm e500/44x is not modular, so don't use module_init
From: Paul Gortmaker @ 2014-01-21 22:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-arch, kvm, Gleb Natapov, Alexander Graf, kvm-ppc,
	Paul Gortmaker, Paul Mackerras, Paolo Bonzini, linuxppc-dev
In-Reply-To: <1390339396-3479-13-git-send-email-paul.gortmaker@windriver.com>

On 14-01-21 04:22 PM, Paul Gortmaker wrote:
> In powerpc, CONFIG_KVM is bool, and  so are these three subarch
> options, for the 44x and e500 variants.  This means that any
> module_exit() calls and functions used by them such as the
> kvmppc_booke_exit() are dead code.  Here we remove them.
> 
> In addition, rather than use module_init, which is just
> __initcall for non-modules, we update those as well.
> 
> Note that direct use of __initcall is discouraged, vs. one
> of the priority categorized subgroups.  As __initcall gets
> mapped onto device_initcall, our use of subsys_initcall (which
> seems to make sense for netfilter code) will thus change this

I've fixed the above --  s/netfilter/PPC KVM/

The risks of recycling commit logs...

Paul.
--

> registration from level 6-device to level 4-subsys (i.e. slightly
> earlier).
> 
> However no impact of that small difference is expected,
> since the arch independent kvm code doesn't trigger any init;
> it is the arch initcalls here which actually call kvm_init.
> 
> Cc: Gleb Natapov <gleb@kernel.org>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Alexander Graf <agraf@suse.de>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: kvm@vger.kernel.org
> Cc: kvm-ppc@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  arch/powerpc/include/asm/kvm_ppc.h |  1 -
>  arch/powerpc/kvm/44x.c             | 10 +---------
>  arch/powerpc/kvm/booke.c           |  6 ------
>  arch/powerpc/kvm/e500.c            | 10 +---------
>  arch/powerpc/kvm/e500mc.c          | 10 +---------
>  5 files changed, 3 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index c8317fb..8466df5 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -109,7 +109,6 @@ extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
>  extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
>  
>  extern int kvmppc_booke_init(void);
> -extern void kvmppc_booke_exit(void);
>  
>  extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
>  extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
> index 93221e8..2129fc1 100644
> --- a/arch/powerpc/kvm/44x.c
> +++ b/arch/powerpc/kvm/44x.c
> @@ -222,12 +222,4 @@ static int __init kvmppc_44x_init(void)
>  err_out:
>  	return r;
>  }
> -
> -static void __exit kvmppc_44x_exit(void)
> -{
> -	kvmppc_pr_ops = NULL;
> -	kvmppc_booke_exit();
> -}
> -
> -module_init(kvmppc_44x_init);
> -module_exit(kvmppc_44x_exit);
> +subsys_initcall(kvmppc_44x_init);
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 0591e05..49dffa2 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1995,9 +1995,3 @@ int __init kvmppc_booke_init(void)
>  #endif /* !BOOKE_HV */
>  	return 0;
>  }
> -
> -void __exit kvmppc_booke_exit(void)
> -{
> -	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
> -	kvm_exit();
> -}
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 497b142..115ef12 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -564,12 +564,4 @@ static int __init kvmppc_e500_init(void)
>  err_out:
>  	return r;
>  }
> -
> -static void __exit kvmppc_e500_exit(void)
> -{
> -	kvmppc_pr_ops = NULL;
> -	kvmppc_booke_exit();
> -}
> -
> -module_init(kvmppc_e500_init);
> -module_exit(kvmppc_e500_exit);
> +subsys_initcall(kvmppc_e500_init);
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index 4132cd2..612c216 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -382,12 +382,4 @@ static int __init kvmppc_e500mc_init(void)
>  err_out:
>  	return r;
>  }
> -
> -static void __exit kvmppc_e500mc_exit(void)
> -{
> -	kvmppc_pr_ops = NULL;
> -	kvmppc_booke_exit();
> -}
> -
> -module_init(kvmppc_e500mc_init);
> -module_exit(kvmppc_e500mc_exit);
> +subsys_initcall(kvmppc_e500mc_init);
> 

^ permalink raw reply

* Re: [PATCH] powerpc: set the correct ksp_limit on ppc32 when switching to irq stack
From: Benjamin Herrenschmidt @ 2014-01-21 21:48 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: linuxppc, Kevin Hao
In-Reply-To: <20140121161450.GA3311@roeck-us.net>

On Tue, 2014-01-21 at 08:14 -0800, Guenter Roeck wrote:
> On Fri, Jan 17, 2014 at 12:25:28PM +0800, Kevin Hao wrote:
> > Guenter Roeck has got the following call trace on a p2020 board:
> >   Kernel stack overflow in process eb3e5a00, r1=eb79df90
> >   CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> >   task: eb3e5a00 ti: c0616000 task.ti: ef440000
> >   NIP: c003a420 LR: c003a410 CTR: c0017518
> >   REGS: eb79dee0 TRAP: 0901   Not tainted (3.13.0-rc8-juniper-00146-g19eca00)
> >   MSR: 00029000 <CE,EE,ME>  CR: 24008444  XER: 00000000
> >   GPR00: c003a410 eb79df90 eb3e5a00 00000000 eb05d900 00000001 65d87646 00000000
> >   GPR08: 00000000 020b8000 00000000 00000000 44008442
> >   NIP [c003a420] __do_softirq+0x94/0x1ec
> >   LR [c003a410] __do_softirq+0x84/0x1ec
> >   Call Trace:
> >   [eb79df90] [c003a410] __do_softirq+0x84/0x1ec (unreliable)
> >   [eb79dfe0] [c003a970] irq_exit+0xbc/0xc8
> >   [eb79dff0] [c000cc1c] call_do_irq+0x24/0x3c
> >   [ef441f20] [c00046a8] do_IRQ+0x8c/0xf8
> >   [ef441f40] [c000e7f4] ret_from_except+0x0/0x18
> >   --- Exception: 501 at 0xfcda524
> >       LR = 0x10024900
> >   Instruction dump:
> >   7c781b78 3b40000a 3a73b040 543c0024 3a800000 3b3913a0 7ef5bb78 48201bf9
> >   5463103a 7d3b182e 7e89b92e 7c008146 <3ba00000> 7e7e9b78 48000014 57fff87f
> >   Kernel panic - not syncing: kernel stack overflow
> >   CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> >   Call Trace:
> > 
> > The reason is that we have used the wrong register to calculate the
> > ksp_limit in commit cbc9565ee826 (powerpc: Remove ksp_limit on ppc64).
> > Just fix it.
> > 
> > As suggested by Benjamin Herrenschmidt, also add the C prototype of the
> > function in the comment in order to avoid such kind of errors in the
> > future.
> > 
> Was this patch accepted, or are there any problems with it ?
> I didn't see any comments, and it still isn't upstream nor in linux-next.

It will be merged when I come back from vacation. It was too late for
3.13 so I'll send it to Linus next week and will CC -stable.

Cheers,
Ben.

^ permalink raw reply

* [PATCH] powerpc: Fix endian issues in kexec and crash dump code
From: Anton Blanchard @ 2014-01-21 21:40 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev


We expose a number of OF properties in the kexec and crash dump code
and these need to be big endian.

Cc: stable@vger.kernel.org # v3.13
Signed-off-by: Anton Blanchard <anton@samba.org>
--

diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 75d4f73..015ae55 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -196,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)
 
 /* Values we need to export to the second kernel via the device tree. */
 static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
 static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
 
 static struct property kernel_end_prop = {
 	.name = "linux,kernel-end",
@@ -207,7 +209,7 @@ static struct property kernel_end_prop = {
 static struct property crashk_base_prop = {
 	.name = "linux,crashkernel-base",
 	.length = sizeof(phys_addr_t),
-	.value = &crashk_res.start,
+	.value = &crashk_base
 };
 
 static struct property crashk_size_prop = {
@@ -219,9 +221,11 @@ static struct property crashk_size_prop = {
 static struct property memory_limit_prop = {
 	.name = "linux,memory-limit",
 	.length = sizeof(unsigned long long),
-	.value = &memory_limit,
+	.value = &mem_limit,
 };
 
+#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)
+
 static void __init export_crashk_values(struct device_node *node)
 {
 	struct property *prop;
@@ -237,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node)
 		of_remove_property(node, prop);
 
 	if (crashk_res.start != 0) {
+		crashk_base = cpu_to_be_ulong(crashk_res.start),
 		of_add_property(node, &crashk_base_prop);
-		crashk_size = resource_size(&crashk_res);
+		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
 		of_add_property(node, &crashk_size_prop);
 	}
 
@@ -246,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node)
 	 * memory_limit is required by the kexec-tools to limit the
 	 * crash regions to the actual memory used.
 	 */
+	mem_limit = cpu_to_be_ulong(memory_limit);
 	of_update_property(node, &memory_limit_prop);
 }
 
@@ -264,7 +270,7 @@ static int __init kexec_setup(void)
 		of_remove_property(node, prop);
 
 	/* information needed by userspace when using default_machine_kexec */
-	kernel_end = __pa(_end);
+	kernel_end = cpu_to_be_ulong(__pa(_end));
 	of_add_property(node, &kernel_end_prop);
 
 	export_crashk_values(node);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index be4e6d6..59d229a 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image)
 
 /* Values we need to export to the second kernel via the device tree. */
 static unsigned long htab_base;
+static unsigned long htab_size;
 
 static struct property htab_base_prop = {
 	.name = "linux,htab-base",
@@ -379,7 +380,7 @@ static struct property htab_base_prop = {
 static struct property htab_size_prop = {
 	.name = "linux,htab-size",
 	.length = sizeof(unsigned long),
-	.value = &htab_size_bytes,
+	.value = &htab_size,
 };
 
 static int __init export_htab_values(void)
@@ -403,8 +404,9 @@ static int __init export_htab_values(void)
 	if (prop)
 		of_remove_property(node, prop);
 
-	htab_base = __pa(htab_address);
+	htab_base = cpu_to_be64(__pa(htab_address));
 	of_add_property(node, &htab_base_prop);
+	htab_size = cpu_to_be64(htab_size_bytes);
 	of_add_property(node, &htab_size_prop);
 
 	of_node_put(node);

^ permalink raw reply related

* [PATCH 10/73] powerpc: use device_initcall for registering rtc devices
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-arch, Geoff Levand, Paul Gortmaker, Paul Mackerras,
	linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

Currently these two RTC devices are in core platform code
where it is not possible for them to be modular.  It will
never be modular, so using module_init as an alias for
__initcall can be somewhat misleading.

Fix this up now, so that we can relocate module_init from
init.h into module.h in the future.  If we don't do this, we'd
have to add module.h to obviously non-modular code, and that
would be a worse thing.

Note that direct use of __initcall is discouraged, vs. one
of the priority categorized subgroups.  As __initcall gets
mapped onto device_initcall, our use of device_initcall
directly in this change means that the runtime impact is
zero -- they will remain at level 6 in initcall ordering.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Geoff Levand <geoff@infradead.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/powerpc/kernel/time.c        | 2 +-
 arch/powerpc/platforms/ps3/time.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3dab20..63b34fc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1064,4 +1064,4 @@ static int __init rtc_init(void)
 	return PTR_ERR_OR_ZERO(pdev);
 }
 
-module_init(rtc_init);
+device_initcall(rtc_init);
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index ce73ce8..791c614 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void)
 
 	return PTR_ERR_OR_ZERO(pdev);
 }
-
-module_init(ps3_rtc_init);
+device_initcall(ps3_rtc_init);
-- 
1.8.4.1

^ permalink raw reply related

* [PATCH 66/73] drivers/macintosh: delete non-required instances of include <linux/init.h>
From: Paul Gortmaker @ 2014-01-21 21:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-arch, Paul Gortmaker, linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

None of these files are actually using any __init type directives
and hence don't need to include <linux/init.h>.  Most are just a
leftover from __devinit and __cpuinit removal, or simply due to
code getting copied from one driver to the next.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 drivers/macintosh/adb-iop.c                 | 1 -
 drivers/macintosh/ams/ams-input.c           | 1 -
 drivers/macintosh/macio-adb.c               | 1 -
 drivers/macintosh/via-macii.c               | 1 -
 drivers/macintosh/via-pmu68k.c              | 1 -
 drivers/macintosh/windfarm_ad7417_sensor.c  | 1 -
 drivers/macintosh/windfarm_fcu_controls.c   | 1 -
 drivers/macintosh/windfarm_lm75_sensor.c    | 1 -
 drivers/macintosh/windfarm_max6690_sensor.c | 1 -
 drivers/macintosh/windfarm_smu_sat.c        | 1 -
 10 files changed, 10 deletions(-)

diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c
index f5f4da3..2c242f7 100644
--- a/drivers/macintosh/adb-iop.c
+++ b/drivers/macintosh/adb-iop.c
@@ -16,7 +16,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/proc_fs.h>
 
 #include <asm/macintosh.h> 
diff --git a/drivers/macintosh/ams/ams-input.c b/drivers/macintosh/ams/ams-input.c
index 2edae7d..b1bdd48 100644
--- a/drivers/macintosh/ams/ams-input.c
+++ b/drivers/macintosh/ams/ams-input.c
@@ -14,7 +14,6 @@
 
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 
 #include "ams.h"
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 87de8d9..c8fcac0 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -14,7 +14,6 @@
 #include <asm/pgtable.h>
 #include <asm/hydra.h>
 #include <asm/irq.h>
-#include <linux/init.h>
 #include <linux/ioport.h>
 
 struct preg {
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 3725f08..fd27da3 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -30,7 +30,6 @@
 #include <linux/delay.h>
 #include <linux/adb.h>
 #include <linux/interrupt.h>
-#include <linux/init.h>
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_via.h>
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index a00ee41..d2e252e 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -25,7 +25,6 @@
 #include <linux/miscdevice.h>
 #include <linux/blkdev.h>
 #include <linux/pci.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
 
 #include <linux/adb.h>
diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c
index 7c28b71..76715a7 100644
--- a/drivers/macintosh/windfarm_ad7417_sensor.c
+++ b/drivers/macintosh/windfarm_ad7417_sensor.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
 #include <asm/prom.h>
diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c
index 0226b79..50ba619 100644
--- a/drivers/macintosh/windfarm_fcu_controls.c
+++ b/drivers/macintosh/windfarm_fcu_controls.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
 #include <asm/prom.h>
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 590214b..704dd2e 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
 #include <asm/prom.h>
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index 87e439b..a3504d3 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -8,7 +8,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <asm/prom.h>
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index ad6223e..b6d70d2 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -10,7 +10,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
 #include <linux/mutex.h>
-- 
1.8.4.1

^ permalink raw reply related

* [PATCH 39/73] powerpc: delete another unrequired instance of <linux/init.h>
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-arch, Paul Gortmaker, Paul Mackerras, linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

Most were already deleted in the 1st pass audit; this instance
showed up more recently.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/powerpc/sysdev/indirect_pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/sysdev/indirect_pci.c b/arch/powerpc/sysdev/indirect_pci.c
index 1f6c570..66f5fd1 100644
--- a/arch/powerpc/sysdev/indirect_pci.c
+++ b/arch/powerpc/sysdev/indirect_pci.c
@@ -13,7 +13,6 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/string.h>
-#include <linux/init.h>
 
 #include <asm/io.h>
 #include <asm/prom.h>
-- 
1.8.4.1

^ permalink raw reply related

* [PATCH RFC 00/73] tree-wide: clean up some no longer required #include <linux/init.h>
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-arch, linux-mips, linux-m68k, rusty, linux-ia64, kvm,
	linux-s390, netdev, x86, gregkh, Paul Gortmaker, torvalds,
	linux-alpha, sparclinux, sfr, akpm, linuxppc-dev,
	linux-arm-kernel

TL;DR - We removed cpuinit and devinit, which left ~2000 instances of
include <linux/init.h> that were no longer needed.  To fully enable
this removal/cleanup, we relocate module_init() from init.h into
module.h.  Multi arch/multi config build testing on linux-next has
been used to find and fix any implicit header dependencies prior to
deploying the actual init.h --> module.h move, to preserve bisection.

Additional details beyond TL;DR:

module_init/module_exit and friends moved to module.h
=====================================================
Aside from enabling this init.h cleanup to extend into modular files,
it actually does make sense.  All modules will use some form of
our initfunc processing/categorization, but not all initfunc users
will necessarily be using modular functionality.  So we move these
module related macros to module.h and ensure module.h sources init.h.


module_init in non modular code:
================================
This series uncovered that we are enabling people to use module_init
in non-modular code.  While that works fine, there are at least three
reasons why it probably should not be encouraged:

 1) it makes a casual reader of the code assume the code is modular
    even though it is obj-y (builtin) or controlled by a bool Kconfig.

 2) it makes it too easy to add dead code in a function that is handed
    to module_exit() -- [more on that below]

 3) it breaks our ability to use priority sorted initcalls properly
    [more on that below.]

After this change, a new coder who tries to make use of module_init in
non modular code would find themselves also needing to include the
module.h header.  At which point the odds are greater that they would
ask themselves "Am I doing this right?  I shouldn't need this."

Note that existing non-modular code that already includes module.h and
uses module_init doesn't get fixed here, since they already build w/o
errors triggered by this change; we'll have to hunt them down later.


module_init and initcall ordering:
==================================
We have a group of about ten priority sorted initcalls, that are
called in init/main.c after most of the hard coded/direct calls
have been processed.  These serve the purpose of avoiding everyone
poking at init/main.c to hook in their init sequence.  The bins are:

        pure_initcall               0
        core_initcall               1
        postcore_initcall           2
        arch_initcall               3
        subsys_initcall             4
        fs_initcall                 5
        device_initcall             6
        late_initcall               7

These are meant to eventually replace users of the non specific
priority "__initcall" which currently maps onto device_initcall.
This is of interest, because in non-modular code, cpp does this:

    module_init -->  __initcall --> device_initcall

So all module_init() land in the device_initcall bucket, rather late
in the sequence.  That makes sense, since if it was a module, the init
could be real late (days, weeks after boot).  But now imagine you add
support for some non-modular bus/arch/infrastructure (say for e.g. PCI)
and you use module_init for it.  That means anybody else who wants
to use your subsystem can't do so if they use an initcall of 0 --> 5
priority.  For a real world example of this, see patch #1 in this series:

	https://lkml.org/lkml/2014/1/14/809

We don't want to force code that is clearly arch or subsys or fs
specific to have to use the device_initcall just because something
else has been mistakenly put (or left) in that bucket.  So a couple of
changes do actually change the initcall level where it is inevitably
appropriate to do so.  Those are called out explicitly in their
respective commit logs.


module_exit and dead code
=========================
Built in code will never have an opportunity to call functions that
are registered with module_exit(), so any cases of that uncovered in
this series delete that dead code.  Note that any built-in code that
was already including module.h and using module_exit won't have shown
up as breakage on the build coverage of this series, so we'll have to
find those independently later.  It looks like there may be quite a
few that are invisibly created via module_platform_driver -- a macro
that creates module_init and module_exit automatically.  We may want
to consider relocating module_platform_driver into module.h later...


cpuinit
=======
To finalize the removal of cpuinit, which was done several releases
ago, we remove the remaining stub functions from init.h in this
series.  We've seen one or two "users" try to creep back in, so this
will close the door on that chapter and prevent creep.


When, what and where?
=====================
When: Ideally, barring any objections or massive oversights on my
part, this will go in at or around rc1, i.e. in about 2wks.  In the
meantime I will continue daily re-test on linux-next across ~10 different
arch, using allyesconfig, allmodconfig and arch specific defconfigs
for things like mips/arm/powerpc; as I have been doing for a while.

Where: This work exists as a queue of patches that I apply to
linux-next; since the changes are fixing some things that currently
can only be found there.  The patch series can be found at:

   http://git.kernel.org/cgit/linux/kernel/git/paulg/init.git
   git://git.kernel.org/pub/scm/linux/kernel/git/paulg/init.git

The patches are not in strict chronological order, since when I've
found a header change causes a build regression that is due to an
implicit dependency/inclusion, I place the dependency fix before the
header change that caused it, so that bisection is preserved.

I've avoided annoying Stephen with another queue of patches for
linux-next while the development content was in flux, but now that
the merge window has opened, and new additions are fewer, perhaps he
wouldn't mind tacking it on the end...  Stephen?

In order to reduce the size of the overall queue here, I have already
put some dependency-free changes through maintainer trees after
re-testing them on whatever their development baseline was.  That made
sense for the larger ones (drivers/[net,usb,input] some arch trees...)
and for the kernel/ mm/ and fs/ ones where the changes were less
trivial and an earlier review was desired. But that independent treatment
doesn't scale for handling all the commits -- hence ~1400 of the
full ~2k of init.h removals remain here in this series.

What: The audit for removal of extra init.h lines has covered
drivers/, all of the main architectures (and some of the more fringe
ones), and core dirs like mm/ fs/ and kernel/ too.  The removals from
include/ itself are probably the most valuable, in terms of reducing
the amount of stuff we needlessly feed CPP.  There are probably more
fringe ones to be found, but this covers the majority of them.
Additional ones can be fed in later (through the trivial tree perhaps)
as desired.

Build coverage (from memory) has included, but is not limited to:

  allyesconfig, allmodconfig:
	x86, x86_64, ia64, s390, arm, mips, sparc, powerpc
  arch specific arch/<name>/configs/*config files:
	arm, mips, powerpc
  defconfig:
	(all of the above), c6x, parisc, uml, tile, blackfin, ...

and it will continue to take place for the next ~2wks, until I can
reliably apply the queue to master and submit a pull request.

Thanks for reading this far, and thanks to those who have merged init.h
cleanup commits already!  Additional comments, reviews and acks welcomed.

Paul.
---

Cc: linux-alpha@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-m68k@lists.linux-m68k.org
Cc: linux-mips@linux-mips.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: x86@kernel.org
Cc: netdev@vger.kernel.org
Cc: kvm@vger.kernel.org
Cc: sfr@canb.auug.org.au
Cc: rusty@rustcorp.com.au
Cc: gregkh@linuxfoundation.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org

Paul Gortmaker (73):
  init: delete the __cpuinit related stubs
  mm: replace module_init usages with subsys_initcall in nommu.c
  fs/notify: don't use module_init for non-modular inotify_user code
  netfilter: don't use module_init/exit in core IPV4 code
  x86: don't use module_init in non-modular intel_mid_vrtc.c
  x86: don't use module_init for non-modular core bootflag code
  x86: replace __init_or_module with __init in non-modular vsmp_64.c
  drivers/tty/hvc: don't use module_init in non-modular hyp. console code
  staging: don't use module_init in non-modular ion_dummy_driver.c
  powerpc: use device_initcall for registering rtc devices
  powerpc: book3s KVM can be modular so it should use module.h
  powerpc: kvm e500/44x is not modular, so don't use module_init
  powerpc: use subsys_initcall for Freescale Local Bus
  powerpc: don't use module_init for non-modular core hugetlb code
  powerpc: don't use module_init in non-modular 83xx suspend code
  arm: include module.h in drivers/bus/omap_l3_smx.c
  arm: fix implicit module.h use in mach-at91 gpio.h
  arm: fix implicit #include <linux/init.h> in entry asm.
  arm: mach-s3c64xx mach-crag6410-module.c is not modular
  arm: use subsys_initcall in non-modular pl320 IPC code
  arm: don't use module_init in non-modular mach-vexpress/spc.c code
  alpha: don't use module_init for non-modular core code
  sparc: don't use module_init in non-modular pci.c code
  m68k: don't use module_init in non-modular mvme16x/rtc.c code
  ia64: don't use module_init for non-modular core kernel/mca.c code
  ia64: don't use module_init in non-modular sim/simscsi.c code
  drivers/clk: don't use module_init in clk-nomadik.c which is non-modular
  cpuidle: don't use modular platform register in non-modular ARM drivers
  drivers/platform: don't use modular register in non-modular pdev_bus.c
  drivers/i2c: busses/i2c-acorn.c is tristate and should use module.h
  module: relocate module_init from init.h to module.h
  logo: emit "#include <linux/init.h>" in autogenerated C file
  arm: delete non-required instances of include <linux/init.h>
  mips: delete non-required instances of include <linux/init.h>
  sparc: delete non-required instances of include <linux/init.h>
  s390: delete non-required instances of include <linux/init.h>
  alpha: delete non-required instances of <linux/init.h>
  blackfin: delete non-required instances of <linux/init.h>
  powerpc: delete another unrequired instance of <linux/init.h>
  watchdog: delete non-required instances of include <linux/init.h>
  video: delete non-required instances of include <linux/init.h>
  rtc: delete non-required instances of include <linux/init.h>
  scsi: delete non-required instances of include <linux/init.h>
  spi: delete non-required instances of include <linux/init.h>
  acpi: delete non-required instances of include <linux/init.h>
  drivers/power: delete non-required instances of include <linux/init.h>
  drivers/media: delete non-required instances of include <linux/init.h>
  drivers/ata: delete non-required instances of include <linux/init.h>
  drivers/mtd: delete non-required instances of include <linux/init.h>
  drivers/hwmon: delete non-required instances of include <linux/init.h>
  drivers/i2c: delete non-required instances of include <linux/init.h>
  drivers/pinctrl: delete non-required instances of include <linux/init.h>
  drivers/isdn: delete non-required instances of include <linux/init.h>
  drivers/leds: delete non-required instances of include <linux/init.h>
  drivers/pcmcia: delete non-required instances of include <linux/init.h>
  drivers/char: delete non-required instances of include <linux/init.h>
  drivers/infiniband: delete non-required instances of include <linux/init.h>
  drivers/mfd: delete non-required instances of include <linux/init.h>
  drivers/gpio: delete non-required instances of include <linux/init.h>
  drivers/bluetooth: delete non-required instances of include <linux/init.h>
  drivers/mmc: delete non-required instances of include <linux/init.h>
  drivers/crypto: delete non-required instances of include <linux/init.h>
  drivers/platform: delete non-required instances of include <linux/init.h>
  drivers/misc: delete non-required instances of include <linux/init.h>
  drivers/edac: delete non-required instances of include <linux/init.h>
  drivers/macintosh: delete non-required instances of include <linux/init.h>
  drivers/base: delete non-required instances of include <linux/init.h>
  drivers/cpufreq: delete non-required instances of <linux/init.h>
  drivers/pci: delete non-required instances of <linux/init.h>
  drivers/dma: delete non-required instances of <linux/init.h>
  drivers/gpu: delete non-required instances of <linux/init.h>
  drivers: delete remaining non-required instances of <linux/init.h>
  include: remove needless instances of <linux/init.h>

 arch/alpha/kernel/err_ev6.c                        |  1 -
 arch/alpha/kernel/irq.c                            |  1 -
 arch/alpha/kernel/srmcons.c                        |  3 +-
 arch/alpha/kernel/traps.c                          |  1 -
 arch/alpha/oprofile/op_model_ev4.c                 |  1 -
 arch/alpha/oprofile/op_model_ev5.c                 |  1 -
 arch/alpha/oprofile/op_model_ev6.c                 |  1 -
 arch/alpha/oprofile/op_model_ev67.c                |  1 -
 arch/arm/common/dmabounce.c                        |  1 -
 arch/arm/firmware/trusted_foundations.c            |  1 -
 arch/arm/include/asm/arch_timer.h                  |  1 -
 arch/arm/kernel/entry-armv.S                       |  2 +
 arch/arm/kernel/entry-header.S                     |  1 -
 arch/arm/kernel/hyp-stub.S                         |  1 -
 arch/arm/kernel/suspend.c                          |  1 -
 arch/arm/kernel/unwind.c                           |  1 -
 arch/arm/mach-at91/include/mach/gpio.h             |  1 +
 arch/arm/mach-cns3xxx/pm.c                         |  1 -
 arch/arm/mach-exynos/headsmp.S                     |  1 -
 arch/arm/mach-footbridge/personal.c                |  1 -
 arch/arm/mach-imx/headsmp.S                        |  1 -
 arch/arm/mach-imx/iomux-v3.c                       |  1 -

 [.... snip ~1300 lines ...]

 drivers/watchdog/stmp3xxx_rtc_wdt.c                |  1 -
 drivers/watchdog/wdt_pci.c                         |  1 -
 drivers/xen/xen-stub.c                             |  1 -
 fs/notify/inotify/inotify_user.c                   |  4 +-
 include/drm/drmP.h                                 |  2 +-
 include/linux/fb.h                                 |  1 -
 include/linux/ide.h                                |  1 -
 include/linux/init.h                               | 77 ----------------------
 include/linux/kdb.h                                |  1 -
 include/linux/linux_logo.h                         |  3 -
 include/linux/lsm_audit.h                          |  1 -
 include/linux/module.h                             | 72 ++++++++++++++++++++
 include/linux/moduleparam.h                        |  1 -
 include/linux/netfilter.h                          |  1 -
 include/linux/nls.h                                |  2 +-
 include/linux/percpu_ida.h                         |  1 -
 include/linux/profile.h                            |  1 -
 include/linux/pstore_ram.h                         |  1 -
 include/linux/usb/gadget.h                         |  1 -
 include/linux/zorro.h                              |  1 -
 include/xen/xenbus.h                               |  1 -
 mm/nommu.c                                         |  4 +-
 net/ipv4/netfilter.c                               |  9 +--
 scripts/pnmtologo.c                                |  1 +
 scripts/tags.sh                                    |  2 +-
 1254 files changed, 131 insertions(+), 1431 deletions(-)
 mode change 100755 => 100644 scripts/tags.sh

-- 
1.8.4.1

^ permalink raw reply

* [PATCH 15/73] powerpc: don't use module_init in non-modular 83xx suspend code
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-arch, Paul Gortmaker, Paul Mackerras, linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

The suspend.o is built for SUSPEND -- which is bool, and hence
this code is either present or absent.  It will never be modular,
so using module_init as an alias for __initcall can be somewhat
misleading.

Fix this up now, so that we can relocate module_init from
init.h into module.h in the future.  If we don't do this, we'd
have to add module.h to obviously non-modular code, and that
would be a worse thing.

Note that direct use of __initcall is discouraged, vs. one
of the priority categorized subgroups.  As __initcall gets
mapped onto device_initcall, our use of device_initcall
directly in this change means that the runtime impact is
zero -- it will remain at level 6 in initcall ordering.

Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/powerpc/platforms/83xx/suspend.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 4b4c081..e9f07c6 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -446,5 +446,4 @@ static int pmc_init(void)
 {
 	return platform_driver_register(&pmc_driver);
 }
-
-module_init(pmc_init);
+device_initcall(pmc_init);
-- 
1.8.4.1

^ permalink raw reply related

* [PATCH 13/73] powerpc: use subsys_initcall for Freescale Local Bus
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-arch, Paul Gortmaker, Paul Mackerras, Scott Wood,
	linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

The FSL_SOC option is bool, and hence this code is either
present or absent.  It will never be modular, so using
module_init as an alias for __initcall is rather misleading.

Fix this up now, so that we can relocate module_init from
init.h into module.h in the future.  If we don't do this, we'd
have to add module.h to obviously non-modular code, and that
would be a worse thing.

Note that direct use of __initcall is discouraged, vs. one
of the priority categorized subgroups.  As __initcall gets
mapped onto device_initcall, our use of subsys_initcall (which
makes sense for bus code) will thus change this registration
from level 6-device to level 4-subsys (i.e. slightly earlier).
However, no impact from that small difference has been
observed during testing, nor is any expected.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Scott Wood <scottwood@freescale.com>
Acked-by: Scott Wood <scottwood@freescale.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/powerpc/sysdev/fsl_lbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
index d631022..38138cf 100644
--- a/arch/powerpc/sysdev/fsl_lbc.c
+++ b/arch/powerpc/sysdev/fsl_lbc.c
@@ -407,4 +407,4 @@ static int __init fsl_lbc_init(void)
 {
 	return platform_driver_register(&fsl_lbc_ctrl_driver);
 }
-module_init(fsl_lbc_init);
+subsys_initcall(fsl_lbc_init);
-- 
1.8.4.1

^ permalink raw reply related

* [PATCH 14/73] powerpc: don't use module_init for non-modular core hugetlb code
From: Paul Gortmaker @ 2014-01-21 21:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-arch, Paul Gortmaker, Paul Mackerras, linuxppc-dev
In-Reply-To: <1390339396-3479-1-git-send-email-paul.gortmaker@windriver.com>

The hugetlbpage.o is obj-y (always built in).  It will never
be modular, so using module_init as an alias for __initcall is
somewhat misleading.

Fix this up now, so that we can relocate module_init from
init.h into module.h in the future.  If we don't do this, we'd
have to add module.h to obviously non-modular code, and that
would be a worse thing.

Note that direct use of __initcall is discouraged, vs. one
of the priority categorized subgroups.  As __initcall gets
mapped onto device_initcall, our use of arch_initcall (which
makes sense for arch code) will thus change this registration
from level 6-device to level 3-arch (i.e. slightly earlier).
However, no impact from that small difference has been
observed during testing, nor is any expected.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/powerpc/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90bb6d9..d25c202 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -911,7 +911,7 @@ static int __init hugetlbpage_init(void)
 	return 0;
 }
 #endif
-module_init(hugetlbpage_init);
+arch_initcall(hugetlbpage_init);
 
 void flush_dcache_icache_hugepage(struct page *page)
 {
-- 
1.8.4.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox